Sunday, 30 November 2025

325.E-LEARNING PLATFORM ANALYTICS USING DATA STEP | PROC SQL | PROC MEANS | PROC RANK | PROC UNIVARIATE | PROC SGPLOT | DATE FUNCTIONS AND MACROS

E-LEARNING PLATFORM ANALYTICS USING DATA STEP | PROC SQL | PROC MEANS | PROC RANK | PROC UNIVARIATE | PROC SGPLOT | DATE FUNCTIONS AND MACROS

options nocenter;

1) CREATE RAW DATASET: elearning_raw

data work.elearning_raw;

  informat Launch_Date date9. Last_Active_Date date9.;

  format   Launch_Date date9. Last_Active_Date date9.;

  length Platform_Name $30 Country $20 Category $20;

  input Platform_Name :$30. Active_Users Avg_Course_Price Completion_Rate Country :$20.

       Category :$20. Launch_Date :date9. Last_Active_Date :date9.;

datalines;

"LearnSphere"       125000 49.99 72.5 "USA"        "General"  15JAN2015 10NOV2025

"CodeHive"          90000  79.00 65.3 "India"      "Tech"     02MAR2017 20OCT2025

"SkillUp"           45000  29.99 58.1 "UK"         "Business" 10SEP2019 05NOV2025

"EduPulse"          30000  59.50 80.2 "Canada"     "Design"   22JUN2016 01NOV2025

"BrightLearn"       175000 19.99 45.7 "USA"        "Kids"     01JAN2014 29OCT2025

"DataTrail"         60000  99.00 70.0 "Germany"    "Tech"     15AUG2018 12NOV2025

"MyTutor"           25000  15.00 55.0 "India"      "Tutoring" 03APR2020 03NOV2025

"ProCert"           85000  149.0 82.6 "USA"        "Professional" 12DEC2013 08NOV2025

"LanguageLoop"      42000  39.00 67.2 "Spain"      "Language" 17MAY2016 30OCT2025

"CreativeCamp"      22000  34.50 60.5 "Australia"  "Design"   11NOV2019 11NOV2025

"ExamReady"         98000  9.99  50.0 "India"      "TestPrep" 07SEP2015 02NOV2025

"UpSkillNow"        52000  24.99 61.8 "UK"         "Business" 29FEB2016 06NOV2025

;

run;

proc print data=work.elearning_raw;

run;

OUTPUT:

ObsLaunch_DateLast_Active_DatePlatform_NameCountryCategoryActive_UsersAvg_Course_PriceCompletion_Rate
115JAN201510NOV2025"LearnSphere""USA""General"12500049.9972.5
202MAR201720OCT2025"CodeHive""India""Tech"9000079.0065.3
310SEP201905NOV2025"SkillUp""UK""Business"4500029.9958.1
422JUN201601NOV2025"EduPulse""Canada""Design"3000059.5080.2
501JAN201429OCT2025"BrightLearn""USA""Kids"17500019.9945.7
615AUG201812NOV2025"DataTrail""Germany""Tech"6000099.0070.0
703APR202003NOV2025"MyTutor""India""Tutoring"2500015.0055.0
812DEC201308NOV2025"ProCert""USA""Professional"85000149.0082.6
917MAY201630OCT2025"LanguageLoop""Spain""Language"4200039.0067.2
1011NOV201911NOV2025"CreativeCamp""Australia""Design"2200034.5060.5
1107SEP201502NOV2025"ExamReady""India""TestPrep"980009.9950.0
1229FEB201606NOV2025"UpSkillNow""UK""Business"5200024.9961.8


2) Macro to compute date-based metrics using INTCK and INTNX

%macro compute_date_metrics(data=work.elearning_raw, out=work.elearning_dates);

  data &out;

    set &data;

    /* Today's date - use TODAY() to get system date */

    Today = today();

    format Today date9.;

    /* Days, months, years since launch */

    Days_Since_Launch = intck('day', Launch_Date, Today);

    Months_Since_Launch = intck('month', Launch_Date, Today);

    Years_Since_Launch = intck('year', Launch_Date, Today);


    /* Days between launch and last active (platform lifetime so far) */

    Lifetime_Days = intck('day', Launch_Date, Last_Active_Date);


    /* Next billing cycle date example: shift last active by 1 month to next cycle */

    Next_Billing_Cycle = intnx('month', Last_Active_Date, 1, 'same');

    /* Use INTNX 'samemonth' alignment or 'beginning','end' as required */

    format Next_Billing_Cycle date9.;


    /* Create a flag for recently_active: last active within last 30 days */

    Recently_Active_Flag = (intck('day', Last_Active_Date, Today) <= 30);


    /* Clean up odd values (safeguard) */

    if Days_Since_Launch < 0 then Days_Since_Launch = .;

    if Lifetime_Days < 0 then Lifetime_Days = .;


    label Days_Since_Launch = "Days since Launch (INTCK day)"

          Months_Since_Launch = "Months since Launch (INTCK month)"

          Years_Since_Launch = "Years since Launch (INTCK year)"

          Lifetime_Days = "Days between Launch and Last Active"

          Next_Billing_Cycle = "Next Billing Cycle (INTNX month +1)";

  run;

  proc print data=&out;

  run;

%mend compute_date_metrics;


%compute_date_metrics();

OUTPUT:

ObsLaunch_DateLast_Active_DatePlatform_NameCountryCategoryActive_UsersAvg_Course_PriceCompletion_RateTodayDays_Since_LaunchMonths_Since_LaunchYears_Since_LaunchLifetime_DaysNext_Billing_CycleRecently_Active_Flag
115JAN201510NOV2025"LearnSphere""USA""General"12500049.9972.501DEC2025397313110395210DEC20251
202MAR201720OCT2025"CodeHive""India""Tech"9000079.0065.301DEC202531961058315420NOV20250
310SEP201905NOV2025"SkillUp""UK""Business"4500029.9958.101DEC20252274756224805DEC20251
422JUN201601NOV2025"EduPulse""Canada""Design"3000059.5080.201DEC202534491149341901DEC20251
501JAN201429OCT2025"BrightLearn""USA""Kids"17500019.9945.701DEC2025435214311431929NOV20250
615AUG201812NOV2025"DataTrail""Germany""Tech"6000099.0070.001DEC20252665887264612DEC20251
703APR202003NOV2025"MyTutor""India""Tutoring"2500015.0055.001DEC20252068685204003DEC20251
812DEC201308NOV2025"ProCert""USA""Professional"85000149.0082.601DEC2025437214412434908DEC20251
917MAY201630OCT2025"LanguageLoop""Spain""Language"4200039.0067.201DEC202534851159345330NOV20250
1011NOV201911NOV2025"CreativeCamp""Australia""Design"2200034.5060.501DEC20252212736219211DEC20251
1107SEP201502NOV2025"ExamReady""India""TestPrep"980009.9950.001DEC2025373812310370902DEC20251
1229FEB201606NOV2025"UpSkillNow""UK""Business"5200024.9961.801DEC202535631189353806DEC20251


3) PROC SQL: Aggregations by Country and Category

proc sql;

  create table work.country_summary as

  select Country,

         count(*) as Platform_Count,

         sum(Active_Users) as Total_Active_Users,

         mean(Completion_Rate) as Avg_Completion_Rate format=6.2,

         mean(Avg_Course_Price) as Avg_Price format=6.2

  from work.elearning_dates

  group by Country

  order by Total_Active_Users desc;

quit;

proc print data=work.country_summary;

run;

OUTPUT:

ObsCountryPlatform_CountTotal_Active_UsersAvg_Completion_RateAvg_Price
1"USA"338500066.9372.99
2"India"321300056.7734.66
3"UK"29700059.9527.49
4"Germany"16000070.0099.00
5"Spain"14200067.2039.00
6"Canada"13000080.2059.50
7"Australia"12200060.5034.50


4) PROC MEANS: descriptive statistics for numeric fields

proc means data=work.elearning_dates n mean median std min max nmiss;

  var Active_Users Avg_Course_Price Completion_Rate Days_Since_Launch Lifetime_Days;

  output out=work.means_summary mean= / autoname;

run;

OUTPUT:

The MEANS Procedure

VariableLabelNMeanMedianStd DevMinimumMaximumN Miss
Active_Users
Avg_Course_Price
Completion_Rate
Days_Since_Launch
Lifetime_Days
 
 
 
Days since Launch (INTCK day)
Days between Launch and Last Active
12
12
12
12
12
70750.00
50.8291667
64.0750000
3278.92
3251.58
56000.00
36.7500000
63.5500000
3467.00
3436.00
45938.94
40.7191604
11.2504646
808.4332163
807.2529691
22000.00
9.9900000
45.7000000
2068.00
2040.00
175000.00
149.0000000
82.6000000
4372.00
4349.00
0
0
0
0
0

5) PROC RANK usage: macro to rank platforms by chosen variable

%macro rank_platforms(data=work.elearning_dates, out=work.ranked_platforms, byvar=Active_Users, ties=low);

  proc rank data=&data out=&out ties=&ties descending;

    var &byvar;

    ranks Rank_&byvar;

  run;

  proc print data=&out;

  run;

%mend rank_platforms;


%rank_platforms(byvar=Active_Users, out=work.rank_by_users);

OUTPUT:

ObsLaunch_DateLast_Active_DatePlatform_NameCountryCategoryActive_UsersAvg_Course_PriceCompletion_RateTodayDays_Since_LaunchMonths_Since_LaunchYears_Since_LaunchLifetime_DaysNext_Billing_CycleRecently_Active_FlagRank_Active_Users
115JAN201510NOV2025"LearnSphere""USA""General"12500049.9972.501DEC2025397313110395210DEC202512
202MAR201720OCT2025"CodeHive""India""Tech"9000079.0065.301DEC202531961058315420NOV202504
310SEP201905NOV2025"SkillUp""UK""Business"4500029.9958.101DEC20252274756224805DEC202518
422JUN201601NOV2025"EduPulse""Canada""Design"3000059.5080.201DEC202534491149341901DEC2025110
501JAN201429OCT2025"BrightLearn""USA""Kids"17500019.9945.701DEC2025435214311431929NOV202501
615AUG201812NOV2025"DataTrail""Germany""Tech"6000099.0070.001DEC20252665887264612DEC202516
703APR202003NOV2025"MyTutor""India""Tutoring"2500015.0055.001DEC20252068685204003DEC2025111
812DEC201308NOV2025"ProCert""USA""Professional"85000149.0082.601DEC2025437214412434908DEC202515
917MAY201630OCT2025"LanguageLoop""Spain""Language"4200039.0067.201DEC202534851159345330NOV202509
1011NOV201911NOV2025"CreativeCamp""Australia""Design"2200034.5060.501DEC20252212736219211DEC2025112
1107SEP201502NOV2025"ExamReady""India""TestPrep"980009.9950.001DEC2025373812310370902DEC202513
1229FEB201606NOV2025"UpSkillNow""UK""Business"5200024.9961.801DEC202535631189353806DEC202517


%rank_platforms(byvar=Completion_Rate, out=work.rank_by_completion);

OUTPUT:

ObsLaunch_DateLast_Active_DatePlatform_NameCountryCategoryActive_UsersAvg_Course_PriceCompletion_RateTodayDays_Since_LaunchMonths_Since_LaunchYears_Since_LaunchLifetime_DaysNext_Billing_CycleRecently_Active_FlagRank_Completion_Rate
115JAN201510NOV2025"LearnSphere""USA""General"12500049.9972.501DEC2025397313110395210DEC202513
202MAR201720OCT2025"CodeHive""India""Tech"9000079.0065.301DEC202531961058315420NOV202506
310SEP201905NOV2025"SkillUp""UK""Business"4500029.9958.101DEC20252274756224805DEC202519
422JUN201601NOV2025"EduPulse""Canada""Design"3000059.5080.201DEC202534491149341901DEC202512
501JAN201429OCT2025"BrightLearn""USA""Kids"17500019.9945.701DEC2025435214311431929NOV2025012
615AUG201812NOV2025"DataTrail""Germany""Tech"6000099.0070.001DEC20252665887264612DEC202514
703APR202003NOV2025"MyTutor""India""Tutoring"2500015.0055.001DEC20252068685204003DEC2025110
812DEC201308NOV2025"ProCert""USA""Professional"85000149.0082.601DEC2025437214412434908DEC202511
917MAY201630OCT2025"LanguageLoop""Spain""Language"4200039.0067.201DEC202534851159345330NOV202505
1011NOV201911NOV2025"CreativeCamp""Australia""Design"2200034.5060.501DEC20252212736219211DEC202518
1107SEP201502NOV2025"ExamReady""India""TestPrep"980009.9950.001DEC2025373812310370902DEC2025111
1229FEB201606NOV2025"UpSkillNow""UK""Business"5200024.9961.801DEC202535631189353806DEC202517


/* Merge ranks into a single table using PROC SQL */

proc sql;

  create table work.platforms_ranked as

  select a.Platform_Name, a.Active_Users, a.Avg_Course_Price, a.Completion_Rate,

         a.Country, a.Category, a.Launch_Date, a.Last_Active_Date,

         b.Rank_Active_Users, c.Rank_Completion_Rate

  from work.elearning_dates a

  left join work.rank_by_users b on a.Platform_Name = b.Platform_Name

  left join work.rank_by_completion c on a.Platform_Name = c.Platform_Name

  order by Rank_Active_Users;

quit;

proc print data=work.platforms_ranked;

run;

OUTPUT:

ObsPlatform_NameActive_UsersAvg_Course_PriceCompletion_RateCountryCategoryLaunch_DateLast_Active_DateRank_Active_UsersRank_Completion_Rate
1"BrightLearn"17500019.9945.7"USA""Kids"01JAN201429OCT2025112
2"LearnSphere"12500049.9972.5"USA""General"15JAN201510NOV202523
3"ExamReady"980009.9950.0"India""TestPrep"07SEP201502NOV2025311
4"CodeHive"9000079.0065.3"India""Tech"02MAR201720OCT202546
5"ProCert"85000149.0082.6"USA""Professional"12DEC201308NOV202551
6"DataTrail"6000099.0070.0"Germany""Tech"15AUG201812NOV202564
7"UpSkillNow"5200024.9961.8"UK""Business"29FEB201606NOV202577
8"SkillUp"4500029.9958.1"UK""Business"10SEP201905NOV202589
9"LanguageLoop"4200039.0067.2"Spain""Language"17MAY201630OCT202595
10"EduPulse"3000059.5080.2"Canada""Design"22JUN201601NOV2025102
11"MyTutor"2500015.0055.0"India""Tutoring"03APR202003NOV20251110
12"CreativeCamp"2200034.5060.5"Australia""Design"11NOV201911NOV2025128


6) PROC UNIVARIATE: detailed distribution checks (Active_Users)

proc univariate data=work.elearning_dates;

  var Active_Users Avg_Course_Price Completion_Rate;

  histogram Active_Users / normal;

  inset mean median std / pos=ne;

run;

OUTPUT:

The UNIVARIATE Procedure

Variable: Active_Users

Moments
N12Sum Weights12
Mean70750Sum Observations849000
Std Deviation45938.9417Variance2110386364
Skewness1.11957429Kurtosis0.97488514
Uncorrected SS8.3281E10Corrected SS2.32143E10
Coeff Variation64.9313663Std Error Mean13261.4302
Basic Statistical Measures
LocationVariability
Mean70750.00Std Deviation45939
Median56000.00Variance2110386364
Mode.Range153000
  Interquartile Range58000
Tests for Location: Mu0=0
TestStatisticp Value
Student's tt5.33502Pr > |t|0.0002
SignM6Pr >= |M|0.0005
Signed RankS39Pr >= |S|0.0005
Quantiles (Definition 5)
LevelQuantile
100% Max175000
99%175000
95%175000
90%125000
75% Q394000
50% Median56000
25% Q136000
10%25000
5%22000
1%22000
0% Min22000
Extreme Observations
LowestHighest
ValueObsValueObs
2200010850008
250007900002
3000049800011
4200091250001
4500031750005

The UNIVARIATE Procedure

Histogram for Active_Users


The UNIVARIATE Procedure

Fitted Normal Distribution for Active_Users

Parameters for Normal Distribution
ParameterSymbolEstimate
MeanMu70750
Std DevSigma45938.94
Goodness-of-Fit Tests for Normal Distribution
TestStatisticp Value
Kolmogorov-SmirnovD0.17584327Pr > D>0.150
Cramer-von MisesW-Sq0.06677709Pr > W-Sq>0.250
Anderson-DarlingA-Sq0.44404111Pr > A-Sq0.240
Quantiles for Normal Distribution
PercentQuantile
ObservedEstimated
1.022000.0-36119.96
5.022000.0-4812.83
10.025000.011876.88
25.036000.039764.65
50.056000.070750.00
75.094000.0101735.35
90.0125000.0129623.12
95.0175000.0146312.83
99.0175000.0177619.96

The UNIVARIATE Procedure

Variable: Avg_Course_Price

Moments
N12Sum Weights12
Mean50.8291667Sum Observations609.95
Std Deviation40.7191604Variance1658.05003
Skewness1.46257027Kurtosis1.98284584
Uncorrected SS49241.8005Corrected SS18238.5503
Coeff Variation80.1098328Std Error Mean11.7546091
Basic Statistical Measures
LocationVariability
Mean50.82917Std Deviation40.71916
Median36.75000Variance1658
Mode.Range139.01000
  Interquartile Range46.76000
Tests for Location: Mu0=0
TestStatisticp Value
Student's tt4.32419Pr > |t|0.0012
SignM6Pr >= |M|0.0005
Signed RankS39Pr >= |S|0.0005
Quantiles (Definition 5)
LevelQuantile
100% Max149.00
99%149.00
95%149.00
90%99.00
75% Q369.25
50% Median36.75
25% Q122.49
10%15.00
5%9.99
1%9.99
0% Min9.99
Extreme Observations
LowestHighest
ValueObsValueObs
9.991149.991
15.00759.504
19.99579.002
24.991299.006
29.993149.008

The UNIVARIATE Procedure

Variable: Completion_Rate

Moments
N12Sum Weights12
Mean64.075Sum Observations768.9
Std Deviation11.2504646Variance126.572955
Skewness0.10555216Kurtosis-0.5589518
Uncorrected SS50659.57Corrected SS1392.3025
Coeff Variation17.5582749Std Error Mean3.24772939
Basic Statistical Measures
LocationVariability
Mean64.07500Std Deviation11.25046
Median63.55000Variance126.57295
Mode.Range36.90000
  Interquartile Range14.70000
Tests for Location: Mu0=0
TestStatisticp Value
Student's tt19.72917Pr > |t|<.0001
SignM6Pr >= |M|0.0005
Signed RankS39Pr >= |S|0.0005
Quantiles (Definition 5)
LevelQuantile
100% Max82.60
99%82.60
95%82.60
90%80.20
75% Q371.25
50% Median63.55
25% Q156.55
10%50.00
5%45.70
1%45.70
0% Min45.70
Extreme Observations
LowestHighest
ValueObsValueObs
45.7567.29
50.01170.06
55.0772.51
58.1380.24
60.51082.68

7) PROC SGPLOT: visualizations

/* Bar: Total Active Users by Category */

proc sgplot data=work.elearning_dates;

  vbar Category / response=Active_Users stat=sum datalabel;

  title "Total Active Users by Category";

run;

OUTPUT:

The SGPlot Procedure


/* Scatter: Avg Course Price vs Completion Rate (bubble sized by Active Users) */

proc sgplot data=work.elearning_dates;

  bubble x=Avg_Course_Price y=Completion_Rate size=Active_Users / datalabel=Platform_Name;

  xaxis label="Average Course Price (USD)";

  yaxis label="Completion Rate (%)";

  title "Price vs Completion Rate (bubble size = Active Users)";

run;

OUTPUT:

The SGPlot Procedure


/* Box plot: Completion rate by Category */

proc sgplot data=work.elearning_dates;

  vbox Completion_Rate / category=Category;

  title "Completion Rate Distribution by Category";

run;

OUTPUT:

The SGPlot Procedure


8) Example: create top N macro using ranked table

%macro top_n_by(data=work.platforms_ranked, byrank=Rank_Active_Users, n=5, out=work.topN);

  data &out;

    set &data;

    if &byrank < &n then output;

  run;

%mend top_n_by;


%top_n_by(n=5, out=work.top5_by_users);


/* show top 5 by users */

proc print data=work.top5_by_users label noobs;

  var Platform_Name Active_Users Rank_Active_Users Completion_Rate Rank_Completion_Rate Country Category;

  title "Top 5 Platforms by Active Users";

run;

 OUTPUT:

Top 5 Platforms by Active Users

Platform_NameActive_UsersRank for Variable Active_UsersCompletion_RateRank for Variable Completion_RateCountryCategory
"BrightLearn"175000145.712"USA""Kids"
"LearnSphere"125000272.53"USA""General"
"ExamReady"98000350.011"India""TestPrep"
"CodeHive"90000465.36"India""Tech"



To Visit My Previous Software Company Analysis Dataset:Click Here
To Visit My Previous Vote Program Dataset:Click Here
To Visit My Previous Audi Cars Performance Analysis Dataset:Click Here
To Visit My Previous Global Clothing Trends Dataset:Click Here  



Follow Us On : 


 


--- FOLLOW OUR BLOG FOR MORE INFORMATION.

--->PLEASE DO COMMENTS AND SHARE OUR BLOG.