167.MASTERING THE DATASET OF ONLINE STREAMING PLATFORM USER ENGAGEMENT DATA | SELECT | WHERE | PROC MEANS | PROC FREQ | PROC SQL | PROC SGPLOT | %MACRO

MASTERING THE DATASET OF ONLINE STREAMING PLATFORM USER ENGAGEMENT DATA | SELECT | WHERE | PROC MEANS | PROC FREQ | PROC SQL | PROC SGPLOT | %MACRO 

/*Creating a unique dataset centered around Online Streaming Platform User Engagement.*/

/*Dataset: Online Streaming Platform User Engagement*/

Dataset Overview

We'll simulate a dataset named streaming_data with the following variables:

UserID: Unique identifier for each user.

SubscriptionType: Type of subscription (Basic, Standard, Premium).

JoinDate: Date the user joined the platform.

LastActiveDate: Date the user last accessed the platform.

TotalWatchTime: Total hours watched.

PreferredDevice: Primary device used (Mobile, Tablet, Desktop, SmartTV).

Region: Geographic region (North America, Europe, Asia, South America, Africa, Oceania).

ContentGenre: Most-watched genre (Drama, Comedy, Action, Documentary, Horror, Sci-Fi).

MonthlySpend: Monthly subscription fee.


/*Data Simulation in SAS*/

data streaming_data;

    call streaminit(123);

    array subs[3] $8 _temporary_ ('Basic', 'Standard', 'Premium');

    array devices[4] $8 _temporary_ ('Mobile', 'Tablet', 'Desktop', 'SmartTV');

    array regions[6] $15 _temporary_ ('North America', 'Europe', 'Asia', 'South America', 'Africa', 'Oceania');

    array genres[6] $12 _temporary_ ('Drama', 'Comedy', 'Action', 'Documentary', 'Horror', 'Sci-Fi');


    do UserID = 1 to 20;

        SubscriptionType = subs[ceil(rand('uniform') * dim(subs))];

        JoinDate = today() - floor(rand('uniform') * (365*2 + 1));

        LastActiveDate = JoinDate + floor(rand('uniform') * (365*2 + 1));

        TotalWatchTime = rand('normal', 50, 20);

        PreferredDevice = devices[ceil(rand('uniform') * dim(devices))];

        Region = regions[ceil(rand('uniform') * dim(regions))];

        ContentGenre = genres[ceil(rand('uniform') * dim(genres))];


        select (SubscriptionType);

            when ('Basic') MonthlySpend = 8.99;

            when ('Standard') MonthlySpend = 12.99;

            when ('Premium') MonthlySpend = 15.99;

            otherwise MonthlySpend = .;

        end;


        output;

    end;

run;

proc print;run;

 Output:

Obs UserID SubscriptionType JoinDate LastActiveDate TotalWatchTime PreferredDevice Region ContentGenre MonthlySpend
1 1 Standard 20319 20376 29.2346 Tablet Asia Comedy 12.99
2 2 Basic 20287 20961 18.3710 Mobile Asia Sci-Fi 8.99
3 3 Premium 19727 19975 47.4942 Mobile North America Drama 15.99
4 4 Premium 19808 20484 66.4539 SmartTV South America Sci-Fi 15.99
5 5 Standard 19671 19991 17.0466 SmartTV Oceania Documentary 12.99
6 6 Standard 19643 20204 64.3154 Desktop Oceania Drama 12.99
7 7 Premium 19772 20468 38.0424 SmartTV Oceania Sci-Fi 15.99
8 8 Premium 20308 20835 73.3662 SmartTV Africa Drama 15.99
9 9 Basic 20294 20878 52.3234 Tablet Europe Sci-Fi 8.99
10 10 Standard 19864 20372 31.7349 SmartTV Africa Documentary 12.99
11 11 Premium 20203 20739 42.7715 SmartTV Oceania Sci-Fi 15.99
12 12 Basic 19638 20026 30.6133 Tablet Oceania Sci-Fi 8.99
13 13 Premium 20228 20461 -17.6225 Mobile North America Comedy 15.99
14 14 Premium 20088 20649 55.4296 Tablet Oceania Action 15.99
15 15 Standard 19912 20036 90.7307 Desktop Europe Drama 12.99
16 16 Premium 20215 20415 58.2090 Tablet Europe Action 15.99
17 17 Premium 19786 20354 36.4762 SmartTV North America Documentary 15.99
18 18 Standard 19939 20512 31.4062 SmartTV Europe Action 12.99
19 19 Premium 19852 20529 81.3090 Desktop North America Sci-Fi 15.99
20 20 Standard 19742 19835 17.4546 Desktop South America Drama 12.99


/*Overall Summary*/

proc means data=streaming_data mean median std min max;

    var TotalWatchTime MonthlySpend;

run;

 Output:

                                                                  The MEANS Procedure

Variable Mean Median Std Dev Minimum Maximum
TotalWatchTime
MonthlySpend
43.2580060
13.8900000
40.4069189
14.4900000
25.5910934
2.5319017
-17.6224539
8.9900000
90.7307123
15.9900000

/*Subscription Type Breakdown*/

proc means data=streaming_data mean median std min max;

    class SubscriptionType;

    var TotalWatchTime MonthlySpend;

run;

 Output:

                                                                           The MEANS Procedure

SubscriptionType N Obs Variable Mean Median Std Dev Minimum Maximum
Basic 3
TotalWatchTime
MonthlySpend
33.7692130
8.9900000
30.6132616
8.9900000
17.1947825
0
18.3710136
8.9900000
52.3233638
8.9900000
Premium 10
TotalWatchTime
MonthlySpend
48.1929445
15.9900000
51.4618784
15.9900000
27.5244934
0
-17.6224539
15.9900000
81.3089556
15.9900000
Standard 7
TotalWatchTime
MonthlySpend
40.2747193
12.9900000
31.4062350
12.9900000
27.2565853
0
17.0465673
12.9900000
90.7307123
12.9900000

/*Preferred Devices*/

proc freq data=streaming_data;

    tables PreferredDevice / nocum nopercent;

run;

 Output:

                                                                The FREQ Procedure

PreferredDevice Frequency
Desktop 4
Mobile 3
SmartTV 8
Tablet 5

/*Content Genres by Region*/

proc freq data=streaming_data;

    tables Region*ContentGenre / nocol nopercent;

run;

 Output:

                                                                           The FREQ Procedure

Frequency
Row Pct
Table of Region by ContentGenre
Region ContentGenre
Action Comedy Documentary Drama Sci-Fi Total
Africa
0
0.00
0
0.00
1
50.00
1
50.00
0
0.00
2
 
Asia
0
0.00
1
50.00
0
0.00
0
0.00
1
50.00
2
 
Europe
2
50.00
0
0.00
0
0.00
1
25.00
1
25.00
4
 
North America
0
0.00
1
25.00
1
25.00
1
25.00
1
25.00
4
 
Oceania
1
16.67
0
0.00
1
16.67
1
16.67
3
50.00
6
 
South America
0
0.00
0
0.00
0
0.00
1
50.00
1
50.00
2
 
Total
3
2
3
5
7
20

/*User Tenure Calculation*/

data streaming_data;

    set streaming_data;

    TenureDays = LastActiveDate - JoinDate;

run;

proc print;run;

 Output:

Obs UserID SubscriptionType JoinDate LastActiveDate TotalWatchTime PreferredDevice Region ContentGenre MonthlySpend TenureDays
1 1 Standard 20319 20376 29.2346 Tablet Asia Comedy 12.99 57
2 2 Basic 20287 20961 18.3710 Mobile Asia Sci-Fi 8.99 674
3 3 Premium 19727 19975 47.4942 Mobile North America Drama 15.99 248
4 4 Premium 19808 20484 66.4539 SmartTV South America Sci-Fi 15.99 676
5 5 Standard 19671 19991 17.0466 SmartTV Oceania Documentary 12.99 320
6 6 Standard 19643 20204 64.3154 Desktop Oceania Drama 12.99 561
7 7 Premium 19772 20468 38.0424 SmartTV Oceania Sci-Fi 15.99 696
8 8 Premium 20308 20835 73.3662 SmartTV Africa Drama 15.99 527
9 9 Basic 20294 20878 52.3234 Tablet Europe Sci-Fi 8.99 584
10 10 Standard 19864 20372 31.7349 SmartTV Africa Documentary 12.99 508
11 11 Premium 20203 20739 42.7715 SmartTV Oceania Sci-Fi 15.99 536
12 12 Basic 19638 20026 30.6133 Tablet Oceania Sci-Fi 8.99 388
13 13 Premium 20228 20461 -17.6225 Mobile North America Comedy 15.99 233
14 14 Premium 20088 20649 55.4296 Tablet Oceania Action 15.99 561
15 15 Standard 19912 20036 90.7307 Desktop Europe Drama 12.99 124
16 16 Premium 20215 20415 58.2090 Tablet Europe Action 15.99 200
17 17 Premium 19786 20354 36.4762 SmartTV North America Documentary 15.99 568
18 18 Standard 19939 20512 31.4062 SmartTV Europe Action 12.99 573
19 19 Premium 19852 20529 81.3090 Desktop North America Sci-Fi 15.99 677
20 20 Standard 19742 19835 17.4546 Desktop South America Drama 12.99 93

/*Average Tenure by Subscription Type*/

proc means data=streaming_data mean;

    class SubscriptionType;

    var TenureDays;

run;

 Output:

                                                                 The MEANS Procedure

Analysis Variable : TenureDays
SubscriptionType N Obs Mean
Basic 3 548.6666667
Premium 10 492.2000000
Standard 7 319.4285714

/*Top 5 Regions by Average Watch Time*/

proc sql outobs=5;

    select Region, mean(TotalWatchTime) as AvgWatchTime

    from streaming_data

    group by Region

    order by AvgWatchTime desc

    limit 5;

quit;

 Output:

Region AvgWatchTime
Europe 58.16734
Africa 52.55053
South America 41.95428
Oceania 41.36978
North America 36.91422


/*Monthly Revenue by Subscription Type*/

proc sql;

    select SubscriptionType, count(*) as UserCount,

           sum(MonthlySpend) as TotalRevenue,

           mean(MonthlySpend) as AvgRevenue

    from streaming_data

    group by SubscriptionType;

quit;

 Output:

SubscriptionType UserCount TotalRevenue AvgRevenue
Basic 3 26.97 8.99
Premium 10 159.9 15.99
Standard 7 90.93 12.99

/*Most Popular Genre per Region*/

/* Step 1: Aggregate the data */

proc sql;

    create table genre_counts as

    select Region, ContentGenre, count(*) as GenreCount

    from streaming_data

    group by Region, ContentGenre;

quit;

 Output:

Obs Region ContentGenre GenreCount
1 Africa Documentary 1
2 Africa Drama 1
3 Asia Comedy 1
4 Asia Sci-Fi 1
5 Europe Action 2
6 Europe Drama 1
7 Europe Sci-Fi 1
8 North America Comedy 1
9 North America Documentary 1
10 North America Drama 1
11 North America Sci-Fi 1
12 Oceania Action 1
13 Oceania Documentary 1
14 Oceania Drama 1
15 Oceania Sci-Fi 3
16 South America Drama 1
17 South America Sci-Fi 1

/* Step 2: Identify the maximum count per region */

proc sql;

    create table max_genre_counts as

    select Region, max(GenreCount) as MaxGenreCount

    from genre_counts

    group by Region;

quit;

 Output:

Obs Region MaxGenreCount
1 Africa 1
2 Asia 1
3 Europe 2
4 North America 1
5 Oceania 3
6 South America 1

/* Step 3: Join the results to get the most popular genres per region */

proc sql;

    create table top_genres as

    select gc.Region, gc.ContentGenre, gc.GenreCount

    from genre_counts gc

    inner join max_genre_counts mgc

        on gc.Region = mgc.Region and gc.GenreCount = mgc.MaxGenreCount

    order by gc.Region;

quit;

proc print data=top_genres;run;

 Output:

Obs Region ContentGenre GenreCount
1 Africa Documentary 1
2 Africa Drama 1
3 Asia Comedy 1
4 Asia Sci-Fi 1
5 Europe Action 2
6 North America Documentary 1
7 North America Comedy 1
8 North America Sci-Fi 1
9 North America Drama 1
10 Oceania Sci-Fi 3
11 South America Sci-Fi 1
12 South America Drama 1

/*Macro to Generate Genre Popularity Report for a Given Region*/

%macro genre_report(region);

    proc sql;

        title "Top Genres in &region";

        select ContentGenre, count(*) as Count

        from streaming_data

        where Region = "&region"

        group by ContentGenre

        order by Count desc;

    quit;

    title;

%mend genre_report;


%genre_report(Europe);

 Output:

                                                                 Top Genres in Europe

ContentGenre Count
Action 2
Sci-Fi 1
Drama 1

/*Macro to Compare Watch Time Across Subscription Types*/

%macro compare_watchtime;

    proc sgplot data=streaming_data;

        vbox TotalWatchTime / category=SubscriptionType;

        title "Watch Time Distribution by Subscription Type";

    run;

%mend compare_watchtime;


%compare_watchtime;

Log:

NOTE: PROCEDURE SGPLOT used (Total process time):

      real time           2.93 seconds

      cpu time            0.60 seconds


NOTE: Listing image output written to SGPlot1.png.

NOTE: There were 20 observations read from the data set WORK.STREAMING_DATA.


/*Watch Time Distribution*/

proc sgplot data=streaming_data;

    histogram TotalWatchTime;

    density TotalWatchTime / type=normal;

    title "Distribution of Total Watch Time";

run;

Log:

NOTE: PROCEDURE SGPLOT used (Total process time):

      real time           0.76 seconds

      cpu time            0.12 seconds


NOTE: Listing image output written to SGPlot3.png.

NOTE: There were 20 observations read from the data set WORK.STREAMING_DATA.


/*Monthly Spend by Subscription Type*/

proc sgplot data=streaming_data;

    vbar SubscriptionType / response=MonthlySpend stat=mean;

    title "Average Monthly Spend by Subscription Type";

run;

Log:
NOTE: PROCEDURE SGPLOT used (Total process time):
      real time           0.48 seconds
      cpu time            0.07 seconds

NOTE: Listing image output written to SGPlot5.png.
NOTE: There were 20 observations read from the data set WORK.STREAMING_DATA.


PRACTICE AND COMMENT YOUR CODE: 

-->PLEASE FOLLOW OUR BLOG FOR MORE UPDATES.

PLEASE FOLLOW OUR TELEGRAM CHANNEL CLICK HERE

PLEASE FOLLOW OUR FACEBOOK PAGE  CLICK HERE

Comments