STEP-BY-STEP YOUTUBE CHANNEL ANALYSIS WITH PROCS | PROC PRINT | PROC FREQ | PROC MEANS | PROC SQL | PROC SORT | TURBOCHARGE YOUR SAS SKILLS
/*Creating a dataset for the top 10 Indian YouTube channels */
1. Create the dataset with at least 10 Indian YouTube channels
options nocenter;
data yt_top10_india;
length Channel_Name $40 Category $25;
input Rank Channel_Name $ 1-32 Subscribers_M Videos Category $ 34-60;
datalines;
1 T-Series 295 23624 Music_Entertainment
2 SET_India 184 155432 Entertainment
3 Zee_Music_Company 117 14265 Music
4 Goldmines 105 10691 Movies_Entertainment
5 Sony_SAB 101 99431 Comedy_Entertainment
6 ChuChu_TV_Nursery_Rhymes 96.4 874 Kids_Education
7 Zee_TV 90.8 210810 Entertainment
8 Colors_TV 80.4 254335 Entertainment
9 T-Series_Bhakti_Sagar 76.5 30162 Devotional_Music
10 Tips_Official 75.9 7934 Music
;
run;
proc print data=yt_top10_india;
title "Top 10 Indian YouTube Channels (2025)";
run;
Output:
| Obs | Channel_Name | Category | Rank | Subscribers_M | Videos |
|---|---|---|---|---|---|
| 1 | T-Series | Music_Entertainment | 1 | 295.0 | 23624 |
| 2 | SET_India | Entertainment | 2 | 184.0 | 155432 |
| 3 | Zee_Music_Company | Music | 3 | 117.0 | 14265 |
| 4 | Goldmines | Movies_Entertainment | 4 | 105.0 | 10691 |
| 5 | Sony_SAB | Comedy_Entertainment | 5 | 101.0 | 99431 |
| 6 | ChuChu_TV_Nursery_Rhymes | Kids_Education | 6 | 96.4 | 874 |
| 7 | Zee_TV | Entertainment | 7 | 90.8 | 210810 |
| 8 | Colors_TV | Entertainment | 8 | 80.4 | 254335 |
| 9 | T-Series_Bhakti_Sagar | Devotional_Music | 9 | 76.5 | 30162 |
| 10 | Tips_Official | Music | 10 | 75.9 | 7934 |
2. PROC MEANS: Descriptive Statistics
proc means data=yt_top10_india mean min max std n;
var Subscribers_M Videos;
title "Subscribers & Video Statistics";
run;
Output:
The MEANS Procedure
| Variable | Mean | Minimum | Maximum | Std Dev | N |
|---|---|---|---|---|---|
Subscribers_M Videos | 122.2000000 80755.80 | 75.9000000 874.0000000 | 295.0000000 254335.00 | 68.3800003 94167.18 | 10 10 |
3. PROC SORT: Sort channels by Subscribers in descending order
proc sort data=yt_top10_india out=sorted_channels;
by descending Subscribers_M;
run;
proc print data=sorted_channels;
run;
Output:
| Obs | Channel_Name | Category | Rank | Subscribers_M | Videos |
|---|---|---|---|---|---|
| 1 | T-Series | Music_Entertainment | 1 | 295.0 | 23624 |
| 2 | SET_India | Entertainment | 2 | 184.0 | 155432 |
| 3 | Zee_Music_Company | Music | 3 | 117.0 | 14265 |
| 4 | Goldmines | Movies_Entertainment | 4 | 105.0 | 10691 |
| 5 | Sony_SAB | Comedy_Entertainment | 5 | 101.0 | 99431 |
| 6 | ChuChu_TV_Nursery_Rhymes | Kids_Education | 6 | 96.4 | 874 |
| 7 | Zee_TV | Entertainment | 7 | 90.8 | 210810 |
| 8 | Colors_TV | Entertainment | 8 | 80.4 | 254335 |
| 9 | T-Series_Bhakti_Sagar | Devotional_Music | 9 | 76.5 | 30162 |
| 10 | Tips_Official | Music | 10 | 75.9 | 7934 |
4. PROC FREQ: Frequency for Category variable
proc freq data=yt_top10_india;
tables Category / nocum nopercent;
title "Frequency of Channel Category";
run;
Output:
The FREQ Procedure
| Category | Frequency |
|---|---|
| Comedy_Entertainment | 1 |
| Devotional_Music | 1 |
| Entertainment | 3 |
| Kids_Education | 1 |
| Movies_Entertainment | 1 |
| Music | 2 |
| Music_Entertainment | 1 |
5. PROC SQL: Query channels with over 100 million subscribers
proc sql;
title "Channels with 100M+ Subscribers";
select Channel_Name, Subscribers_M
from yt_top10_india
where Subscribers_M > 100;
quit;
Output:
| Channel_Name | Subscribers_M |
|---|---|
| T-Series | 295 |
| SET_India | 184 |
| Zee_Music_Company | 117 |
| Goldmines | 105 |
| Sony_SAB | 101 |
6. PROC SQL: Aggregation by Category
proc sql;
title "Average Subscribers by Channel Category";
select Category,
count(*) as Channel_Count,
mean(Subscribers_M) as Avg_Subscribers format=8.2
from yt_top10_india
group by Category;
quit;
Output:
| Category | Channel_Count | Avg_Subscribers |
|---|---|---|
| Comedy_Entertainment | 1 | 101.00 |
| Devotional_Music | 1 | 76.50 |
| Entertainment | 3 | 118.40 |
| Kids_Education | 1 | 96.40 |
| Movies_Entertainment | 1 | 105.00 |
| Music | 2 | 96.45 |
| Music_Entertainment | 1 | 295.00 |
7. PROC SQL: Limit (top 5 by videos) using OUTOBS=
proc sql outobs=5;
title "Top 5 Channels by Number of Videos";
select Channel_Name, Videos
from yt_top10_india
order by Videos desc;
quit;
Output:
| Channel_Name | Videos |
|---|---|
| Colors_TV | 254335 |
| Zee_TV | 210810 |
| SET_India | 155432 |
| Sony_SAB | 99431 |
| T-Series_Bhakti_Sagar | 30162 |
8. Macro: Print Top N Channels (by Subscribers)
%macro print_top_channels(n);
proc sql outobs=&n;
title "Top &n YouTube Channels by Subscribers";
select *
from yt_top10_india
order by Subscribers_M desc;
quit;
%mend print_top_channels;
%print_top_channels(5);
Output:
| Channel_Name | Category | Rank | Subscribers_M | Videos |
|---|---|---|---|---|
| T-Series | Music_Entertainment | 1 | 295 | 23624 |
| SET_India | Entertainment | 2 | 184 | 155432 |
| Zee_Music_Company | Music | 3 | 117 | 14265 |
| Goldmines | Movies_Entertainment | 4 | 105 | 10691 |
| Sony_SAB | Comedy_Entertainment | 5 | 101 | 99431 |
9. Macro: Loop Over Categories and Print Channels by Category
%macro analyze_by_category;
Gather unique categories into macro variables
proc sql noprint;
select distinct Category into :cat1-:cat10
from yt_top10_india;
%let catcount=&sqlobs;
quit;
%do i=1 %to &catcount;
%let currcat=&&cat&i;
proc print data=yt_top10_india noobs;
where Category="&currcat";
title "YouTube Channels in Category: &currcat";
run;
%end;
%mend analyze_by_category;
%analyze_by_category
Output:
| Channel_Name | Category | Rank | Subscribers_M | Videos |
|---|---|---|---|---|
| Sony_SAB | Comedy_Entertainment | 5 | 101 | 99431 |
| Channel_Name | Category | Rank | Subscribers_M | Videos |
|---|---|---|---|---|
| T-Series_Bhakti_Sagar | Devotional_Music | 9 | 76.5 | 30162 |
| Channel_Name | Category | Rank | Subscribers_M | Videos |
|---|---|---|---|---|
| SET_India | Entertainment | 2 | 184.0 | 155432 |
| Zee_TV | Entertainment | 7 | 90.8 | 210810 |
| Colors_TV | Entertainment | 8 | 80.4 | 254335 |
| Channel_Name | Category | Rank | Subscribers_M | Videos |
|---|---|---|---|---|
| ChuChu_TV_Nursery_Rhymes | Kids_Education | 6 | 96.4 | 874 |
| Channel_Name | Category | Rank | Subscribers_M | Videos |
|---|---|---|---|---|
| Goldmines | Movies_Entertainment | 4 | 105 | 10691 |
| Channel_Name | Category | Rank | Subscribers_M | Videos |
|---|---|---|---|---|
| Zee_Music_Company | Music | 3 | 117.0 | 14265 |
| Tips_Official | Music | 10 | 75.9 | 7934 |
| Channel_Name | Category | Rank | Subscribers_M | Videos |
|---|---|---|---|---|
| T-Series | Music_Entertainment | 1 | 295 | 23624 |
10. Advanced - Calculate and display percent share of total subscribers per channel
proc sql noprint;
select sum(Subscribers_M) into :total_subs from yt_top10_india;
quit;
proc sql;
title "Share of Subscribers by Channel";
select Channel_Name,
Subscribers_M,
(Subscribers_M/&total_subs)*100 as Percent_Share format=6.2
from yt_top10_india
order by Percent_Share desc;
quit;
Output:
| Channel_Name | Subscribers_M | Percent_Share |
|---|---|---|
| T-Series | 295 | 24.14 |
| SET_India | 184 | 15.06 |
| Zee_Music_Company | 117 | 9.57 |
| Goldmines | 105 | 8.59 |
| Sony_SAB | 101 | 8.27 |
| ChuChu_TV_Nursery_Rhymes | 96.4 | 7.89 |
| Zee_TV | 90.8 | 7.43 |
| Colors_TV | 80.4 | 6.58 |
| T-Series_Bhakti_Sagar | 76.5 | 6.26 |
| Tips_Official | 75.9 | 6.21 |
11. Data step + procs: Mark all music-related channels and analyze them
data music_channels;
set yt_top10_india;
if index(Category,'Music') > 0 then MusicFlag = 1;
else MusicFlag = 0;
run;
proc print data=music_channels;
title "Music-related Channels Flagged";
run;
Output:
| Obs | Channel_Name | Category | Rank | Subscribers_M | Videos | MusicFlag |
|---|---|---|---|---|---|---|
| 1 | T-Series | Music_Entertainment | 1 | 295.0 | 23624 | 1 |
| 2 | SET_India | Entertainment | 2 | 184.0 | 155432 | 0 |
| 3 | Zee_Music_Company | Music | 3 | 117.0 | 14265 | 1 |
| 4 | Goldmines | Movies_Entertainment | 4 | 105.0 | 10691 | 0 |
| 5 | Sony_SAB | Comedy_Entertainment | 5 | 101.0 | 99431 | 0 |
| 6 | ChuChu_TV_Nursery_Rhymes | Kids_Education | 6 | 96.4 | 874 | 0 |
| 7 | Zee_TV | Entertainment | 7 | 90.8 | 210810 | 0 |
| 8 | Colors_TV | Entertainment | 8 | 80.4 | 254335 | 0 |
| 9 | T-Series_Bhakti_Sagar | Devotional_Music | 9 | 76.5 | 30162 | 1 |
| 10 | Tips_Official | Music | 10 | 75.9 | 7934 | 1 |
proc means data=music_channels mean min max;
title "Summary Statistics for Music Channels";
where MusicFlag=1;
var Subscribers_M;
run;
Output:
The MEANS Procedure
| Analysis Variable : Subscribers_M | ||
|---|---|---|
| Mean | Minimum | Maximum |
| 141.1000000 | 75.9000000 | 295.0000000 |
proc sql;
title "Music Channels, Descending by Subscribers";
select Channel_Name, Subscribers_M
from music_channels
where MusicFlag=1
order by Subscribers_M desc;
quit;
Output:
| Channel_Name | Subscribers_M |
|---|---|
| T-Series | 295 |
| Zee_Music_Company | 117 |
| T-Series_Bhakti_Sagar | 76.5 |
| Tips_Official | 75.9 |
No comments:
Post a Comment