214.ANALYZING TOP YOUTUBE VIDEOS SINCE LAUNCH BASED ON VIEWS LIKES GENRE ARTIST COUNTRY USING ONLY PROC SQL AND PROC MACRO IN SAS WITH DETAILED INSIGHTS
ANALYZING TOP YOUTUBE VIDEOS SINCE LAUNCH BASED ON VIEWS LIKES GENRE ARTIST COUNTRY USING ONLY PROC SQL AND PROC MACRO IN SAS WITH DETAILED INSIGHTS
/*Creating a unique dataset top YouTube videos globally */
Step 1: Creating the Dataset
data YouTube_Top_Videos;
infile datalines dsd truncover;
length Video_ID 8 Title $40 Artist $30 Country $20 Genre $20 Release_Year 4
Views_Million 8 Likes_Million 8 Language $15;
input Video_ID Title :$40. Artist :$30. Country :$20. Genre :$20. Release_Year
Views_Million Likes_Million Language :$15.;
datalines;
1,"Baby Shark Dance","Pinkfong","South_Korea","Kids",2016,14000,60,"Korean"
2,"Despacito","Luis_Fonsi","Puerto_Rico","Reggaeton",2017,9000,50,"Spanish"
3,"Johny Johny Yes Papa","LooLoo_Kids","Romania","Kids",2016,7000,35,"English"
4,"Bath Song","Cocomelon","USA","Kids",2018,6500,34,"English"
5,"Shape of You","Ed_Sheeran","UK","Pop",2017,6000,45,"English"
6,"See You Again","Wiz_Khalifa","USA","HipHop",2015,5800,42,"English"
7,"Phonics Song","ChuChu_TV","India","Kids",2014,5300,30,"English"
8,"Uptown Funk","Mark_Ronson","UK","Funk",2014,4500,40,"English"
9,"Gangnam Style","PSY","South_Korea","KPop",2012,4200,39,"Korean"
10,"Learning Colors","Miroshka_TV","Russia","Kids",2018,4100,28,"Russian"
11,"Sorry","Justin_Bieber","Canada","Pop",2015,3900,35,"English"
12,"Roar","Katy_Perry","USA","Pop",2013,3600,33,"English"
13,"Sugar","Maroon_5","USA","Pop",2015,3500,30,"English"
14,"Let Her Go","Passenger","UK","Indie",2012,3300,25,"English"
15,"Bailando","Enrique_Iglesias","Spain","Latin",2014,3100,26,"Spanish"
16,"Perfect","Ed_Sheeran","UK","Pop",2017,3000,27,"English"
;
run;
proc print;
run;
Output:
| Obs | Video_ID | Title | Artist | Country | Genre | Release_Year | Views_Million | Likes_Million | Language |
|---|---|---|---|---|---|---|---|---|---|
| 1 | 1 | Baby Shark Dance | Pinkfong | South_Korea | Kids | 2016 | 14000 | 60 | Korean |
| 2 | 2 | Despacito | Luis_Fonsi | Puerto_Rico | Reggaeton | 2017 | 9000 | 50 | Spanish |
| 3 | 3 | Johny Johny Yes Papa | LooLoo_Kids | Romania | Kids | 2016 | 7000 | 35 | English |
| 4 | 4 | Bath Song | Cocomelon | USA | Kids | 2018 | 6500 | 34 | English |
| 5 | 5 | Shape of You | Ed_Sheeran | UK | Pop | 2017 | 6000 | 45 | English |
| 6 | 6 | See You Again | Wiz_Khalifa | USA | HipHop | 2015 | 5800 | 42 | English |
| 7 | 7 | Phonics Song | ChuChu_TV | India | Kids | 2014 | 5300 | 30 | English |
| 8 | 8 | Uptown Funk | Mark_Ronson | UK | Funk | 2014 | 4500 | 40 | English |
| 9 | 9 | Gangnam Style | PSY | South_Korea | KPop | 2012 | 4200 | 39 | Korean |
| 10 | 10 | Learning Colors | Miroshka_TV | Russia | Kids | 2018 | 4100 | 28 | Russian |
| 11 | 11 | Sorry | Justin_Bieber | Canada | Pop | 2015 | 3900 | 35 | English |
| 12 | 12 | Roar | Katy_Perry | USA | Pop | 2013 | 3600 | 33 | English |
| 13 | 13 | Sugar | Maroon_5 | USA | Pop | 2015 | 3500 | 30 | English |
| 14 | 14 | Let Her Go | Passenger | UK | Indie | 2012 | 3300 | 25 | English |
| 15 | 15 | Bailando | Enrique_Iglesias | Spain | Latin | 2014 | 3100 | 26 | Spanish |
| 16 | 16 | Perfect | Ed_Sheeran | UK | Pop | 2017 | 3000 | 27 | English |
Step 2: Exploring Basic Data Using SQL
2.1: Top 5 Most Viewed Videos
proc sql outobs=5;
title "Top 5 Most Viewed YouTube Videos";
select Title, Artist, Views_Million
from YouTube_Top_Videos
order by Views_Million desc;
quit;
Output:
| Top 5 Most Viewed YouTube Videos |
| Title | Artist | Views_Million |
|---|---|---|
| Baby Shark Dance | Pinkfong | 14000 |
| Despacito | Luis_Fonsi | 9000 |
| Johny Johny Yes Papa | LooLoo_Kids | 7000 |
| Bath Song | Cocomelon | 6500 |
| Shape of You | Ed_Sheeran | 6000 |
2.2: Grouping Videos by Genre
proc sql;
title "Number of Videos by Genre";
select Genre, count(*) as Count
from YouTube_Top_Videos
group by Genre;
quit;
Output:
| Number of Videos by Genre |
| Genre | Count |
|---|---|
| Funk | 1 |
| HipHop | 1 |
| Indie | 1 |
| KPop | 1 |
| Kids | 5 |
| Latin | 1 |
| Pop | 5 |
| Reggaeton | 1 |
Step 3: Advanced SQL Analysis
3.1: Top Artists by Cumulative Views
proc sql;
title "Top Artists by Total Views";
select Artist, sum(Views_Million) as Total_Views
from YouTube_Top_Videos
group by Artist
order by Total_Views desc;
quit;
Output:
| Top Artists by Total Views |
| Artist | Total_Views |
|---|---|
| Pinkfong | 14000 |
| Ed_Sheeran | 9000 |
| Luis_Fonsi | 9000 |
| LooLoo_Kids | 7000 |
| Cocomelon | 6500 |
| Wiz_Khalifa | 5800 |
| ChuChu_TV | 5300 |
| Mark_Ronson | 4500 |
| PSY | 4200 |
| Miroshka_TV | 4100 |
| Justin_Bieber | 3900 |
| Katy_Perry | 3600 |
| Maroon_5 | 3500 |
| Passenger | 3300 |
| Enrique_Iglesias | 3100 |
3.2: Average Likes per Genre
proc sql;
title "Average Likes per Genre";
select Genre, avg(Likes_Million) as Avg_Likes
from YouTube_Top_Videos
group by Genre;
quit;
Output:
| Average Likes per Genre |
| Genre | Avg_Likes |
|---|---|
| Funk | 40 |
| HipHop | 42 |
| Indie | 25 |
| KPop | 39 |
| Kids | 37.4 |
| Latin | 26 |
| Pop | 34 |
| Reggaeton | 50 |
3.3: Videos Released After 2015 with Over 5B Views
proc sql;
title "Videos Released After 2015 With Over 5 Billion Views";
select Title, Artist, Release_Year, Views_Million
from YouTube_Top_Videos
where Release_Year > 2015 and Views_Million > 5000;
quit;
Output:
| Videos Released After 2015 With Over 5 Billion Views |
| Title | Artist | Release_Year | Views_Million |
|---|---|---|---|
| Baby Shark Dance | Pinkfong | 2016 | 14000 |
| Despacito | Luis_Fonsi | 2017 | 9000 |
| Johny Johny Yes Papa | LooLoo_Kids | 2016 | 7000 |
| Bath Song | Cocomelon | 2018 | 6500 |
| Shape of You | Ed_Sheeran | 2017 | 6000 |
Step 4: Macro Programming for Automation
4.1: Macro for Listing Top Videos by Country
%macro top_country_videos(ctry);
proc sql;
title "Top Videos from &ctry";
select Title, Artist, Views_Million
from YouTube_Top_Videos
where Country = "&ctry"
order by Views_Million desc;
quit;
%mend;
%top_country_videos(UK);
Output:
| Top Videos from UK |
| Title | Artist | Views_Million |
|---|---|---|
| Shape of You | Ed_Sheeran | 6000 |
| Uptown Funk | Mark_Ronson | 4500 |
| Let Her Go | Passenger | 3300 |
| Perfect | Ed_Sheeran | 3000 |
%top_country_videos(USA);
Output:
| Top Videos from USA |
| Title | Artist | Views_Million |
|---|---|---|
| Bath Song | Cocomelon | 6500 |
| See You Again | Wiz_Khalifa | 5800 |
| Roar | Katy_Perry | 3600 |
| Sugar | Maroon_5 | 3500 |
%top_country_videos(India);
Output:
| Top Videos from India |
| Title | Artist | Views_Million |
|---|---|---|
| Phonics Song | ChuChu_TV | 5300 |
4.2: Macro to Filter Videos Based on Language and Genre
%macro filter_language_genre(lang, gen);
proc sql;
title "Videos in &lang Language and &gen Genre";
select Title, Artist, Country, Views_Million, Likes_Million
from YouTube_Top_Videos
where Language = "&lang" and Genre = "&gen"
order by Views_Million desc;
quit;
%mend;
%filter_language_genre(English, Pop);
Output:
| Videos in English Language and Pop Genre |
| Title | Artist | Country | Views_Million | Likes_Million |
|---|---|---|---|---|
| Shape of You | Ed_Sheeran | UK | 6000 | 45 |
| Sorry | Justin_Bieber | Canada | 3900 | 35 |
| Roar | Katy_Perry | USA | 3600 | 33 |
| Sugar | Maroon_5 | USA | 3500 | 30 |
| Perfect | Ed_Sheeran | UK | 3000 | 27 |
%filter_language_genre(English, Kids);
Output:
| Videos in English Language and Kids Genre |
| Title | Artist | Country | Views_Million | Likes_Million |
|---|---|---|---|---|
| Johny Johny Yes Papa | LooLoo_Kids | Romania | 7000 | 35 |
| Bath Song | Cocomelon | USA | 6500 | 34 |
| Phonics Song | ChuChu_TV | India | 5300 | 30 |
Step 5: Data Summary Views
5.1: Overall View and Like Stats
proc sql;
title "Overall View and Like Statistics";
select
count(*) as Total_Videos,
avg(Views_Million) as Avg_Views,
avg(Likes_Million) as Avg_Likes,
max(Views_Million) as Max_Views,
min(Views_Million) as Min_Views
from YouTube_Top_Videos;
quit;
Output:
| Overall View and Like Statistics |
| Total_Videos | Avg_Views | Avg_Likes | Max_Views | Min_Views |
|---|---|---|---|---|
| 16 | 5425 | 36.1875 | 14000 | 3000 |
5.2: Most Liked Video per Genre
proc sql;
title "Most Liked Video per Genre";
select a.Genre, a.Title, a.Artist, a.Likes_Million
from YouTube_Top_Videos a
inner join (
select Genre, max(Likes_Million) as Max_Likes
from YouTube_Top_Videos
group by Genre
) b
on a.Genre = b.Genre and a.Likes_Million = b.Max_Likes;
quit;
Output:
| Most Liked Video per Genre |
| Genre | Title | Artist | Likes_Million |
|---|---|---|---|
| Kids | Baby Shark Dance | Pinkfong | 60 |
| Reggaeton | Despacito | Luis_Fonsi | 50 |
| Pop | Shape of You | Ed_Sheeran | 45 |
| HipHop | See You Again | Wiz_Khalifa | 42 |
| Funk | Uptown Funk | Mark_Ronson | 40 |
| KPop | Gangnam Style | PSY | 39 |
| Indie | Let Her Go | Passenger | 25 |
| Latin | Bailando | Enrique_Iglesias | 26 |
To Visit My Previous Sas Project 11-20 Questions:Click HereTo Visit My Previous %Let %Put %Eval In Sas:Click HereTo Visit My Previous Creating Assessing Cleaning Manipulation In Sas:Click HereTo Visit My Previous Proc Contents In Sas:Click Here--->PLEASE FOLLOW OUR BLOG FOR MORE INFORMATION.--->PLEASE DO COMMENTS AND SHARE OUR BLOG.PLEASE FOLLOW OUR INSTAGRAM PAGE CLICK HERE
Comments
Post a Comment