189.MOVIE DATA ANALYSIS USING PROC SQL | PRINT | MEANS | FREQ | SGPLOT | SORT | RANK | REPORT | MACROS | GENRES | REVIEWS | EARNINGS | FORMAT | CONTENTS | IMPORT | EXPORT | APPEND | COMPARE | TRANSPOSE | AUTOMATION
- Get link
- X
- Other Apps
MOVIE DATA ANALYSIS USING PROC SQL | PRINT | MEANS | FREQ | SGPLOT | SORT | RANK | REPORT | MACROS | GENRES | REVIEWS | EARNINGS | FORMAT | CONTENTS | IMPORT | EXPORT | APPEND | COMPARE | TRANSPOSE | AUTOMATION
Step 1: Creating the Datasets in SAS
data movies;
infile datalines dsd truncover;
input Movie_ID :$5. Title :$50. Release_Date :date9. Director :$30. Language :$15. Duration_Min Certificate :$5.;
format Release_Date date9.;
datalines;
M001,The Last Ember,14JUL2022,Lara Quintana,English,130,PG-13
M002,Midnight Sonata,20NOV2021,Hiro Tanaka,Japanese,118,R
M003,Desert Rose,10MAR2020,Anjali Mehra,Hindi,142,U/A
M004,Galactic Trials,01JAN2023,Tom Eckhart,English,150,PG
M005,Silent Whispers,25DEC2022,Elena Costa,Spanish,95,R
M006,Shadows of Yore,18JUN2019,Marco Valli,Italian,105,PG
;
run;
proc print;run;
Output:
Obs | Movie_ID | Title | Release_Date | Director | Language | Duration_Min | Certificate |
---|---|---|---|---|---|---|---|
1 | M001 | The Last Ember | 14JUL2022 | Lara Quintana | English | 130 | PG-13 |
2 | M002 | Midnight Sonata | 20NOV2021 | Hiro Tanaka | Japanese | 118 | R |
3 | M003 | Desert Rose | 10MAR2020 | Anjali Mehra | Hindi | 142 | U/A |
4 | M004 | Galactic Trials | 01JAN2023 | Tom Eckhart | English | 150 | PG |
5 | M005 | Silent Whispers | 25DEC2022 | Elena Costa | Spanish | 95 | R |
6 | M006 | Shadows of Yore | 18JUN2019 | Marco Valli | Italian | 105 | PG |
data genres;
input Genre_ID $ Genre_Name $20.;
datalines;
G01 Action
G02 Drama
G03 Romance
G04 SciFi
G05 Thriller
G06 Fantasy
;
run;
proc print;run;
Output:
Obs | Genre_ID | Genre_Name |
---|---|---|
1 | G01 | Action |
2 | G02 | Drama |
3 | G03 | Romance |
4 | G04 | SciFi |
5 | G05 | Thriller |
6 | G06 | Fantasy |
data movie_genres;
input Movie_ID $ Genre_ID $;
datalines;
M001 G01
M001 G04
M002 G03
M002 G02
M003 G02
M003 G05
M004 G04
M004 G06
M005 G05
M006 G02
M006 G06
;
run;
proc print;run;
Output:
Obs | Movie_ID | Genre_ID |
---|---|---|
1 | M001 | G01 |
2 | M001 | G04 |
3 | M002 | G03 |
4 | M002 | G02 |
5 | M003 | G02 |
6 | M003 | G05 |
7 | M004 | G04 |
8 | M004 | G06 |
9 | M005 | G05 |
10 | M006 | G02 |
11 | M006 | G06 |
data movie_reviews;
input Movie_ID $ Critic_Score Audience_Score;
datalines;
M001 85 88
M002 91 80
M003 78 83
M004 88 92
M005 72 60
M006 81 85
;
run;
proc print;run;
Output:
Obs | Movie_ID | Critic_Score | Audience_Score |
---|---|---|---|
1 | M001 | 85 | 88 |
2 | M002 | 91 | 80 |
3 | M003 | 78 | 83 |
4 | M004 | 88 | 92 |
5 | M005 | 72 | 60 |
6 | M006 | 81 | 85 |
data movie_earnings;
input Movie_ID $ Domestic_M Gross_International_M;
Total_Earnings_M = sum(Domestic_M, Gross_International_M);
datalines;
M001 120 140
M002 50 60
M003 30 25
M004 200 250
M005 15 10
M006 40 55
;
run;
proc print;run;
Output:
Obs | Movie_ID | Domestic_M | Gross_International_M | Total_Earnings_M |
---|---|---|---|---|
1 | M001 | 120 | 140 | 260 |
2 | M002 | 50 | 60 | 110 |
3 | M003 | 30 | 25 | 55 |
4 | M004 | 200 | 250 | 450 |
5 | M005 | 15 | 10 | 25 |
6 | M006 | 40 | 55 | 95 |
Step 2: Using PROC SQL for Joining Datasets
proc sql;
create table movie_summary as
select a.Movie_ID, Title, Release_Date, Director, Language, Duration_Min, Certificate,
Critic_Score, Audience_Score, Domestic_M, Gross_International_M, Total_Earnings_M
from movies as a
left join movie_reviews as b on a.Movie_ID = b.Movie_ID
left join movie_earnings as c on a.Movie_ID = c.Movie_ID;
quit;
Step 3: Using PROC PRINT and PROC MEANS for Summary
Show the Movie Summary Table
proc print data=movie_summary noobs label;
title "Movie Summary Report";
run;
Output:
Movie Summary Report |
Movie_ID | Title | Release_Date | Director | Language | Duration_Min | Certificate | Critic_Score | Audience_Score | Domestic_M | Gross_International_M | Total_Earnings_M |
---|---|---|---|---|---|---|---|---|---|---|---|
M001 | The Last Ember | 14JUL2022 | Lara Quintana | English | 130 | PG-13 | 85 | 88 | 120 | 140 | 260 |
M002 | Midnight Sonata | 20NOV2021 | Hiro Tanaka | Japanese | 118 | R | 91 | 80 | 50 | 60 | 110 |
M003 | Desert Rose | 10MAR2020 | Anjali Mehra | Hindi | 142 | U/A | 78 | 83 | 30 | 25 | 55 |
M004 | Galactic Trials | 01JAN2023 | Tom Eckhart | English | 150 | PG | 88 | 92 | 200 | 250 | 450 |
M005 | Silent Whispers | 25DEC2022 | Elena Costa | Spanish | 95 | R | 72 | 60 | 15 | 10 | 25 |
M006 | Shadows of Yore | 18JUN2019 | Marco Valli | Italian | 105 | PG | 81 | 85 | 40 | 55 | 95 |
Summary Statistics on Earnings
proc means data=movie_summary mean min max maxdec=1;
var Domestic_M Gross_International_M Total_Earnings_M;
title "Box Office Earnings Summary";
run;
Output:
Box Office Earnings Summary |
Variable | Mean | Minimum | Maximum | ||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
|
|
Step 4: Frequency Analysis with PROC FREQ
proc freq data=movies;
tables Certificate Language / nocum;
title "Frequency of Movie Certificates and Languages";
run;
Output:
Frequency of Movie Certificates and
Languages |
Certificate | Frequency | Percent |
---|---|---|
PG | 2 | 33.33 |
PG-13 | 1 | 16.67 |
R | 2 | 33.33 |
U/A | 1 | 16.67 |
Language | Frequency | Percent |
---|---|---|
English | 2 | 33.33 |
Hindi | 1 | 16.67 |
Italian | 1 | 16.67 |
Japanese | 1 | 16.67 |
Spanish | 1 | 16.67 |
Step 5: PROC GCHART (or SGPLOT) for Visualization
Plot Critic vs Audience Scores
proc sgplot data=movie_reviews;
title "🌟 Critic vs Audience Scores";
scatter x=Critic_Score y=Audience_Score / datalabel=Movie_ID;
run;
Log:
NOTE: PROCEDURE SGPLOT used (Total process time):
real time 3.21 seconds
cpu time 0.48 seconds
NOTE: Listing image output written to SGPlot1.png.
NOTE: There were 6 observations read from the data set WORK.MOVIE_REVIEWS.
Step 6: PROC SQL – Multi-genre Movie Listing
proc sql;
select a.Title, b.Genre_Name
from movie_genres as mg
inner join movies as a on mg.Movie_ID = a.Movie_ID
inner join genres as b on mg.Genre_ID = b.Genre_ID
order by a.Title;
quit;
Output:
Title | Genre_Name |
---|---|
Desert Rose | Thriller |
Desert Rose | Drama |
Galactic Trials | SciFi |
Galactic Trials | Fantasy |
Midnight Sonata | Romance |
Midnight Sonata | Drama |
Shadows of Yore | Drama |
Shadows of Yore | Fantasy |
Silent Whispers | Thriller |
The Last Ember | Action |
The Last Ember | SciFi |
Step 7: Using SAS MACRO for Automated Reports
Define a Macro to Generate Summary for a Given Movie ID
%macro movie_report(mid);
%put Generating Report for Movie ID: ∣
proc sql;
select Title, Release_Date, Director, Language, Duration_Min
into :title, :rdate, :director, :lang, :dur
from movies
where Movie_ID = "&mid";
quit;
%put Title = &title;
%put Release Date = &rdate;
%put Director = &director;
proc sql;
select Critic_Score, Audience_Score
into :cs, :as
from movie_reviews
where Movie_ID = "&mid";
quit;
proc sql;
select Domestic_M, Gross_International_M, Total_Earnings_M
into :dm, :gi, :total
from movie_earnings
where Movie_ID = "&mid";
quit;
%put Critic Score = &cs;
%put Audience Score = &as;
%put Total Earnings = &total M;
title "📽️ Report for &title (&mid)";
proc print data=movie_summary(where=(Movie_ID="&mid")) noobs;
run;
%mend movie_report;
%movie_report(M004)
Output:
Report for Galactic Trials (M004) |
Title | Release_Date | Director | Language | Duration_Min |
---|---|---|---|---|
Galactic Trials | 01JAN2023 | Tom Eckhart | English | 150 |
Report for Galactic Trials (M004) |
Critic_Score | Audience_Score |
---|---|
88 | 92 |
Report for Galactic Trials (M004) |
Domestic_M | Gross_International_M | Total_Earnings_M |
---|---|---|
200 | 250 | 450 |
Report for Galactic Trials (M004) |
Movie_ID | Title | Release_Date | Director | Language | Duration_Min | Certificate | Critic_Score | Audience_Score | Domestic_M | Gross_International_M | Total_Earnings_M |
---|---|---|---|---|---|---|---|---|---|---|---|
M004 | Galactic Trials | 01JAN2023 | Tom Eckhart | English | 150 | PG | 88 | 92 | 200 | 250 | 450 |
Step 8: Advanced Filtering with SQL
List movies with:
Critic Score > 80
Total Earnings > 100 Million
proc sql;
select Title, Critic_Score, Total_Earnings_M
from movie_summary
where Critic_Score > 80 and Total_Earnings_M > 100
order by Total_Earnings_M desc;
quit;
Output:
Title | Critic_Score | Total_Earnings_M |
---|---|---|
Galactic Trials | 88 | 450 |
The Last Ember | 85 | 260 |
Midnight Sonata | 91 | 110 |
Step 9: PROC SORT + PROC RANK – Top Performers
Ranking Movies by Total Earnings
proc sort data=movie_summary out=sorted_movies;
by descending Total_Earnings_M;
run;
proc rank data=sorted_movies out=ranked_movies ties=low descending;
var Total_Earnings_M;
ranks Earnings_Rank;
run;
Output:
proc print data=ranked_movies noobs;
var Movie_ID Title Total_Earnings_M Earnings_Rank;
title "Ranked List of Movies by Earnings";
run;
Output:
Ranked List of Movies by Earnings |
Movie_ID | Title | Total_Earnings_M | Earnings_Rank |
---|---|---|---|
M004 | Galactic Trials | 450 | 1 |
M001 | The Last Ember | 260 | 2 |
M002 | Midnight Sonata | 110 | 3 |
M006 | Shadows of Yore | 95 | 4 |
M003 | Desert Rose | 55 | 5 |
M005 | Silent Whispers | 25 | 6 |
Step 10: Generate Genre-wise Summary using PROC SQL and GROUP BY
proc sql;
create table genre_summary as
select b.Genre_Name, count(a.Movie_ID) as Movie_Count
from movie_genres as a
inner join genres as b on a.Genre_ID = b.Genre_ID
group by b.Genre_Name;
quit;
proc print data=genre_summary;
title "Genre-Wise Movie Count";
run;
Genre-Wise Movie Count |
Obs | Genre_Name | Movie_Count |
---|---|---|
1 | Action | 1 |
2 | Drama | 3 |
3 | Fantasy | 2 |
4 | Romance | 1 |
5 | SciFi | 2 |
6 | Thriller | 2 |
- Get link
- X
- Other Apps
Comments
Post a Comment