MOVIE INDUSTRY DATA ANALYSIS USING SAS | PROFITABILITY INSIGHTS | STATISTICAL EVALUATION | VISUAL ANALYTICS | PROC SQL | PROC MEANS | PROC CORR | PROC UNIVARIATE | PROC SGPLOT | MACROS | DATE FUNCTIONS
options nocenter;
1.MOVIE DATASET CREATION
data movies;
length Movie_Name $25 Genre $15;
format Release_Date date9.;
input Movie_Name $ Genre $ Budget Revenue IMDb_Rating Release_Date :date9.;
Year = year(Release_Date);
datalines;
Inception SciFi 160 829 8.8 16JUL2010
Titanic Romance 200 2200 7.9 19DEC1997
Avatar SciFi 237 2920 7.8 18DEC2009
Joker Drama 55 1074 8.4 04OCT2019
Interstellar SciFi 165 677 8.6 07NOV2014
Gladiator Action 103 460 8.5 05MAY2000
Matrix SciFi 63 466 8.7 31MAR1999
Frozen Animation 150 1280 7.4 27NOV2013
Avengers Action 220 1519 8.0 04MAY2012
Parasite Thriller 11 258 8.6 30MAY2019
Dangal Sports 70 300 8.4 23DEC2016
RRR Action 72 160 8.0 25MAR2022
;
run;
proc print data=movies;
run;
OUTPUT:
| Obs | Movie_Name | Genre | Release_Date | Budget | Revenue | IMDb_Rating | Year |
|---|---|---|---|---|---|---|---|
| 1 | Inception | SciFi | 16JUL2010 | 160 | 829 | 8.8 | 2010 |
| 2 | Titanic | Romance | 19DEC1997 | 200 | 2200 | 7.9 | 1997 |
| 3 | Avatar | SciFi | 18DEC2009 | 237 | 2920 | 7.8 | 2009 |
| 4 | Joker | Drama | 04OCT2019 | 55 | 1074 | 8.4 | 2019 |
| 5 | Interstellar | SciFi | 07NOV2014 | 165 | 677 | 8.6 | 2014 |
| 6 | Gladiator | Action | 05MAY2000 | 103 | 460 | 8.5 | 2000 |
| 7 | Matrix | SciFi | 31MAR1999 | 63 | 466 | 8.7 | 1999 |
| 8 | Frozen | Animation | 27NOV2013 | 150 | 1280 | 7.4 | 2013 |
| 9 | Avengers | Action | 04MAY2012 | 220 | 1519 | 8.0 | 2012 |
| 10 | Parasite | Thriller | 30MAY2019 | 11 | 258 | 8.6 | 2019 |
| 11 | Dangal | Sports | 23DEC2016 | 70 | 300 | 8.4 | 2016 |
| 12 | RRR | Action | 25MAR2022 | 72 | 160 | 8.0 | 2022 |
2.PROFIT CALCULATION USING PROC SQL
proc sql;
create table movie_profit as
select *,
(Revenue - Budget) as Profit format=comma10.
from movies;
quit;
proc print data=movie_profit;
run;
OUTPUT:
| Obs | Movie_Name | Genre | Release_Date | Budget | Revenue | IMDb_Rating | Year | Profit |
|---|---|---|---|---|---|---|---|---|
| 1 | Inception | SciFi | 16JUL2010 | 160 | 829 | 8.8 | 2010 | 669 |
| 2 | Titanic | Romance | 19DEC1997 | 200 | 2200 | 7.9 | 1997 | 2,000 |
| 3 | Avatar | SciFi | 18DEC2009 | 237 | 2920 | 7.8 | 2009 | 2,683 |
| 4 | Joker | Drama | 04OCT2019 | 55 | 1074 | 8.4 | 2019 | 1,019 |
| 5 | Interstellar | SciFi | 07NOV2014 | 165 | 677 | 8.6 | 2014 | 512 |
| 6 | Gladiator | Action | 05MAY2000 | 103 | 460 | 8.5 | 2000 | 357 |
| 7 | Matrix | SciFi | 31MAR1999 | 63 | 466 | 8.7 | 1999 | 403 |
| 8 | Frozen | Animation | 27NOV2013 | 150 | 1280 | 7.4 | 2013 | 1,130 |
| 9 | Avengers | Action | 04MAY2012 | 220 | 1519 | 8.0 | 2012 | 1,299 |
| 10 | Parasite | Thriller | 30MAY2019 | 11 | 258 | 8.6 | 2019 | 247 |
| 11 | Dangal | Sports | 23DEC2016 | 70 | 300 | 8.4 | 2016 | 230 |
| 12 | RRR | Action | 25MAR2022 | 72 | 160 | 8.0 | 2022 | 88 |
3.DATE ANALYSIS USING INTNX AND INTCK
data movie_dates;
set movie_profit;
Years_Since_Release = intck('year', Release_Date, today());
Next_Anniversary = intnx('year', Release_Date, Years_Since_Release + 1, 'same');
format Next_Anniversary date9.;
run;
proc print data=movie_dates;
run;
OUTPUT:
| Obs | Movie_Name | Genre | Release_Date | Budget | Revenue | IMDb_Rating | Year | Profit | Years_Since_Release | Next_Anniversary |
|---|---|---|---|---|---|---|---|---|---|---|
| 1 | Inception | SciFi | 16JUL2010 | 160 | 829 | 8.8 | 2010 | 669 | 15 | 16JUL2026 |
| 2 | Titanic | Romance | 19DEC1997 | 200 | 2200 | 7.9 | 1997 | 2,000 | 28 | 19DEC2026 |
| 3 | Avatar | SciFi | 18DEC2009 | 237 | 2920 | 7.8 | 2009 | 2,683 | 16 | 18DEC2026 |
| 4 | Joker | Drama | 04OCT2019 | 55 | 1074 | 8.4 | 2019 | 1,019 | 6 | 04OCT2026 |
| 5 | Interstellar | SciFi | 07NOV2014 | 165 | 677 | 8.6 | 2014 | 512 | 11 | 07NOV2026 |
| 6 | Gladiator | Action | 05MAY2000 | 103 | 460 | 8.5 | 2000 | 357 | 25 | 05MAY2026 |
| 7 | Matrix | SciFi | 31MAR1999 | 63 | 466 | 8.7 | 1999 | 403 | 26 | 31MAR2026 |
| 8 | Frozen | Animation | 27NOV2013 | 150 | 1280 | 7.4 | 2013 | 1,130 | 12 | 27NOV2026 |
| 9 | Avengers | Action | 04MAY2012 | 220 | 1519 | 8.0 | 2012 | 1,299 | 13 | 04MAY2026 |
| 10 | Parasite | Thriller | 30MAY2019 | 11 | 258 | 8.6 | 2019 | 247 | 6 | 30MAY2026 |
| 11 | Dangal | Sports | 23DEC2016 | 70 | 300 | 8.4 | 2016 | 230 | 9 | 23DEC2026 |
| 12 | RRR | Action | 25MAR2022 | 72 | 160 | 8.0 | 2022 | 88 | 3 | 25MAR2026 |
4.MACRO FOR PROFIT CLASSIFICATION
%macro profit_flag(ds=, out=);
data &out;
set &ds;
length Profit_Category $15;
if Profit >= 500 then Profit_Category = 'BLOCKBUSTER';
else if Profit >= 100 then Profit_Category = 'HIT';
else if Profit > 0 then Profit_Category = 'AVERAGE';
else Profit_Category = 'FLOP';
run;
proc print data=&out;
run;
%mend;
%profit_flag(ds=movie_profit, out=movie_analysis);
OUTPUT:
| Obs | Movie_Name | Genre | Release_Date | Budget | Revenue | IMDb_Rating | Year | Profit | Profit_Category |
|---|---|---|---|---|---|---|---|---|---|
| 1 | Inception | SciFi | 16JUL2010 | 160 | 829 | 8.8 | 2010 | 669 | BLOCKBUSTER |
| 2 | Titanic | Romance | 19DEC1997 | 200 | 2200 | 7.9 | 1997 | 2,000 | BLOCKBUSTER |
| 3 | Avatar | SciFi | 18DEC2009 | 237 | 2920 | 7.8 | 2009 | 2,683 | BLOCKBUSTER |
| 4 | Joker | Drama | 04OCT2019 | 55 | 1074 | 8.4 | 2019 | 1,019 | BLOCKBUSTER |
| 5 | Interstellar | SciFi | 07NOV2014 | 165 | 677 | 8.6 | 2014 | 512 | BLOCKBUSTER |
| 6 | Gladiator | Action | 05MAY2000 | 103 | 460 | 8.5 | 2000 | 357 | HIT |
| 7 | Matrix | SciFi | 31MAR1999 | 63 | 466 | 8.7 | 1999 | 403 | HIT |
| 8 | Frozen | Animation | 27NOV2013 | 150 | 1280 | 7.4 | 2013 | 1,130 | BLOCKBUSTER |
| 9 | Avengers | Action | 04MAY2012 | 220 | 1519 | 8.0 | 2012 | 1,299 | BLOCKBUSTER |
| 10 | Parasite | Thriller | 30MAY2019 | 11 | 258 | 8.6 | 2019 | 247 | HIT |
| 11 | Dangal | Sports | 23DEC2016 | 70 | 300 | 8.4 | 2016 | 230 | HIT |
| 12 | RRR | Action | 25MAR2022 | 72 | 160 | 8.0 | 2022 | 88 | AVERAGE |
5.DESCRIPTIVE STATISTICS USING PROC MEANS
proc means data=movie_analysis mean min max;
var Budget Revenue Profit IMDb_Rating;
title "SUMMARY STATISTICS OF MOVIE FINANCIALS";
run;
OUTPUT:
The MEANS Procedure
| Variable | Mean | Minimum | Maximum |
|---|---|---|---|
Budget Revenue Profit IMDb_Rating | 125.5000000 1011.92 886.4166667 8.2583333 | 11.0000000 160.0000000 88.0000000 7.4000000 | 237.0000000 2920.00 2683.00 8.8000000 |
6.CORRELATION ANALYSIS USING PROC CORR
proc corr data=movie_analysis;
var Budget Revenue IMDb_Rating Profit;
title "CORRELATION ANALYSIS OF MOVIE VARIABLES";
run;
OUTPUT:
The CORR Procedure
| 4 Variables: | Budget Revenue IMDb_Rating Profit |
|---|
| Simple Statistics | ||||||
|---|---|---|---|---|---|---|
| Variable | N | Mean | Std Dev | Sum | Minimum | Maximum |
| Budget | 12 | 125.50000 | 73.04980 | 1506 | 11.00000 | 237.00000 |
| Revenue | 12 | 1012 | 849.30040 | 12143 | 160.00000 | 2920 |
| IMDb_Rating | 12 | 8.25833 | 0.42950 | 99.10000 | 7.40000 | 8.80000 |
| Profit | 12 | 886.41667 | 791.63541 | 10637 | 88.00000 | 2683 |
| Pearson Correlation Coefficients, N = 12 Prob > |r| under H0: Rho=0 | ||||
|---|---|---|---|---|
| Budget | Revenue | IMDb_Rating | Profit | |
| Budget | 1.00000 | 0.80560 0.0016 | -0.48461 0.1103 | 0.77201 0.0033 |
| Revenue | 0.80560 0.0016 | 1.00000 | -0.59288 0.0422 | 0.99850 <.0001 |
| IMDb_Rating | -0.48461 0.1103 | -0.59288 0.0422 | 1.00000 | -0.59135 0.0429 |
| Profit | 0.77201 0.0033 | 0.99850 <.0001 | -0.59135 0.0429 | 1.00000 |
7.DISTRIBUTION ANALYSIS USING PROC UNIVARIATE
proc univariate data=movie_analysis;
var Profit;
histogram Profit;
inset mean median std skewness;
title "PROFIT DISTRIBUTION ANALYSIS";
run;
OUTPUT:
The UNIVARIATE Procedure
Variable: Profit
| Moments | |||
|---|---|---|---|
| N | 12 | Sum Weights | 12 |
| Mean | 886.416667 | Sum Observations | 10637 |
| Std Deviation | 791.635414 | Variance | 626686.629 |
| Skewness | 1.2950494 | Kurtosis | 1.14365626 |
| Uncorrected SS | 16322367 | Corrected SS | 6893552.92 |
| Coeff Variation | 89.3073702 | Std Error Mean | 228.52546 |
| Basic Statistical Measures | |||
|---|---|---|---|
| Location | Variability | ||
| Mean | 886.4167 | Std Deviation | 791.63541 |
| Median | 590.5000 | Variance | 626687 |
| Mode | . | Range | 2595 |
| Interquartile Range | 912.50000 | ||
| Tests for Location: Mu0=0 | ||||
|---|---|---|---|---|
| Test | Statistic | p Value | ||
| Student's t | t | 3.878853 | Pr > |t| | 0.0026 |
| Sign | M | 6 | Pr >= |M| | 0.0005 |
| Signed Rank | S | 39 | Pr >= |S| | 0.0005 |
| Quantiles (Definition 5) | |
|---|---|
| Level | Quantile |
| 100% Max | 2683.0 |
| 99% | 2683.0 |
| 95% | 2683.0 |
| 90% | 2000.0 |
| 75% Q3 | 1214.5 |
| 50% Median | 590.5 |
| 25% Q1 | 302.0 |
| 10% | 230.0 |
| 5% | 88.0 |
| 1% | 88.0 |
| 0% Min | 88.0 |
| Extreme Observations | |||
|---|---|---|---|
| Lowest | Highest | ||
| Value | Obs | Value | Obs |
| 88 | 12 | 1019 | 4 |
| 230 | 11 | 1130 | 8 |
| 247 | 10 | 1299 | 9 |
| 357 | 6 | 2000 | 2 |
| 403 | 7 | 2683 | 3 |
The UNIVARIATE Procedure
8.DATA VISUALIZATION USING PROC SGPLOT
A. Revenue vs Budget
proc sgplot data=movie_analysis;
scatter x=Budget y=Revenue / datalabel=Movie_Name;
title "BUDGET VS REVENUE SCATTER PLOT";
run;
OUTPUT:
B. Profit by Genre
proc sgplot data=movie_analysis;
vbar Genre / response=Profit stat=mean;
title "AVERAGE PROFIT BY GENRE";
run;
OUTPUT:
9.PROC FREQ
Genre Distribution
proc freq data=movie_analysis;
tables Genre*Profit_Category;
title "GENRE AND PROFIT CATEGORY DISTRIBUTION";
run;
OUTPUT:
The FREQ Procedure
|
| |||||||||||||||||||||||||||||||||||||||||||||||||||||||
10.PROC: PROC RANK
Ranking Movies by Profit
proc rank data=movie_analysis out=movie_rank descending;
var Profit;
ranks Profit_Rank;
run;
proc print data=movie_rank;
run;
OUTPUT:
| Obs | Movie_Name | Genre | Release_Date | Budget | Revenue | IMDb_Rating | Year | Profit | Profit_Category | Profit_Rank |
|---|---|---|---|---|---|---|---|---|---|---|
| 1 | Inception | SciFi | 16JUL2010 | 160 | 829 | 8.8 | 2010 | 669 | BLOCKBUSTER | 6 |
| 2 | Titanic | Romance | 19DEC1997 | 200 | 2200 | 7.9 | 1997 | 2,000 | BLOCKBUSTER | 2 |
| 3 | Avatar | SciFi | 18DEC2009 | 237 | 2920 | 7.8 | 2009 | 2,683 | BLOCKBUSTER | 1 |
| 4 | Joker | Drama | 04OCT2019 | 55 | 1074 | 8.4 | 2019 | 1,019 | BLOCKBUSTER | 5 |
| 5 | Interstellar | SciFi | 07NOV2014 | 165 | 677 | 8.6 | 2014 | 512 | BLOCKBUSTER | 7 |
| 6 | Gladiator | Action | 05MAY2000 | 103 | 460 | 8.5 | 2000 | 357 | HIT | 9 |
| 7 | Matrix | SciFi | 31MAR1999 | 63 | 466 | 8.7 | 1999 | 403 | HIT | 8 |
| 8 | Frozen | Animation | 27NOV2013 | 150 | 1280 | 7.4 | 2013 | 1,130 | BLOCKBUSTER | 4 |
| 9 | Avengers | Action | 04MAY2012 | 220 | 1519 | 8.0 | 2012 | 1,299 | BLOCKBUSTER | 3 |
| 10 | Parasite | Thriller | 30MAY2019 | 11 | 258 | 8.6 | 2019 | 247 | HIT | 10 |
| 11 | Dangal | Sports | 23DEC2016 | 70 | 300 | 8.4 | 2016 | 230 | HIT | 11 |
| 12 | RRR | Action | 25MAR2022 | 72 | 160 | 8.0 | 2022 | 88 | AVERAGE | 12 |
10.PIE Chart
proc sgplot data=olympic_efficiency;
pie Efficiency_Level / datalabel;
run;
/* Note: In practice above there is an Invalid in this code Find it,Correct it and Use it /*
No comments:
Post a Comment