142.GLOBAL MOVIE TRENDS ANALYSIS: A COMPREHENSIVE STUDY OF MOVIE REVENUE | GENRE DISTRIBUTION | RATING PERFORMANCE AND AWARD WINNING TRENDS ACROSS MULTIPLE COUNTRIES AND DIRECTORS IN SAS

GLOBAL MOVIE TRENDS ANALYSIS: A COMPREHENSIVE STUDY OF MOVIE REVENUE |  GENRE DISTRIBUTION | RATING PERFORMANCE AND AWARD WINNING TRENDS  ACROSS MULTIPLE COUNTRIES AND DIRECTORS IN SAS


 /* Step 1: Creating a Detailed Dataset for Global Movie Trends */

/* This dataset contains details of popular movies, including financials, ratings, and key cast */

DATA Movies;

    INPUT MovieID $ Title $ Genre $ ReleaseYear Budget Revenue IMDB_Rating RottenTomatoes_Score 

          Country $ Director $ Duration Language $ LeadActor $ LeadActress $ AwardsWon;

    DATALINES;

    M001 Inception Sci-Fi 2010 160 830 8.8 87 USA Nolan 148 English DiCaprio Page 4

    M002 Parasite Thriller 2019 11 264 8.6 98 South_Korea Bong_Joon_Ho 132 Korean Kang_Ho Park 6

    M003 Avatar Sci-Fi 2009 237 2847 7.8 82 USA Cameron 162 English Worthington Saldana 3

    M004 Interstellar Sci-Fi 2014 165 677 8.6 72 USA Nolan 169 English McConaughey Hathaway 5

    M005 Joker Thriller 2019 55 1074 8.4 68 USA Phillips 122 English Phoenix Beetz 2

    M006 TheGodfather Crime 1972 6 250 9.2 98 USA Coppola 175 English Brando Keaton 7

    M007 Titanic Romance 1997 200 2187 7.8 88 USA Cameron 195 English DiCaprio Winslet 11

    M008 SpiritedAway Animation 2001 19 383 8.6 97 Japan Miyazaki 125 Japanese Irino Hiiragi 1

    M009 TheDarkKnight Action 2008 185 1005 9.0 94 USA Nolan 152 English Bale Gyllenhaal 8

    M010 Gladiator Action 2000 103 465 8.5 76 USA Scott 155 English Crowe Nielsen 5

    ;

RUN;

/* Step 2: Displaying the Dataset */

PROC PRINT DATA=Movies; RUN;


OUTPUT:

Obs MovieID Title Genre ReleaseYear Budget Revenue IMDB_Rating RottenTomatoes_Score Country Director Duration Language LeadActor LeadActress AwardsWon
1 M001 Inceptio Sci-Fi 2010 160 830 8.8 87 USA Nolan 148 English DiCaprio Page 4
2 M002 Parasite Thriller 2019 11 264 8.6 98 South_Ko Bong_Joo 132 Korean Kang_Ho Park 6
3 M003 Avatar Sci-Fi 2009 237 2847 7.8 82 USA Cameron 162 English Worthing Saldana 3
4 M004 Interste Sci-Fi 2014 165 677 8.6 72 USA Nolan 169 English McConaug Hathaway 5
5 M005 Joker Thriller 2019 55 1074 8.4 68 USA Phillips 122 English Phoenix Beetz 2
6 M006 TheGodfa Crime 1972 6 250 9.2 98 USA Coppola 175 English Brando Keaton 7
7 M007 Titanic Romance 1997 200 2187 7.8 88 USA Cameron 195 English DiCaprio Winslet 11
8 M008 Spirited Animatio 2001 19 383 8.6 97 Japan Miyazaki 125 Japanese Irino Hiiragi 1
9 M009 TheDarkK Action 2008 185 1005 9.0 94 USA Nolan 152 English Bale Gyllenha 8
10 M010 Gladiato Action 2000 103 465 8.5 76 USA Scott 155 English Crowe Nielsen 5


/* Step 3: Calculating Summary Statistics */

PROC MEANS DATA=Movies MEAN MIN MAX STD;

    VAR Budget Revenue IMDB_Rating RottenTomatoes_Score Duration AwardsWon;

RUN;


OUTPUT:

                                                                  The MEANS Procedure

Variable Mean Minimum Maximum Std Dev
Budget
Revenue
IMDB_Rating
RottenTomatoes_Score
Duration
AwardsWon
114.1000000
998.2000000
8.5300000
86.0000000
153.5000000
5.2000000
6.0000000
250.0000000
7.8000000
68.0000000
122.0000000
1.0000000
237.0000000
2847.00
9.2000000
98.0000000
195.0000000
11.0000000
86.3988812
864.7533752
0.4522782
11.1255462
23.0904020
2.9739611


/* Step 4: Frequency Analysis of Movie Genres, Languages, and Countries */

PROC FREQ DATA=Movies;

    TABLES Genre Language Country AwardsWon / NOCUM;

RUN;


OUTPUT:

                                                                   The FREQ Procedure

Genre Frequency Percent
Action 2 20.00
Animatio 1 10.00
Crime 1 10.00
Romance 1 10.00
Sci-Fi 3 30.00
Thriller 2 20.00


Language Frequency Percent
English 8 80.00
Japanese 1 10.00
Korean 1 10.00


Country Frequency Percent
Japan 1 10.00
South_Ko 1 10.00
USA 8 80.00


AwardsWon Frequency Percent
1 1 10.00
2 1 10.00
3 1 10.00
4 1 10.00
5 2 20.00
6 1 10.00
7 1 10.00
8 1 10.00
11 1 10.00


/* Step 5: Sorting Data by Revenue and Awards */

PROC SORT DATA=Movies; BY DESCENDING Revenue; RUN;

PROC SORT DATA=Movies; BY DESCENDING AwardsWon; RUN;

PROC PRINT DATA=Movies; RUN;


OUTPUT:

Obs MovieID Title Genre ReleaseYear Budget Revenue IMDB_Rating RottenTomatoes_Score Country Director Duration Language LeadActor LeadActress AwardsWon
1 M007 Titanic Romance 1997 200 2187 7.8 88 USA Cameron 195 English DiCaprio Winslet 11
2 M009 TheDarkK Action 2008 185 1005 9.0 94 USA Nolan 152 English Bale Gyllenha 8
3 M006 TheGodfa Crime 1972 6 250 9.2 98 USA Coppola 175 English Brando Keaton 7
4 M002 Parasite Thriller 2019 11 264 8.6 98 South_Ko Bong_Joo 132 Korean Kang_Ho Park 6
5 M004 Interste Sci-Fi 2014 165 677 8.6 72 USA Nolan 169 English McConaug Hathaway 5
6 M010 Gladiato Action 2000 103 465 8.5 76 USA Scott 155 English Crowe Nielsen 5
7 M001 Inceptio Sci-Fi 2010 160 830 8.8 87 USA Nolan 148 English DiCaprio Page 4
8 M003 Avatar Sci-Fi 2009 237 2847 7.8 82 USA Cameron 162 English Worthing Saldana 3
9 M005 Joker Thriller 2019 55 1074 8.4 68 USA Phillips 122 English Phoenix Beetz 2
10 M008 Spirited Animatio 2001 19 383 8.6 97 Japan Miyazaki 125 Japanese Irino Hiiragi 1


/* Step 6: Using PROC SQL to Filter High Revenue, Long Duration, and Award-Winning Movies */

PROC SQL;

    CREATE TABLE HighRevenueMovies AS SELECT * FROM Movies WHERE Revenue > 1000;

PROC PRINT DATA=HighRevenueMovies; RUN;

OUTPUT:

Obs MovieID Title Genre ReleaseYear Budget Revenue IMDB_Rating RottenTomatoes_Score Country Director Duration Language LeadActor LeadActress AwardsWon
1 M007 Titanic Romance 1997 200 2187 7.8 88 USA Cameron 195 English DiCaprio Winslet 11
2 M009 TheDarkK Action 2008 185 1005 9.0 94 USA Nolan 152 English Bale Gyllenha 8
3 M003 Avatar Sci-Fi 2009 237 2847 7.8 82 USA Cameron 162 English Worthing Saldana 3
4 M005 Joker Thriller 2019 55 1074 8.4 68 USA Phillips 122 English Phoenix Beetz 2

    

CREATE TABLE LongDurationMovies AS SELECT * FROM Movies WHERE Duration > 150;

PROC PRINT DATA=LongDurationMovies; RUN;

OUTPUT:

Obs MovieID Title Genre ReleaseYear Budget Revenue IMDB_Rating RottenTomatoes_Score Country Director Duration Language LeadActor LeadActress AwardsWon
1 M007 Titanic Romance 1997 200 2187 7.8 88 USA Cameron 195 English DiCaprio Winslet 11
2 M009 TheDarkK Action 2008 185 1005 9.0 94 USA Nolan 152 English Bale Gyllenha 8
3 M006 TheGodfa Crime 1972 6 250 9.2 98 USA Coppola 175 English Brando Keaton 7
4 M004 Interste Sci-Fi 2014 165 677 8.6 72 USA Nolan 169 English McConaug Hathaway 5
5 M010 Gladiato Action 2000 103 465 8.5 76 USA Scott 155 English Crowe Nielsen 5
6 M003 Avatar Sci-Fi 2009 237 2847 7.8 82 USA Cameron 162 English Worthing Saldana 3


CREATE TABLE AwardWinningMovies AS SELECT * FROM Movies WHERE AwardsWon >3;

QUIT;

PROC PRINT DATA=AwardWinningMovies; RUN;

OUTPUT:

Obs MovieID Title Genre ReleaseYear Budget Revenue IMDB_Rating RottenTomatoes_Score Country Director Duration Language LeadActor LeadActress AwardsWon
1 M007 Titanic Romance 1997 200 2187 7.8 88 USA Cameron 195 English DiCaprio Winslet 11
2 M009 TheDarkK Action 2008 185 1005 9.0 94 USA Nolan 152 English Bale Gyllenha 8
3 M006 TheGodfa Crime 1972 6 250 9.2 98 USA Coppola 175 English Brando Keaton 7
4 M002 Parasite Thriller 2019 11 264 8.6 98 South_Ko Bong_Joo 132 Korean Kang_Ho Park 6
5 M004 Interste Sci-Fi 2014 165 677 8.6 72 USA Nolan 169 English McConaug Hathaway 5
6 M010 Gladiato Action 2000 103 465 8.5 76 USA Scott 155 English Crowe Nielsen 5
7 M001 Inceptio Sci-Fi 2010 160 830 8.8 87 USA Nolan 148 English DiCaprio Page 4


/* Step 7: Generating a Detailed Report */

PROC REPORT DATA=Movies NOWD;

    COLUMN Title Genre Budget Revenue IMDB_Rating Director Duration AwardsWon;

    DEFINE Title / DISPLAY 'Movie Title';

    DEFINE Genre / GROUP 'Genre';

    DEFINE Budget / ANALYSIS SUM 'Budget ($M)';

    DEFINE Revenue / ANALYSIS SUM 'Revenue ($M)';

    DEFINE IMDB_Rating / ANALYSIS MEAN 'Avg IMDb Rating';

    DEFINE Director / DISPLAY 'Director';

    DEFINE Duration / ANALYSIS MEAN 'Avg Duration (mins)';

    DEFINE AwardsWon / ANALYSIS SUM 'Total Awards Won';

RUN;


OUTPUT:

Movie Title Genre Budget ($M) Revenue ($M) Avg IMDb Rating Director Avg Duration (mins) Total Awards Won
TheDarkK Action 185 1005 9 Nolan 152 8
Gladiato   103 465 8.5 Scott 155 5
Spirited Animatio 19 383 8.6 Miyazaki 125 1
TheGodfa Crime 6 250 9.2 Coppola 175 7
Titanic Romance 200 2187 7.8 Cameron 195 11
Interste Sci-Fi 165 677 8.6 Nolan 169 5
Inceptio   160 830 8.8 Nolan 148 4
Avatar   237 2847 7.8 Cameron 162 3
Parasite Thriller 11 264 8.6 Bong_Joo 132 6
Joker   55 1074 8.4 Phillips 122 2


/* Step 8: Analyzing Revenue and Budget Relationships */

PROC CORR DATA=Movies; 

 VAR Budget Revenue AwardsWon; 

RUN;


OUTPUT:

                                                                The CORR Procedure

3 Variables: Budget Revenue AwardsWon


Simple Statistics
Variable N Mean Std Dev Sum Minimum Maximum
Budget 10 114.10000 86.39888 1141 6.00000 237.00000
Revenue 10 998.20000 864.75338 9982 250.00000 2847
AwardsWon 10 5.20000 2.97396 52.00000 1.00000 11.00000


Pearson Correlation Coefficients, N = 10
Prob > |r| under H0: Rho=0
  Budget Revenue AwardsWon
Budget
1.00000
 
0.77670
0.0082
0.29180
0.4133
Revenue
0.77670
0.0082
1.00000
 
0.15669
0.6655
AwardsWon
0.29180
0.4133
0.15669
0.6655
1.00000
 


/* Step 9: Grouping by Director and Analyzing Performance */

PROC MEANS DATA=Movies MEAN MAX MIN;

    CLASS Director;

    VAR Revenue IMDB_Rating AwardsWon;

RUN;


OUTPUT:

                                                            The MEANS Procedure

Director N Obs Variable Mean Maximum Minimum
Bong_Joo 1
Revenue
IMDB_Rating
AwardsWon
264.0000000
8.6000000
6.0000000
264.0000000
8.6000000
6.0000000
264.0000000
8.6000000
6.0000000
Cameron 2
Revenue
IMDB_Rating
AwardsWon
2517.00
7.8000000
7.0000000
2847.00
7.8000000
11.0000000
2187.00
7.8000000
3.0000000
Coppola 1
Revenue
IMDB_Rating
AwardsWon
250.0000000
9.2000000
7.0000000
250.0000000
9.2000000
7.0000000
250.0000000
9.2000000
7.0000000
Miyazaki 1
Revenue
IMDB_Rating
AwardsWon
383.0000000
8.6000000
1.0000000
383.0000000
8.6000000
1.0000000
383.0000000
8.6000000
1.0000000
Nolan 3
Revenue
IMDB_Rating
AwardsWon
837.3333333
8.8000000
5.6666667
1005.00
9.0000000
8.0000000
677.0000000
8.6000000
4.0000000
Phillips 1
Revenue
IMDB_Rating
AwardsWon
1074.00
8.4000000
2.0000000
1074.00
8.4000000
2.0000000
1074.00
8.4000000
2.0000000
Scott 1
Revenue
IMDB_Rating
AwardsWon
465.0000000
8.5000000
5.0000000
465.0000000
8.5000000
5.0000000
465.0000000
8.5000000
5.0000000


/* Step 10: Advanced Data Transformation - Normalizing Revenue and Awards */

DATA MoviesNormalized;

    SET Movies;

    Revenue_Normalized = (Revenue - 250) / (2847 - 250);

    Awards_Normalized = AwardsWon / 11;

RUN;

PROC PRINT DATA=MoviesNormalized; RUN;


OUTPUT:

Obs MovieID Title Genre ReleaseYear Budget Revenue IMDB_Rating RottenTomatoes_Score Country Director Duration Language LeadActor LeadActress AwardsWon Revenue_Normalized Awards_Normalized
1 M007 Titanic Romance 1997 200 2187 7.8 88 USA Cameron 195 English DiCaprio Winslet 11 0.74586 1.00000
2 M009 TheDarkK Action 2008 185 1005 9.0 94 USA Nolan 152 English Bale Gyllenha 8 0.29072 0.72727
3 M006 TheGodfa Crime 1972 6 250 9.2 98 USA Coppola 175 English Brando Keaton 7 0.00000 0.63636
4 M002 Parasite Thriller 2019 11 264 8.6 98 South_Ko Bong_Joo 132 Korean Kang_Ho Park 6 0.00539 0.54545
5 M004 Interste Sci-Fi 2014 165 677 8.6 72 USA Nolan 169 English McConaug Hathaway 5 0.16442 0.45455
6 M010 Gladiato Action 2000 103 465 8.5 76 USA Scott 155 English Crowe Nielsen 5 0.08279 0.45455
7 M001 Inceptio Sci-Fi 2010 160 830 8.8 87 USA Nolan 148 English DiCaprio Page 4 0.22333 0.36364
8 M003 Avatar Sci-Fi 2009 237 2847 7.8 82 USA Cameron 162 English Worthing Saldana 3 1.00000 0.27273
9 M005 Joker Thriller 2019 55 1074 8.4 68 USA Phillips 122 English Phoenix Beetz 2 0.31729 0.18182
10 M008 Spirited Animatio 2001 19 383 8.6 97 Japan Miyazaki 125 Japanese Irino Hiiragi 1 0.05121 0.09091

/* Step 11: Creating Boxplots for Revenue and Awards Distribution */

PROC SGPLOT DATA=Movies;

    VBOX Revenue / CATEGORY=Genre;

    TITLE "Revenue Distribution by Genre";

RUN;

PROC SGPLOT DATA=Movies;

    VBOX AwardsWon / CATEGORY=Genre;

    TITLE "Awards Distribution by Genre";

RUN;


/* Step 12: Exporting the Final Processed Dataset */

PROC EXPORT DATA=Movies

    OUTFILE="/home/movies_analysis.csv"

    DBMS=CSV

    REPLACE;

RUN;


/* Additional Analysis: Revenue per Minute */

DATA RevenuePerMinute;

    SET Movies;

    Revenue_Per_Minute = Revenue / Duration;

RUN;

PROC PRINT DATA=RevenuePerMinute; RUN;


OUTPUT:

Obs MovieID Title Genre ReleaseYear Budget Revenue IMDB_Rating RottenTomatoes_Score Country Director Duration Language LeadActor LeadActress AwardsWon Revenue_Per_Minute
1 M007 Titanic Romance 1997 200 2187 7.8 88 USA Cameron 195 English DiCaprio Winslet 11 11.2154
2 M009 TheDarkK Action 2008 185 1005 9.0 94 USA Nolan 152 English Bale Gyllenha 8 6.6118
3 M006 TheGodfa Crime 1972 6 250 9.2 98 USA Coppola 175 English Brando Keaton 7 1.4286
4 M002 Parasite Thriller 2019 11 264 8.6 98 South_Ko Bong_Joo 132 Korean Kang_Ho Park 6 2.0000
5 M004 Interste Sci-Fi 2014 165 677 8.6 72 USA Nolan 169 English McConaug Hathaway 5 4.0059
6 M010 Gladiato Action 2000 103 465 8.5 76 USA Scott 155 English Crowe Nielsen 5 3.0000
7 M001 Inceptio Sci-Fi 2010 160 830 8.8 87 USA Nolan 148 English DiCaprio Page 4 5.6081
8 M003 Avatar Sci-Fi 2009 237 2847 7.8 82 USA Cameron 162 English Worthing Saldana 3 17.5741
9 M005 Joker Thriller 2019 55 1074 8.4 68 USA Phillips 122 English Phoenix Beetz 2 8.8033
10 M008 Spirited Animatio 2001 19 383 8.6 97 Japan Miyazaki 125 Japanese Irino Hiiragi 1 3.0640


/* Comparing Ratings: IMDb vs Rotten Tomatoes */

PROC SGPLOT DATA=Movies;

    SCATTER X=IMDB_Rating Y=RottenTomatoes_Score / GROUP=Genre;

    TITLE "Comparison of IMDb and Rotten Tomatoes Ratings";

RUN;


/* Adding More Insights - Average Revenue per Genre */

PROC SQL;

    CREATE TABLE GenreRevenue AS

    SELECT Genre, AVG(Revenue) AS Avg_Revenue

    FROM Movies

    GROUP BY Genre;

QUIT;

PROC PRINT DATA=GenreRevenue; RUN;


OUTPUT:

Obs Genre Avg_Revenue
1 Action 735.00
2 Animatio 383.00
3 Crime 250.00
4 Romance 2187.00
5 Sci-Fi 1451.33
6 Thriller 669.00


/* Calculating Median Revenue for Each Genre */

PROC MEANS DATA=Movies MEDIAN;

    CLASS Genre;

    VAR Revenue;

RUN;


OUTPUT:

                                                                    The MEANS Procedure

Analysis Variable : Revenue
Genre N Obs Median
Action 2 735.0000000
Animatio 1 383.0000000
Crime 1 250.0000000
Romance 1 2187.00
Sci-Fi 3 830.0000000
Thriller 2 669.0000000



PRACTICE AND COMMENT YOUR CODE: 

-->PLEASE FOLLOW OUR BLOG FOR MORE UPDATES.

TO FOLLOW OUR TELEGRAM CHANNEL CLICK HERE



Comments