Monday, 8 December 2025

333.VIDEO GAMES DATASET CREATION AND ANALYSIS USING DATA STEP | PROC SQL | PROC MEANS | PROC RANK | PROC SGPLOT WITH DATE FUNCTIONS AND POPULARITY GRADING MACROS

VIDEO GAMES DATASET CREATION AND ANALYSIS USING DATA STEP | PROC SQL | PROC MEANS | PROC RANK | PROC SGPLOT WITH DATE FUNCTIONS AND POPULARITY GRADING MACROS 

options nocenter;

1) Create dataset using a DATA step (explicit row creation)

data work.video_games;

    length Game_Name $40 Genre $15 Platform $12;

    format Release_Date yymmdd10.;

    informat Release_Date yymmdd10.;

    input Game_ID Game_Name :$40. Genre :$15. Release_Date :yymmdd10. Sales_Million

          Rating Platform :$12.;

    /* Derived variable */

    Sales = Sales_Million;

datalines;

1 Monster_Hunter_World ACTION 2018-01-26 20.0 8.5 PS4

2 Zelda_Breath_of_the_Wild ADVENTURE 2017-03-03 25.0 9.7 Nintendo_Switch

3 Minecraft SANDBOX 2011-11-18 200.0 9.0 Multi

4 Among_Us PARTY 2018-06-15 60.0 8.0 Multi

5 God_of_War ACTION 2018-04-20 12.0 9.1 PS4

6 Fortnite BATTLE_ROYALE 2017-07-21 350.0 7.8 Multi

7 The_Witcher_3 RPG 2015-05-19 28.0 9.3 Multi

8 Grand_Theft_Auto_V ACTION 2013-09-17 145.0 9.4 Multi

9 PUBG BATTLE_ROYALE 2017-12-20 70.0 7.5 Multi

10 Animal_Crossing_NEW_HORIZONS SIMULATION 2020-03-20 31.0 8.9 Nintendo_Switch

11 Super_Mario_Odyssey PLATFORM 2017-10-27 20.0 9.2 Nintendo_Switch

12 Halo_Infinite SHOOTER 2021-12-08 6.5 7.0 Xbox

;

run;

proc print data=work.video_games;

run;

OUTPUT:

ObsGame_NameGenrePlatformRelease_DateGame_IDSales_MillionRatingSales
1Monster_Hunter_WorldACTIONPS42018-01-26120.08.520.0
2Zelda_Breath_of_the_WildADVENTURENintendo_Swi2017-03-03225.09.725.0
3MinecraftSANDBOXMulti2011-11-183200.09.0200.0
4Among_UsPARTYMulti2018-06-15460.08.060.0
5God_of_WarACTIONPS42018-04-20512.09.112.0
6FortniteBATTLE_ROYALEMulti2017-07-216350.07.8350.0
7The_Witcher_3RPGMulti2015-05-19728.09.328.0
8Grand_Theft_Auto_VACTIONMulti2013-09-178145.09.4145.0
9PUBGBATTLE_ROYALEMulti2017-12-20970.07.570.0
10Animal_Crossing_NEW_HORIZONSSIMULATIONNintendo_Swi2020-03-201031.08.931.0
11Super_Mario_OdysseyPLATFORMNintendo_Swi2017-10-271120.09.220.0
12Halo_InfiniteSHOOTERXbox2021-12-08126.57.06.5


2) Same dataset creation using PROC SQL (INSERT SELECT style)

This demonstrates SQL-style creation and the differences to DATA step

proc sql;

  create table work.video_games_sql as

  select input(put(Release_Date, yymmdd10.), yymmdd10.) as Release_Date format=yymmdd10.,

         Game_ID,Game_Name,Genre,Sales_Million as Sales,Rating,Platform

  from work.video_games;

quit;

proc print data=work.video_games_sql;

run;

OUTPUT:

ObsRelease_DateGame_IDGame_NameGenreSalesRatingPlatform
12018-01-261Monster_Hunter_WorldACTION20.08.5PS4
22017-03-032Zelda_Breath_of_the_WildADVENTURE25.09.7Nintendo_Swi
32011-11-183MinecraftSANDBOX200.09.0Multi
42018-06-154Among_UsPARTY60.08.0Multi
52018-04-205God_of_WarACTION12.09.1PS4
62017-07-216FortniteBATTLE_ROYALE350.07.8Multi
72015-05-197The_Witcher_3RPG28.09.3Multi
82013-09-178Grand_Theft_Auto_VACTION145.09.4Multi
92017-12-209PUBGBATTLE_ROYALE70.07.5Multi
102020-03-2010Animal_Crossing_NEW_HORIZONSSIMULATION31.08.9Nintendo_Swi
112017-10-2711Super_Mario_OdysseyPLATFORM20.09.2Nintendo_Swi
122021-12-0812Halo_InfiniteSHOOTER6.57.0Xbox

/* Note: In practice above PROC SQL example reads from the DATA step table to

   demonstrate an SQL-based transformation. PROC SQL can also insert rows

   directly via INSERT INTO ... VALUES but that's verbose for many rows. */


3) Date calculations: age of game (years since release) using INTCK and INTNX

data work.video_games_dates;

  set work.video_games;

  Today = today();

  format Today yymmdd10.;


  /* INTCK counting whole boundary crossings (how many year boundaries crossed)

     This gives the difference in years but may behave differently across dates */

  Age_years_intck = intck('year', Release_Date, Today);


  /* INTNX: shift the Release_Date by N years and compare -- precise age with floor

     INTNX with 'same' alignment and then compute interval between dates */

  /* Compute anniversary date at current year */

  anniv_this_year = intnx('year', Release_Date, Age_years_intck, 'same');

  /* adjust if anniversary hasn't occurred yet */

  if anniv_this_year > Today then Age_years_intnx = Age_years_intck - 1;

  else Age_years_intnx = Age_years_intck;


  format anniv_this_year yymmdd10.;

run;

proc print data=work.video_games_dates;

run;

OUTPUT:

ObsGame_NameGenrePlatformRelease_DateGame_IDSales_MillionRatingSalesTodayAge_years_intckanniv_this_yearAge_years_intnx
1Monster_Hunter_WorldACTIONPS42018-01-26120.08.520.02025-12-0972025-01-267
2Zelda_Breath_of_the_WildADVENTURENintendo_Swi2017-03-03225.09.725.02025-12-0982025-03-038
3MinecraftSANDBOXMulti2011-11-183200.09.0200.02025-12-09142025-11-1814
4Among_UsPARTYMulti2018-06-15460.08.060.02025-12-0972025-06-157
5God_of_WarACTIONPS42018-04-20512.09.112.02025-12-0972025-04-207
6FortniteBATTLE_ROYALEMulti2017-07-216350.07.8350.02025-12-0982025-07-218
7The_Witcher_3RPGMulti2015-05-19728.09.328.02025-12-09102025-05-1910
8Grand_Theft_Auto_VACTIONMulti2013-09-178145.09.4145.02025-12-09122025-09-1712
9PUBGBATTLE_ROYALEMulti2017-12-20970.07.570.02025-12-0982025-12-207
10Animal_Crossing_NEW_HORIZONSSIMULATIONNintendo_Swi2020-03-201031.08.931.02025-12-0952025-03-205
11Super_Mario_OdysseyPLATFORMNintendo_Swi2017-10-271120.09.220.02025-12-0982025-10-278
12Halo_InfiniteSHOOTERXbox2021-12-08126.57.06.52025-12-0942025-12-084


4) PROC MEANS: Summary statistics on Sales and Rating

proc means data=work.video_games n mean median std min max mindec=2 maxdec=2;

  var Sales Rating;

  title "Summary statistics for Sales (million) and Rating";

run;

OUTPUT:

Summary statistics for Sales (million) and Rating

The MEANS Procedure

VariableNMeanMedianStd DevMinimumMaximum
Sales
Rating
12
12
80.63
8.62
29.50
8.95
103.30
0.85
6.50
7.00
350.00
9.70

/* Note: In practice above there is an Invalid in this code Find it,Correct it and Use it /*

5) PROC RANK: Create ranks by Sales and Rating (descending = highest is rank 1)

proc rank data=work.video_games out=work.video_games_ranked ties=mean descending;

  var Sales Rating;

  ranks Sales_Rank Rating_Rank;

run;

proc print data=work.video_games_ranked;

run;

OUTPUT:

ObsGame_NameGenrePlatformRelease_DateGame_IDSales_MillionRatingSalesSales_RankRating_Rank
1Monster_Hunter_WorldACTIONPS42018-01-26120.08.520.09.58
2Zelda_Breath_of_the_WildADVENTURENintendo_Swi2017-03-03225.09.725.08.01
3MinecraftSANDBOXMulti2011-11-183200.09.0200.02.06
4Among_UsPARTYMulti2018-06-15460.08.060.05.09
5God_of_WarACTIONPS42018-04-20512.09.112.011.05
6FortniteBATTLE_ROYALEMulti2017-07-216350.07.8350.01.010
7The_Witcher_3RPGMulti2015-05-19728.09.328.07.03
8Grand_Theft_Auto_VACTIONMulti2013-09-178145.09.4145.03.02
9PUBGBATTLE_ROYALEMulti2017-12-20970.07.570.04.011
10Animal_Crossing_NEW_HORIZONSSIMULATIONNintendo_Swi2020-03-201031.08.931.06.07
11Super_Mario_OdysseyPLATFORMNintendo_Swi2017-10-271120.09.220.09.54
12Halo_InfiniteSHOOTERXbox2021-12-08126.57.06.512.012


6) Macro for popularity grading: combines Sales and Rating into a score

 and assigns grades (A, B, C, D).

%macro popularity_grades(indata=work.video_games, outdata=work.video_games_pop);

  /* We use PROC SQL to compute normalized score, then DATA step to assign grade */

  proc sql;

    create table &outdata. as

    select *,

      /* Normalize Sales and Rating to 0-100 scale using min/max */

      (Sales - (select min(Sales) from &indata.)) /

      (case when (select max(Sales) from &indata.) - (select min(Sales) from &indata.) = 0

            then 1 else (select max(Sales) from &indata.) - (select min(Sales) from &indata.) end) * 70

      as Sales_Score,


      (Rating - (select min(Rating) from &indata.)) /

      (case when (select max(Rating) from &indata.) - (select min(Rating) from &indata.) = 0

            then 1 else (select max(Rating) from &indata.) - (select min(Rating) from &indata.) end) * 30

      as Rating_Score

    from &indata.;

  quit;

  proc print data=&outdata.;

  run;

  /* Total score as weighted sum: Sales 70%, Rating 30% */

  data &outdata.;

    set &outdata.;

    Popularity_Score = Sales_Score + Rating_Score; /* 0-100-ish scale */


    length Popularity_Grade $1.;

    if Popularity_Score >= 80 then Popularity_Grade = 'A';

    else if Popularity_Score >= 60 then Popularity_Grade = 'B';

    else if Popularity_Score >= 40 then Popularity_Grade = 'C';

    else Popularity_Grade = 'D';

  run;

  proc print data=&outdata.;

  run;

  /* Provide a quick PROC FREQ to show distribution of grades */

  proc freq data=&outdata.;

    tables Popularity_Grade / nocum nopercent;

    title "Distribution of popularity grades";

  run;

%mend popularity_grades;


%popularity_grades(indata=work.video_games, outdata=work.video_games_pop);

OUTPUT:

ObsGame_NameGenrePlatformRelease_DateGame_IDSales_MillionRatingSalesSales_ScoreRating_Score
1Monster_Hunter_WorldACTIONPS42018-01-26120.08.520.02.751116.6667
2Zelda_Breath_of_the_WildADVENTURENintendo_Swi2017-03-03225.09.725.03.770030.0000
3MinecraftSANDBOXMulti2011-11-183200.09.0200.039.432322.2222
4Among_UsPARTYMulti2018-06-15460.08.060.010.902511.1111
5God_of_WarACTIONPS42018-04-20512.09.112.01.120823.3333
6FortniteBATTLE_ROYALEMulti2017-07-216350.07.8350.070.00008.8889
7The_Witcher_3RPGMulti2015-05-19728.09.328.04.381425.5556
8Grand_Theft_Auto_VACTIONMulti2013-09-178145.09.4145.028.224226.6667
9PUBGBATTLE_ROYALEMulti2017-12-20970.07.570.012.94035.5556
10Animal_Crossing_NEW_HORIZONSSIMULATIONNintendo_Swi2020-03-201031.08.931.04.992721.1111
11Super_Mario_OdysseyPLATFORMNintendo_Swi2017-10-271120.09.220.02.751124.4444
12Halo_InfiniteSHOOTERXbox2021-12-08126.57.06.50.00000.0000
ObsGame_NameGenrePlatformRelease_DateGame_IDSales_MillionRatingSalesSales_ScoreRating_ScorePopularity_ScorePopularity_Grade
1Monster_Hunter_WorldACTIONPS42018-01-26120.08.520.02.751116.666719.4178D
2Zelda_Breath_of_the_WildADVENTURENintendo_Swi2017-03-03225.09.725.03.770030.000033.7700D
3MinecraftSANDBOXMulti2011-11-183200.09.0200.039.432322.222261.6545B
4Among_UsPARTYMulti2018-06-15460.08.060.010.902511.111122.0136D
5God_of_WarACTIONPS42018-04-20512.09.112.01.120823.333324.4541D
6FortniteBATTLE_ROYALEMulti2017-07-216350.07.8350.070.00008.888978.8889B
7The_Witcher_3RPGMulti2015-05-19728.09.328.04.381425.555629.9369D
8Grand_Theft_Auto_VACTIONMulti2013-09-178145.09.4145.028.224226.666754.8908C
9PUBGBATTLE_ROYALEMulti2017-12-20970.07.570.012.94035.555618.4959D
10Animal_Crossing_NEW_HORIZONSSIMULATIONNintendo_Swi2020-03-201031.08.931.04.992721.111126.1038D
11Super_Mario_OdysseyPLATFORMNintendo_Swi2017-10-271120.09.220.02.751124.444427.1955D
12Halo_InfiniteSHOOTERXbox2021-12-08126.57.06.50.00000.00000.0000D

Distribution of popularity grades

The FREQ Procedure

Popularity_GradeFrequency
B2
C1
D9

7) Visualizations: PROC SGPLOT examples

 - Bar chart of Sales by Game

- Scatter plot of Sales vs Rating with marker size proportional to Sales

proc sgplot data=work.video_games;

  vbar Game_Name / response=Sales stat=sum datalabel;

  xaxis display=(nolabel) valueattrs=(size=8) fitpolicy=rotate;

  yaxis label='Sales (Million)';

  title 'Sales by Game (Million)';

run;

OUTPUT:

The SGPlot Procedure


proc sgplot data=work.video_games;

  bubble x=Rating y=Sales size=Sales / datalabel=Game_Name;

  xaxis label='Rating (1-10)';

  yaxis label='Sales (Million)';

  title 'Sales vs Rating (Bubble sized by Sales)';

run;

OUTPUT:

The SGPlot Procedure


8) Additional PROC examples to show differences

- PROC SQL for joins/aggregations

- DATA step for row-wise manipulations

- PROC MEANS vs PROC SUMMARY note

/* Example: use PROC SQL to compute total sales by platform */

proc sql;

  create table work.sales_by_platform as

  select Platform, sum(Sales) as Total_Sales_Million

  from work.video_games

  group by Platform

  order by Total_Sales_Million desc;

quit;

proc print data=work.sales_by_platform;

run;

OUTPUT:

ObsPlatformTotal_Sales_Million
1Multi853.0
2Nintendo_Swi76.0
3PS432.0
4Xbox6.5

/* Example: DATA step to flag high selling games */

data work.video_games_flags;

  set work.video_games;

  High_Seller = (Sales >= 50); /* boolean 0/1 */

run;

proc print data=work.video_games_flags;

run;

OUTPUT:

ObsGame_NameGenrePlatformRelease_DateGame_IDSales_MillionRatingSalesHigh_Seller
1Monster_Hunter_WorldACTIONPS42018-01-26120.08.520.00
2Zelda_Breath_of_the_WildADVENTURENintendo_Swi2017-03-03225.09.725.00
3MinecraftSANDBOXMulti2011-11-183200.09.0200.01
4Among_UsPARTYMulti2018-06-15460.08.060.01
5God_of_WarACTIONPS42018-04-20512.09.112.00
6FortniteBATTLE_ROYALEMulti2017-07-216350.07.8350.01
7The_Witcher_3RPGMulti2015-05-19728.09.328.00
8Grand_Theft_Auto_VACTIONMulti2013-09-178145.09.4145.01
9PUBGBATTLE_ROYALEMulti2017-12-20970.07.570.01
10Animal_Crossing_NEW_HORIZONSSIMULATIONNintendo_Swi2020-03-201031.08.931.00
11Super_Mario_OdysseyPLATFORMNintendo_Swi2017-10-271120.09.220.00
12Halo_InfiniteSHOOTERXbox2021-12-08126.57.06.50


9) Output sample: print tables so user can inspect

proc print data=work.video_games (obs=12) noobs;

  title 'VIDEO_GAMES (DATA STEP)';

run;

OUTPUT:

VIDEO_GAMES (DATA STEP)

Game_NameGenrePlatformRelease_DateGame_IDSales_MillionRatingSales
Monster_Hunter_WorldACTIONPS42018-01-26120.08.520.0
Zelda_Breath_of_the_WildADVENTURENintendo_Swi2017-03-03225.09.725.0
MinecraftSANDBOXMulti2011-11-183200.09.0200.0
Among_UsPARTYMulti2018-06-15460.08.060.0
God_of_WarACTIONPS42018-04-20512.09.112.0
FortniteBATTLE_ROYALEMulti2017-07-216350.07.8350.0
The_Witcher_3RPGMulti2015-05-19728.09.328.0
Grand_Theft_Auto_VACTIONMulti2013-09-178145.09.4145.0
PUBGBATTLE_ROYALEMulti2017-12-20970.07.570.0
Animal_Crossing_NEW_HORIZONSSIMULATIONNintendo_Swi2020-03-201031.08.931.0
Super_Mario_OdysseyPLATFORMNintendo_Swi2017-10-271120.09.220.0
Halo_InfiniteSHOOTERXbox2021-12-08126.57.06.5

proc print data=work.video_games_ranked noobs;

  title 'VIDEO_GAMES WITH RANKS';

run;

OUTPUT:

VIDEO_GAMES WITH RANKS

Game_NameGenrePlatformRelease_DateGame_IDSales_MillionRatingSalesSales_RankRating_Rank
Monster_Hunter_WorldACTIONPS42018-01-26120.08.520.09.58
Zelda_Breath_of_the_WildADVENTURENintendo_Swi2017-03-03225.09.725.08.01
MinecraftSANDBOXMulti2011-11-183200.09.0200.02.06
Among_UsPARTYMulti2018-06-15460.08.060.05.09
God_of_WarACTIONPS42018-04-20512.09.112.011.05
FortniteBATTLE_ROYALEMulti2017-07-216350.07.8350.01.010
The_Witcher_3RPGMulti2015-05-19728.09.328.07.03
Grand_Theft_Auto_VACTIONMulti2013-09-178145.09.4145.03.02
PUBGBATTLE_ROYALEMulti2017-12-20970.07.570.04.011
Animal_Crossing_NEW_HORIZONSSIMULATIONNintendo_Swi2020-03-201031.08.931.06.07
Super_Mario_OdysseyPLATFORMNintendo_Swi2017-10-271120.09.220.09.54
Halo_InfiniteSHOOTERXbox2021-12-08126.57.06.512.012

proc print data=work.video_games_pop noobs;

  title 'VIDEO_GAMES WITH POPULARITY SCORE & GRADE';

run;

OUTPUT:

VIDEO_GAMES WITH POPULARITY SCORE & GRADE

Game_NameGenrePlatformRelease_DateGame_IDSales_MillionRatingSalesSales_ScoreRating_ScorePopularity_ScorePopularity_Grade
Monster_Hunter_WorldACTIONPS42018-01-26120.08.520.02.751116.666719.4178D
Zelda_Breath_of_the_WildADVENTURENintendo_Swi2017-03-03225.09.725.03.770030.000033.7700D
MinecraftSANDBOXMulti2011-11-183200.09.0200.039.432322.222261.6545B
Among_UsPARTYMulti2018-06-15460.08.060.010.902511.111122.0136D
God_of_WarACTIONPS42018-04-20512.09.112.01.120823.333324.4541D
FortniteBATTLE_ROYALEMulti2017-07-216350.07.8350.070.00008.888978.8889B
The_Witcher_3RPGMulti2015-05-19728.09.328.04.381425.555629.9369D
Grand_Theft_Auto_VACTIONMulti2013-09-178145.09.4145.028.224226.666754.8908C
PUBGBATTLE_ROYALEMulti2017-12-20970.07.570.012.94035.555618.4959D
Animal_Crossing_NEW_HORIZONSSIMULATIONNintendo_Swi2020-03-201031.08.931.04.992721.111126.1038D
Super_Mario_OdysseyPLATFORMNintendo_Swi2017-10-271120.09.220.02.751124.444427.1955D
Halo_InfiniteSHOOTERXbox2021-12-08126.57.06.50.00000.00000.0000D




To Visit My Previous Software Company Analysis Dataset:Click Here
To Visit My Previous Vote Program Dataset:Click Here
To Visit My Previous Audi Cars Performance Analysis Dataset:Click Here
To Visit My Previous Global Clothing Trends Dataset:Click Here  



Follow Us On : 


 


--- FOLLOW OUR BLOG FOR MORE INFORMATION.

--->PLEASE DO COMMENTS AND SHARE OUR BLOG.


No comments:

Post a Comment