163.MASTERING THE GAMESALES DATASET IN SAS USING PROC IMPORT | PROC CONTENTS | PROC PRINT | PROC SORT | PROC MEANS | PROC FREQ | PROC UNIVARIATE | PROC CORR | PROC RANK | PROC STANDARD | PROC SQL | PROC REPORT | PROC TRANSPOSE | PROC FORMAT | PROC DATASETS | PROC EXPORT

MASTERING THE GAMESALES DATASET IN SAS USING PROC IMPORT | PROC CONTENTS | PROC PRINT | PROC SORT | PROC MEANS | PROC FREQ | PROC UNIVARIATE | PROC CORR | PROC RANK | PROC STANDARD | PROC SQL | PROC REPORT | PROC TRANSPOSE | PROC FORMAT | PROC DATASETS | PROC EXPORT


/*Creating a unique dataset centered around video game sales data*/

/*Creating the GameSales Dataset*/

data GameSales;

    length GameID $5 Title $30 Platform $10 Genre $15 Publisher $20;

    input GameID $ Title &  Platform &  Genre &  Publisher & ReleaseYear NA_Sales EU_Sales JP_Sales Other_Sales;

    datalines;

G001 Adventure Quest  PC       Adventure  Epic Games     2018 1.2 0.8 0.5 0.3

G002 Battle Zone      PS4      Shooter    Activision     2019 2.5 1.7 0.6 0.4

G003 Cyber Rally      Xbox     Racing     Ubisoft        2020 1.8 1.2 0.3 0.2

G004 Dragon Slayer    Switch   RPG        Nintendo       2021 3.0 2.0 1.5 0.5

G005 Eternal Night    PC       Horror     Capcom         2017 0.9 0.7 0.2 0.1

;

run;

proc print data=GameSales;

run;

Output:

Obs GameID Title Platform Genre Publisher ReleaseYear NA_Sales EU_Sales JP_Sales Other_Sales
1 G001 Adventure Quest PC Adventure Epic Games 2018 1.2 0.8 0.5 0.3
2 G002 Battle Zone PS4 Shooter Activision 2019 2.5 1.7 0.6 0.4
3 G003 Cyber Rally Xbox Racing Ubisoft 2020 1.8 1.2 0.3 0.2
4 G004 Dragon Slayer Switch RPG Nintendo 2021 3.0 2.0 1.5 0.5
5 G005 Eternal Night PC Horror Capcom 2017 0.9 0.7 0.2 0.1


/*Data Exploration with PROC PRINT and PROC CONTENTS*/

proc contents data=GameSales;

run;

Output:

                                                               The CONTENTS Procedure

Data Set Name WORK.GAMESALES Observations 5
Member Type DATA Variables 10
Engine V9 Indexes 0
Created 14/09/2015 00:10:15 Observation Length 120
Last Modified 14/09/2015 00:10:15 Deleted Observations 0
Protection   Compressed NO
Data Set Type   Sorted NO
Label      
Data Representation WINDOWS_64    
Encoding wlatin1 Western (Windows)    

Engine/Host Dependent Information
Data Set Page Size 65536
Number of Data Set Pages 1
First Data Page 1
Max Obs per Page 545
Obs in First Data Page 5
Number of Data Set Repairs 0
ExtendObsCounter YES
Filename C:\Users\Lenovo\AppData\Local\Temp\SAS Temporary Files\_TD13272_DESKTOP-QFAA4KV_\gamesales.sas7bdat
Release Created 9.0401M2
Host Created X64_8HOME

Alphabetic List of Variables and Attributes
# Variable Type Len
8 EU_Sales Num 8
1 GameID Char 5
4 Genre Char 15
9 JP_Sales Num 8
7 NA_Sales Num 8
10 Other_Sales Num 8
3 Platform Char 10
5 Publisher Char 20
6 ReleaseYear Num 8
2 Title Char 30

/*Data Transformation: Calculating Global Sales*/

data GameSales;

    set GameSales;

    Global_Sales = sum(NA_Sales, EU_Sales, JP_Sales, Other_Sales);

run;

proc print;run;

Output:

Obs GameID Title Platform Genre Publisher ReleaseYear NA_Sales EU_Sales JP_Sales Other_Sales Global_Sales
1 G001 Adventure Quest PC Adventure Epic Games 2018 1.2 0.8 0.5 0.3 2.8
2 G002 Battle Zone PS4 Shooter Activision 2019 2.5 1.7 0.6 0.4 5.2
3 G003 Cyber Rally Xbox Racing Ubisoft 2020 1.8 1.2 0.3 0.2 3.5
4 G004 Dragon Slayer Switch RPG Nintendo 2021 3.0 2.0 1.5 0.5 7.0
5 G005 Eternal Night PC Horror Capcom 2017 0.9 0.7 0.2 0.1 1.9

/*Statistical Analysis with PROC MEANS*/

proc means data=GameSales mean median min max std;

    var NA_Sales EU_Sales JP_Sales Other_Sales Global_Sales;

run;

Output:

                                                               The MEANS Procedure

Variable Mean Median Minimum Maximum Std Dev
NA_Sales
EU_Sales
JP_Sales
Other_Sales
Global_Sales
1.8800000
1.2800000
0.6200000
0.3000000
4.0800000
1.8000000
1.2000000
0.5000000
0.3000000
3.5000000
0.9000000
0.7000000
0.2000000
0.1000000
1.9000000
3.0000000
2.0000000
1.5000000
0.5000000
7.0000000
0.8757854
0.5630275
0.5167204
0.1581139
2.0315019


/*Frequency Analysis with PROC FREQ*/

proc freq data=GameSales;

    tables Genre Platform;

run;

Output:

                                                                   The FREQ Procedure

Genre Frequency Percent Cumulative
Frequency
Cumulative
Percent
Adventure 1 20.00 1 20.00
Horror 1 20.00 2 40.00
RPG 1 20.00 3 60.00
Racing 1 20.00 4 80.00
Shooter 1 20.00 5 100.00

Platform Frequency Percent Cumulative
Frequency
Cumulative
Percent
PC 2 40.00 2 40.00
PS4 1 20.00 3 60.00
Switch 1 20.00 4 80.00
Xbox 1 20.00 5 100.00


/*Visualizing Sales Data with PROC SGPLOT*/

proc sgplot data=GameSales;

    vbar Title / response=Global_Sales stat=sum;

    title "Global Sales per Game";

run;

Log:

NOTE: PROCEDURE SGPLOT used (Total process time):
      real time           2.79 seconds
      cpu time            0.57 seconds

NOTE: Listing image output written to SGPlot1.png.
NOTE: There were 5 observations read from the data set WORK.GAMESALES.

/*Creating Custom Formats with PROC FORMAT*/

proc format;

    value $genre_fmt

        'Adventure' = 'Adventure Games'

        'Shooter' = 'Shooter Games'

        'Racing' = 'Racing Games'

        'RPG' = 'Role-Playing Games'

        'Horror' = 'Horror Games';

run;


data GameSales;

    set GameSales;

    format Genre $genre_fmt.;

run;

proc print;run;

Output:

Obs GameID Title Platform Genre Publisher ReleaseYear NA_Sales EU_Sales JP_Sales Other_Sales Global_Sales
1 G001 Adventure Quest PC Adventure Games Epic Games 2018 1.2 0.8 0.5 0.3 2.8
2 G002 Battle Zone PS4 Shooter Games Activision 2019 2.5 1.7 0.6 0.4 5.2
3 G003 Cyber Rally Xbox Racing Games Ubisoft 2020 1.8 1.2 0.3 0.2 3.5
4 G004 Dragon Slayer Switch Role-Playing Games Nintendo 2021 3.0 2.0 1.5 0.5 7.0
5 G005 Eternal Night PC Horror Games Capcom 2017 0.9 0.7 0.2 0.1 1.9

/*Sorting Data with PROC SORT*/

proc sort data=GameSales;

    by descending Global_Sales;

run;

proc print;run;

Output:

Obs GameID Title Platform Genre Publisher ReleaseYear NA_Sales EU_Sales JP_Sales Other_Sales Global_Sales
1 G004 Dragon Slayer Switch Role-Playing Games Nintendo 2021 3.0 2.0 1.5 0.5 7.0
2 G002 Battle Zone PS4 Shooter Games Activision 2019 2.5 1.7 0.6 0.4 5.2
3 G003 Cyber Rally Xbox Racing Games Ubisoft 2020 1.8 1.2 0.3 0.2 3.5
4 G001 Adventure Quest PC Adventure Games Epic Games 2018 1.2 0.8 0.5 0.3 2.8
5 G005 Eternal Night PC Horror Games Capcom 2017 0.9 0.7 0.2 0.1 1.9

/*Filtering Data with WHERE Clause*/

proc print data=GameSales;

    where ReleaseYear > 2018;

run;

Output:

Obs GameID Title Platform Genre Publisher ReleaseYear NA_Sales EU_Sales JP_Sales Other_Sales Global_Sales
1 G004 Dragon Slayer Switch Role-Playing Games Nintendo 2021 3.0 2.0 1.5 0.5 7.0
2 G002 Battle Zone PS4 Shooter Games Activision 2019 2.5 1.7 0.6 0.4 5.2
3 G003 Cyber Rally Xbox Racing Games Ubisoft 2020 1.8 1.2 0.3 0.2 3.5

/*Aggregating Sales by Genre with PROC SQL*/

proc sql;

    select Genre, sum(Global_Sales) as Total_Sales

    from GameSales

    group by Genre;

quit;

Output:

Genre Total_Sales
Adventure Games 2.8
Horror Games 1.9
Role-Playing Games 7
Racing Games 3.5
Shooter Games 5.2


/*Creating a Subset Dataset*/

data TopSellers;

    set GameSales;

    if Global_Sales > 3;

run;

proc print;run;

Output:

Obs GameID Title Platform Genre Publisher ReleaseYear NA_Sales EU_Sales JP_Sales Other_Sales Global_Sales
1 G004 Dragon Slayer Switch Role-Playing Games Nintendo 2021 3.0 2.0 1.5 0.5 7.0
2 G002 Battle Zone PS4 Shooter Games Activision 2019 2.5 1.7 0.6 0.4 5.2
3 G003 Cyber Rally Xbox Racing Games Ubisoft 2020 1.8 1.2 0.3 0.2 3.5

/*Exporting Data to CSV*/

proc export data=GameSales

    outfile="C:\SASData\GameSales.csv"

    dbms=csv

    replace;

run;


PRACTICE AND COMMENT YOUR CODE: 

-->PLEASE FOLLOW OUR BLOG FOR MORE UPDATES.

TO FOLLOW OUR TELEGRAM CHANNEL CLICK HERE

Comments