Sunday, 27 April 2025

165.MASTERING E-COMMERCE ORDER FULFILLMENT ANALYTICS USING SAS | FROM DATA CREATION WITH PROC DATA TO INSIGHTFUL REPORTING WITH PROC REPORT | LEVERAGING PROC CONTENTS | PROC PRINT | PROC SQL | PROC EXPORT | PROC MEANS | PROC FREQ | PROC SGPLOT | PROC REPORT | ENHANCED WITH MACROS FOR AUTOMATION

MASTERING E-COMMERCE ORDER FULFILLMENT ANALYTICS USING SAS | FROM DATA CREATION WITH PROC DATA TO INSIGHTFUL REPORTING WITH PROC REPORT | LEVERAGING PROC CONTENTS | PROC PRINT | PROC SQL | PROC EXPORT | PROC MEANS | PROC FREQ | PROC SGPLOT | PROC REPORT | ENHANCED WITH MACROS FOR AUTOMATION


 /*Creating a unique dataset centered around "E-commerce Order Fulfillment"*/

/*Dataset Overview: E-commerce Order Fulfillment*/

OrderID: Unique identifier for each order.

CustomerID: Identifier for the customer placing the order.

OrderDate: Date when the order was placed

ShipDate: Date when the order was shipped.

DeliveryDate: Date when the order was delivered.

ProductID: Identifier for the product ordered.

Quantity: Number of units ordered.

UnitPrice: Price per unit of the product.

ShippingCost: Cost incurred for shipping.

OrderStatus: Current status of the order (e.g., Delivered, Shipped, Pending).

Region: Geographic region of the delivery address.


/*Creating the Dataset*/

data ecommerce_orders;

    format OrderDate ShipDate DeliveryDate date9.;

    do OrderID = 1 to 15;

        CustomerID = ceil(ranuni(123)*500);

        OrderDate = '01JAN2025'd + ceil(ranuni(123)*90);

        ShipLag = ceil(ranuni(123)*5);

        DeliveryLag = ShipLag + ceil(ranuni(123)*5);

        ShipDate = OrderDate + ShipLag;

        DeliveryDate = ShipDate + DeliveryLag;

        ProductID = ceil(ranuni(123)*100);

        Quantity = ceil(ranuni(123)*10);

        UnitPrice = round(10 + ranuni(123)*90, 0.01);

        ShippingCost = round(5 + ranuni(123)*15, 0.01);

        StatusProb = ranuni(123);

        if StatusProb < 0.7 then OrderStatus = 'Delivered';

        else if StatusProb < 0.9 then OrderStatus = 'Shipped';

        else OrderStatus = 'Pending';

        RegionProb = ranuni(123);

        if RegionProb < 0.25 then Region = 'North';

        else if RegionProb < 0.5 then Region = 'South';

        else if RegionProb < 0.75 then Region = 'East';

        else Region = 'West';

        output;

    end;

    drop ShipLag DeliveryLag StatusProb RegionProb;

run;

proc print;run;

Output:

Obs OrderDate ShipDate DeliveryDate OrderID CustomerID ProductID Quantity UnitPrice ShippingCost OrderStatus Region
1 30JAN2025 31JAN2025 06FEB2025 1 376 36 3 80.78 10.97 Delivered North
2 10FEB2025 15FEB2025 22FEB2025 2 389 72 6 57.81 17.92 Delivered West
3 12MAR2025 16MAR2025 22MAR2025 3 327 53 10 14.18 14.22 Delivered South
4 30JAN2025 02FEB2025 07FEB2025 4 295 53 6 18.84 6.63 Delivered East
5 15JAN2025 19JAN2025 28JAN2025 5 242 76 10 18.47 15.16 Shipped South
6 14MAR2025 18MAR2025 24MAR2025 6 331 78 6 73.45 17.48 Delivered North
7 23JAN2025 27JAN2025 04FEB2025 7 127 26 9 52.60 15.30 Shipped East
8 27FEB2025 03MAR2025 12MAR2025 8 38 10 6 41.64 8.00 Delivered North
9 02JAN2025 04JAN2025 07JAN2025 9 500 85 7 11.20 10.14 Shipped North
10 29MAR2025 03APR2025 10APR2025 10 377 29 4 84.59 15.07 Delivered East
11 10MAR2025 15MAR2025 25MAR2025 11 437 65 9 94.00 8.15 Shipped South
12 17FEB2025 21FEB2025 28FEB2025 12 5 54 7 33.61 6.24 Delivered North
13 29MAR2025 02APR2025 11APR2025 13 259 64 5 81.69 13.53 Delivered North
14 28MAR2025 31MAR2025 06APR2025 14 203 67 2 45.11 11.89 Delivered East
15 28FEB2025 02MAR2025 08MAR2025 15 390 33 7 39.91 11.47 Delivered North


/*Exploring the Dataset*/

proc contents data=ecommerce_orders;

run;

Output:

                                                                        The CONTENTS Procedure

Data Set Name WORK.ECOMMERCE_ORDERS Observations 15
Member Type DATA Variables 11
Engine V9 Indexes 0
Created 14/09/2015 00:49:30 Observation Length 88
Last Modified 14/09/2015 00:49:30 Deleted Observations 0
Protection   Compressed NO
Data Set Type   Sorted NO
Label      
Data Representation WINDOWS_64    
Encoding wlatin1 Western (Windows)    

Engine/Host Dependent Information
Data Set Page Size 65536
Number of Data Set Pages 1
First Data Page 1
Max Obs per Page 743
Obs in First Data Page 15
Number of Data Set Repairs 0
ExtendObsCounter YES
Filename C:\Users\Lenovo\AppData\Local\Temp\SAS Temporary Files\_TD8552_DESKTOP-QFAA4KV_\ecommerce_orders.sas7bdat
Release Created 9.0401M2
Host Created X64_8HOME

Alphabetic List of Variables and Attributes
# Variable Type Len Format
5 CustomerID Num 8  
3 DeliveryDate Num 8 DATE9.
1 OrderDate Num 8 DATE9.
4 OrderID Num 8  
10 OrderStatus Char 9  
6 ProductID Num 8  
7 Quantity Num 8  
11 Region Char 5  
2 ShipDate Num 8 DATE9.
9 ShippingCost Num 8  
8 UnitPrice Num 8  

proc print data=ecommerce_orders(obs=10);

run;

Output:

Obs OrderDate ShipDate DeliveryDate OrderID CustomerID ProductID Quantity UnitPrice ShippingCost OrderStatus Region
1 30JAN2025 31JAN2025 06FEB2025 1 376 36 3 80.78 10.97 Delivered North
2 10FEB2025 15FEB2025 22FEB2025 2 389 72 6 57.81 17.92 Delivered West
3 12MAR2025 16MAR2025 22MAR2025 3 327 53 10 14.18 14.22 Delivered South
4 30JAN2025 02FEB2025 07FEB2025 4 295 53 6 18.84 6.63 Delivered East
5 15JAN2025 19JAN2025 28JAN2025 5 242 76 10 18.47 15.16 Shipped South
6 14MAR2025 18MAR2025 24MAR2025 6 331 78 6 73.45 17.48 Delivered North
7 23JAN2025 27JAN2025 04FEB2025 7 127 26 9 52.60 15.30 Shipped East
8 27FEB2025 03MAR2025 12MAR2025 8 38 10 6 41.64 8.00 Delivered North
9 02JAN2025 04JAN2025 07JAN2025 9 500 85 7 11.20 10.14 Shipped North
10 29MAR2025 03APR2025 10APR2025 10 377 29 4 84.59 15.07 Delivered East

/*Total Sales per Region*/

proc sql;

    select Region,

           sum(Quantity * UnitPrice) as TotalSales format=dollar12.2

    from ecommerce_orders

    group by Region;

quit;

Output:

Region TotalSales
East $1,015.02
North $1,934.37
South $1,172.50
West $346.86

/*Average Delivery Time per Region*/

proc sql;

    select Region,

           avg(DeliveryDate - OrderDate) as AvgDeliveryTime format=8.2

    from ecommerce_orders

    where OrderStatus = 'Delivered'

    group by Region;

quit;

Output:

Region AvgDeliveryTime
East 9.67
North 10.33
South 10.00
West 12.00

/*Creating Macro Variables with PROC SQL*/

proc sql noprint;

    select count(*) into :TotalOrders

    from ecommerce_orders;


    select sum(Quantity * UnitPrice) into :TotalSales

    from ecommerce_orders;

quit;


%put Total Orders: &TotalOrders;

Log:

Total Orders:       15

%put Total Sales: &TotalSales;

Log:

Total Sales:  4468.75

/*Automating Reports with Macros*/

%macro RegionReport(region);

    title "E-commerce Report for &region Region";

    proc sql;

        select OrderStatus,

               count(*) as OrderCount,

               sum(Quantity * UnitPrice) as TotalSales format=dollar12.2

        from ecommerce_orders

        where Region = "&region"

        group by OrderStatus;

    quit;

%mend;


%RegionReport(North);

Output:

                                                    E-commerce Report for North Region

OrderStatus OrderCount TotalSales
Delivered 6 $1,855.97
Shipped 1 $78.40

%RegionReport(South);

Output:

                                                      E-commerce Report for South Region

OrderStatus OrderCount TotalSales
Delivered 1 $141.80
Shipped 2 $1,030.70

%RegionReport(East);

Output:

                                                     E-commerce Report for East Region

OrderStatus OrderCount TotalSales
Delivered 3 $541.62
Shipped 1 $473.40

%RegionReport(West);

Output:

                                                    E-commerce Report for West Region

OrderStatus OrderCount TotalSales
Delivered 1 $346.86
 

/*Orders Over Time*/

proc sql;

    create table orders_over_time as

    select OrderDate, count(*) as Orders

    from ecommerce_orders

    group by OrderDate;

quit;

proc print;run;

Output:

Obs OrderDate Orders
1 02JAN2025 1
2 15JAN2025 1
3 23JAN2025 1
4 30JAN2025 2
5 10FEB2025 1
6 17FEB2025 1
7 27FEB2025 1
8 28FEB2025 1
9 10MAR2025 1
10 12MAR2025 1
11 14MAR2025 1
12 28MAR2025 1
13 29MAR2025 2


proc sgplot data=orders_over_time;

    series x=OrderDate y=Orders;

    title "Daily Orders Over Time";

run;


/*Sales by Region*/

proc sql;

    create table sales_by_region as

    select Region, sum(Quantity * UnitPrice) as TotalSales

    from ecommerce_orders

    group by Region;

quit;

proc print;run;

Output:

Obs Region TotalSales
1 East 1015.02
2 North 1934.37
3 South 1172.50
4 West 346.86


proc sgplot data=sales_by_region;

    vbar Region / response=TotalSales stat=sum;

    title "Total Sales by Region";

run;


/*Identifying Top 5 Products by Sales*/

proc sql;

    create table top_products as

    select ProductID,

           sum(Quantity * UnitPrice) as ProductSales

    from ecommerce_orders

    group by ProductID

    order by ProductSales desc;

quit;

proc print data=top_products(obs=5);

    title "Top 5 Products by Sales";

run;

Output:

                                                                Top 5 Products by Sales

Obs ProductID ProductSales
1 65 846.00
2 26 473.40
3 78 440.70
4 64 408.45
5 72 346.86

/*Delivery Time Analysis*/

data delivery_analysis;

    set ecommerce_orders;

    DeliveryTime = DeliveryDate - OrderDate;

run;


proc means data=delivery_analysis mean std min max;

    var DeliveryTime;

    class Region;

    title "Delivery Time Statistics by Region";

run;

Output:

                                                     Delivery Time Statistics by Region

                                                           The MEANS Procedure

Analysis Variable : DeliveryTime
Region N Obs Mean Std Dev Minimum Maximum
East 4 10.2500000 2.0615528 8.0000000 12.0000000
North 7 9.5714286 3.0472470 5.0000000 13.0000000
South 3 12.6666667 2.5166115 10.0000000 15.0000000
West 1 12.0000000 . 12.0000000 12.0000000

/*Generating a Summary Report*/

proc report data=ecommerce_orders nowd;

    column Region OrderStatus Quantity UnitPrice TotalSales;

    define Region / group;

    define OrderStatus / group;

    define Quantity / analysis sum format=comma12. 'Total Quantity';

    define UnitPrice / analysis mean format=dollar12.2 'Average Unit Price';

    define TotalSales / computed format=dollar12.2 'Total Sales';


    compute TotalSales;

        TotalSales = Quantity.sum * UnitPrice.mean;

    endcomp;


    title "Summary Report by Region and Order Status";

run;rt by Region and Order Status";

run;

Output:

                                         Summary Report by Region and Order Status

Region OrderStatus Total Quantity Average Unit Price Total Sales
East Delivered 12 $49.51 $594.16
  Shipped 9 $52.60 $473.40
North Delivered 34 $58.51 $1,989.45
  Shipped 7 $11.20 $78.40
South Delivered 10 $14.18 $141.80
  Shipped 19 $56.24 $1,068.47
West Delivered 6 $57.81 $346.86

/*Checking for Missing Values*/

proc means data=ecommerce_orders n nmiss;

    var Quantity UnitPrice ShippingCost;

    title "Missing Values Check";

run;

Output:

                                                                 Missing Values Check

                                                                 The MEANS Procedure

Variable N N Miss
Quantity
UnitPrice
ShippingCost
15
15
15
0
0
0

/*Validating Date Sequences*/

data date_validation;

    set ecommerce_orders;

    if ShipDate < OrderDate or DeliveryDate < ShipDate then Flag = 'Invalid Dates';

    else Flag = 'Valid';

run;


proc freq data=date_validation;

    tables Flag;

    title "Date Validation Results";

run;

Output:

                                                             Date Validation Results

                                                               The FREQ Procedure

Flag Frequency Percent Cumulative
Frequency
Cumulative
Percent
Valid 15 100.00 15 100.00

/*Exporting the Cleaned Dataset*/

proc export data=ecommerce_orders

    outfile='ecommerce_orders_cleaned.csv'

    dbms=csv

    replace;

run;


PRACTICE AND COMMENT YOUR CODE: 

-->PLEASE FOLLOW OUR BLOG FOR MORE UPDATES.

TO FOLLOW OUR TELEGRAM CHANNEL CLICK HERE


Friday, 25 April 2025

​164.BOOKSTORE SALES DATA ANALYSIS USING PROC SQL | PROC MEANS | PROC FREQ | PROC PRINT | PROC GCHART | PROC GPLOT | PROC EXPORT | MACROS

BOOKSTORE SALES DATA ANALYSIS USING PROC SQL | PROC MEANS | PROC FREQ | PROC PRINT | PROC GCHART | PROC GPLOT | PROC EXPORT | MACROS


/*Creating a unique dataset and applying various SAS procedures, including SQL and macros. We'll focus on a dataset related to bookstore sales.*/

/*Dataset Creation*/

data bookstore_sales;

    infile datalines dlm=',' dsd;

    input TransactionID : $10.

          CustomerID : $8.

          BookID : $10.

          AuthorID : $8.

          Genre : $15.

          Price : 8.2

          Quantity : 8.

          TransactionDate : date9.;

    format TransactionDate date9.;

datalines;

T001,C001,B001,A001,Fiction,299.99,1,01JAN2025

T002,C002,B002,A002,Non-Fiction,199.50,2,02JAN2025

T003,C001,B003,A003,Science,150.00,1,03JAN2025

T004,C003,B001,A001,Fiction,299.99,1,04JAN2025

T005,C004,B004,A004,History,250.75,1,05JAN2025

T006,C002,B005,A005,Science,175.00,2,06JAN2025

T007,C005,B006,A006,Children,120.00,3,07JAN2025

T008,C006,B007,A007,Fiction,310.00,1,08JAN2025

T009,C001,B008,A008,Non-Fiction,220.00,1,09JAN2025

T010,C007,B009,A009,History,260.00,2,10JAN2025

;

run;

proc print;run;

Output:

Obs TransactionID CustomerID BookID AuthorID Genre Price Quantity TransactionDate
1 T001 C001 B001 A001 Fiction 299.99 1 01JAN2025
2 T002 C002 B002 A002 Non-Fiction 199.50 2 02JAN2025
3 T003 C001 B003 A003 Science 150.00 1 03JAN2025
4 T004 C003 B001 A001 Fiction 299.99 1 04JAN2025
5 T005 C004 B004 A004 History 250.75 1 05JAN2025
6 T006 C002 B005 A005 Science 175.00 2 06JAN2025
7 T007 C005 B006 A006 Children 120.00 3 07JAN2025
8 T008 C006 B007 A007 Fiction 310.00 1 08JAN2025
9 T009 C001 B008 A008 Non-Fiction 220.00 1 09JAN2025
10 T010 C007 B009 A009 History 260.00 2 10JAN2025


/*View the Dataset*/

proc print data=bookstore_sales;

    title "Bookstore Sales Data";

run;

Output:

                                                                        Bookstore Sales Data

Obs TransactionID CustomerID BookID AuthorID Genre Price Quantity TransactionDate
1 T001 C001 B001 A001 Fiction 299.99 1 01JAN2025
2 T002 C002 B002 A002 Non-Fiction 199.50 2 02JAN2025
3 T003 C001 B003 A003 Science 150.00 1 03JAN2025
4 T004 C003 B001 A001 Fiction 299.99 1 04JAN2025
5 T005 C004 B004 A004 History 250.75 1 05JAN2025
6 T006 C002 B005 A005 Science 175.00 2 06JAN2025
7 T007 C005 B006 A006 Children 120.00 3 07JAN2025
8 T008 C006 B007 A007 Fiction 310.00 1 08JAN2025
9 T009 C001 B008 A008 Non-Fiction 220.00 1 09JAN2025
10 T010 C007 B009 A009 History 260.00 2 10JAN2025


/*Summary Statistics*/

proc means data=bookstore_sales n mean min max;

    var Price Quantity;

    title "Summary Statistics for Price and Quantity";

run;

Output:

                                               Summary Statistics for Price and Quantity

                                                          The MEANS Procedure

Variable N Mean Minimum Maximum
Price
Quantity
10
10
228.5230000
1.5000000
120.0000000
1.0000000
310.0000000
3.0000000

/*Frequency of Genres*/

proc freq data=bookstore_sales;

    tables Genre;

    title "Frequency of Book Genres Sold";

run;

Output:

                                                      Frequency of Book Genres Sold

                                                              The FREQ Procedure

Genre Frequency Percent Cumulative
Frequency
Cumulative
Percent
Children 1 10.00 1 10.00
Fiction 3 30.00 4 40.00
History 2 20.00 6 60.00
Non-Fiction 2 20.00 8 80.00
Science 2 20.00 10 100.00


/*Calculate Total Sale Amount*/

data bookstore_sales;

    set bookstore_sales;

    TotalAmount = Price * Quantity;

run;

proc print;run;

Output:

Obs TransactionID CustomerID BookID AuthorID Genre Price Quantity TransactionDate TotalAmount
1 T001 C001 B001 A001 Fiction 299.99 1 01JAN2025 299.99
2 T002 C002 B002 A002 Non-Fiction 199.50 2 02JAN2025 399.00
3 T003 C001 B003 A003 Science 150.00 1 03JAN2025 150.00
4 T004 C003 B001 A001 Fiction 299.99 1 04JAN2025 299.99
5 T005 C004 B004 A004 History 250.75 1 05JAN2025 250.75
6 T006 C002 B005 A005 Science 175.00 2 06JAN2025 350.00
7 T007 C005 B006 A006 Children 120.00 3 07JAN2025 360.00
8 T008 C006 B007 A007 Fiction 310.00 1 08JAN2025 310.00
9 T009 C001 B008 A008 Non-Fiction 220.00 1 09JAN2025 220.00
10 T010 C007 B009 A009 History 260.00 2 10JAN2025 520.00


/*Extract Month from Transaction Date*/

data bookstore_sales;

    set bookstore_sales;

    TransactionMonth = month(TransactionDate);

run;

proc print;run;

Output:

Obs TransactionID CustomerID BookID AuthorID Genre Price Quantity TransactionDate TotalAmount TransactionMonth
1 T001 C001 B001 A001 Fiction 299.99 1 01JAN2025 299.99 1
2 T002 C002 B002 A002 Non-Fiction 199.50 2 02JAN2025 399.00 1
3 T003 C001 B003 A003 Science 150.00 1 03JAN2025 150.00 1
4 T004 C003 B001 A001 Fiction 299.99 1 04JAN2025 299.99 1
5 T005 C004 B004 A004 History 250.75 1 05JAN2025 250.75 1
6 T006 C002 B005 A005 Science 175.00 2 06JAN2025 350.00 1
7 T007 C005 B006 A006 Children 120.00 3 07JAN2025 360.00 1
8 T008 C006 B007 A007 Fiction 310.00 1 08JAN2025 310.00 1
9 T009 C001 B008 A008 Non-Fiction 220.00 1 09JAN2025 220.00 1
10 T010 C007 B009 A009 History 260.00 2 10JAN2025 520.00 1


/*Total Sales per Genre*/

proc sql;

    select Genre, sum(TotalAmount) as TotalSales format=8.2

    from bookstore_sales

    group by Genre;

quit;

Output:

Genre TotalSales
Children 360.00
Fiction 909.98
History 770.75
Non-Fiction 619.00
Science 500.00

/*Top 3 Customers by Total Purchase*/

proc sql outobs=3;

    select CustomerID, sum(TotalAmount) as TotalSpent format=8.2

    from bookstore_sales

    group by CustomerID

    order by TotalSpent desc;

quit;

Output:

CustomerID TotalSpent
C002 749.00
C001 669.99
C007 520.00

/*Average Quantity Sold per Genre*/

proc sql;

    select Genre, avg(Quantity) as AvgQuantitySold format=8.2

    from bookstore_sales

    group by Genre;

quit;

Output:

Genre AvgQuantitySold
Children 3.00
Fiction 1.00
History 1.50
Non-Fiction 1.50
Science 1.50


/*Macro to Generate Sales Report for a Given Genre*/

%macro genre_sales_report(genre);

    proc sql;

        select * from bookstore_sales

        where Genre = "&genre";

    quit;

%mend genre_sales_report;


%genre_sales_report(Fiction)

Output:

TransactionID CustomerID BookID AuthorID Genre Price Quantity TransactionDate TotalAmount TransactionMonth
T001 C001 B001 A001 Fiction 299.99 1 01JAN2025 299.99 1
T004 C003 B001 A001 Fiction 299.99 1 04JAN2025 299.99 1
T008 C006 B007 A007 Fiction 310 1 08JAN2025 310 1


/*Macro to Identify High-Value Transactions*/

%macro high_value_transactions(threshold);

    proc sql;

        select * from bookstore_sales

        where TotalAmount > &threshold;

    quit;

%mend high_value_transactions;


%high_value_transactions(500)

Output:

TransactionID CustomerID BookID AuthorID Genre Price Quantity TransactionDate TotalAmount TransactionMonth
T010 C007 B009 A009 History 260 2 10JAN2025 520 1

/*Monthly Sales Trend*/

proc sql;

    select TransactionMonth, sum(TotalAmount) as MonthlySales format=8.2

    from bookstore_sales

    group by TransactionMonth

    order by TransactionMonth;

quit;

Output:

/*Identify Most Popular Book*/

proc sql outobs=1;

    select BookID, sum(Quantity) as TotalSold

    from bookstore_sales

    group by BookID

    order by TotalSold desc;

quit;

Output:

TransactionMonth MonthlySales
1 3159.73

/*Bar Chart of Sales per Genre*/

proc gchart data=bookstore_sales;

    vbar Genre / sumvar=TotalAmount type=sum

         subgroup=Genre

         patternid=midpoint;

    title "Total Sales per Genre";

run;

quit;

Log:

NOTE: There were 10 observations read from the data set WORK.BOOKSTORE_SALES.

NOTE: PROCEDURE GCHART used (Total process time):

      real time           2.01 seconds

      cpu time            0.50 seconds


/*Line Chart of Monthly Sales*/

proc gplot data=bookstore_sales;

    plot TotalAmount*TransactionMonth;

    title "Monthly Sales Trend";

run;

quit;

Log:

NOTE: There were 10 observations read from the data set WORK.BOOKSTORE_SALES.

NOTE: PROCEDURE GPLOT used (Total process time):

      real time           0.53 seconds

      cpu time            0.25 seconds


/*Creating Separate Datasets per Genre Using Macros*/

/* Step 1: Create a sanitized version of Genre for valid dataset names */

data bookstore_sales_sanitized;

    set bookstore_sales;

    /* Replace spaces and special characters with underscores */

    Genre_sanitized = compress(Genre, , 'kad'); /* 'kad' removes all but letters and digits */

    Genre_sanitized = translate(Genre_sanitized, '_', ' '); /* Replace spaces with underscores */

run;

proc print;run;


/* Step 2: Get distinct sanitized genres */

proc sql noprint;

    select distinct Genre_sanitized into :genre1-:genre999

    from bookstore_sales_sanitized;

    %let genre_count = &sqlobs;

quit;


/* Step 3: Macro to split dataset by sanitized genre */

%macro split_by_genre;

    %do i=1 %to &genre_count;

        data &&genre&i;

            set bookstore_sales_sanitized;

            if Genre_sanitized = "&&genre&i";

        run;

    %end;

%mend split_by_genre;


%split_by_genre

Log:

NOTE: There were 10 observations read from the data set WORK.BOOKSTORE_SALES_SANITIZED.

NOTE: The data set WORK.CHILDREN_______ has 1 observations and 11 variables.

NOTE: DATA statement used (Total process time):

      real time           0.01 seconds

      cpu time            0.01 seconds


NOTE: There were 10 observations read from the data set WORK.BOOKSTORE_SALES_SANITIZED.

NOTE: The data set WORK.FICTION________ has 3 observations and 11 variables.

NOTE: DATA statement used (Total process time):

      real time           0.00 seconds

      cpu time            0.00 seconds


NOTE: There were 10 observations read from the data set WORK.BOOKSTORE_SALES_SANITIZED.

NOTE: The data set WORK.HISTORY________ has 2 observations and 11 variables.

NOTE: DATA statement used (Total process time):

      real time           0.00 seconds

      cpu time            0.00 seconds


NOTE: There were 10 observations read from the data set WORK.BOOKSTORE_SALES_SANITIZED.

NOTE: The data set WORK.NONFICTION_____ has 2 observations and 11 variables.

NOTE: DATA statement used (Total process time):

      real time           0.01 seconds

      cpu time            0.01 seconds


NOTE: There were 10 observations read from the data set WORK.BOOKSTORE_SALES_SANITIZED.

NOTE: The data set WORK.SCIENCE________ has 2 observations and 11 variables.

NOTE: DATA statement used (Total process time):

      real time           0.00 seconds

      cpu time            0.00 seconds


/*Exporting to CSV*/

proc export data=bookstore_sales

    outfile='bookstore_sales.csv'

    dbms=csv

    replace;

run;

Log:

10 records created in C:\Users\Lenovo\Desktop\NEW\EXCEL EX OF SAS\bookstore_sales.sales from

BOOKSTORE_SALES.

NOTE: "bookstore_sales.sales" file was successfully created.

NOTE: PROCEDURE EXPORT used (Total process time):

      real time           1.60 seconds

      cpu time            0.20 seconds


/*Exporting Genre-wise Data*/

%macro export_genre_data;

    %do i=1 %to &genre_count;

        proc export data=&&genre&i

            outfile="&&genre&i..csv"

            dbms=csv

            replace;

        run;

    %end;

%mend export_genre_data;


%export_genre_data

Log:
2 records created in C:\Users\Lenovo\Desktop\NEW\EXCEL EX OF SAS\Science________csv from
SCIENCE________.


NOTE: "Science________csv" file was successfully created.
NOTE: PROCEDURE EXPORT used (Total process time):
      real time           0.10 seconds
      cpu time            0.06 seconds

PRACTICE AND COMMENT YOUR CODE: 

-->PLEASE FOLLOW OUR BLOG FOR MORE UPDATES.

TO FOLLOW OUR TELEGRAM CHANNEL CLICK HERE


Thursday, 24 April 2025

163.MASTERING THE GAMESALES DATASET IN SAS USING PROC IMPORT | PROC CONTENTS | PROC PRINT | PROC SORT | PROC MEANS | PROC FREQ | PROC UNIVARIATE | PROC CORR | PROC RANK | PROC STANDARD | PROC SQL | PROC REPORT | PROC TRANSPOSE | PROC FORMAT | PROC DATASETS | PROC EXPORT

MASTERING THE GAMESALES DATASET IN SAS USING PROC IMPORT | PROC CONTENTS | PROC PRINT | PROC SORT | PROC MEANS | PROC FREQ | PROC UNIVARIATE | PROC CORR | PROC RANK | PROC STANDARD | PROC SQL | PROC REPORT | PROC TRANSPOSE | PROC FORMAT | PROC DATASETS | PROC EXPORT


/*Creating a unique dataset centered around video game sales data*/

/*Creating the GameSales Dataset*/

data GameSales;

    length GameID $5 Title $30 Platform $10 Genre $15 Publisher $20;

    input GameID $ Title &  Platform &  Genre &  Publisher & ReleaseYear NA_Sales EU_Sales JP_Sales Other_Sales;

    datalines;

G001 Adventure Quest  PC       Adventure  Epic Games     2018 1.2 0.8 0.5 0.3

G002 Battle Zone      PS4      Shooter    Activision     2019 2.5 1.7 0.6 0.4

G003 Cyber Rally      Xbox     Racing     Ubisoft        2020 1.8 1.2 0.3 0.2

G004 Dragon Slayer    Switch   RPG        Nintendo       2021 3.0 2.0 1.5 0.5

G005 Eternal Night    PC       Horror     Capcom         2017 0.9 0.7 0.2 0.1

;

run;

proc print data=GameSales;

run;

Output:

Obs GameID Title Platform Genre Publisher ReleaseYear NA_Sales EU_Sales JP_Sales Other_Sales
1 G001 Adventure Quest PC Adventure Epic Games 2018 1.2 0.8 0.5 0.3
2 G002 Battle Zone PS4 Shooter Activision 2019 2.5 1.7 0.6 0.4
3 G003 Cyber Rally Xbox Racing Ubisoft 2020 1.8 1.2 0.3 0.2
4 G004 Dragon Slayer Switch RPG Nintendo 2021 3.0 2.0 1.5 0.5
5 G005 Eternal Night PC Horror Capcom 2017 0.9 0.7 0.2 0.1


/*Data Exploration with PROC PRINT and PROC CONTENTS*/

proc contents data=GameSales;

run;

Output:

                                                               The CONTENTS Procedure

Data Set Name WORK.GAMESALES Observations 5
Member Type DATA Variables 10
Engine V9 Indexes 0
Created 14/09/2015 00:10:15 Observation Length 120
Last Modified 14/09/2015 00:10:15 Deleted Observations 0
Protection   Compressed NO
Data Set Type   Sorted NO
Label      
Data Representation WINDOWS_64    
Encoding wlatin1 Western (Windows)    

Engine/Host Dependent Information
Data Set Page Size 65536
Number of Data Set Pages 1
First Data Page 1
Max Obs per Page 545
Obs in First Data Page 5
Number of Data Set Repairs 0
ExtendObsCounter YES
Filename C:\Users\Lenovo\AppData\Local\Temp\SAS Temporary Files\_TD13272_DESKTOP-QFAA4KV_\gamesales.sas7bdat
Release Created 9.0401M2
Host Created X64_8HOME

Alphabetic List of Variables and Attributes
# Variable Type Len
8 EU_Sales Num 8
1 GameID Char 5
4 Genre Char 15
9 JP_Sales Num 8
7 NA_Sales Num 8
10 Other_Sales Num 8
3 Platform Char 10
5 Publisher Char 20
6 ReleaseYear Num 8
2 Title Char 30

/*Data Transformation: Calculating Global Sales*/

data GameSales;

    set GameSales;

    Global_Sales = sum(NA_Sales, EU_Sales, JP_Sales, Other_Sales);

run;

proc print;run;

Output:

Obs GameID Title Platform Genre Publisher ReleaseYear NA_Sales EU_Sales JP_Sales Other_Sales Global_Sales
1 G001 Adventure Quest PC Adventure Epic Games 2018 1.2 0.8 0.5 0.3 2.8
2 G002 Battle Zone PS4 Shooter Activision 2019 2.5 1.7 0.6 0.4 5.2
3 G003 Cyber Rally Xbox Racing Ubisoft 2020 1.8 1.2 0.3 0.2 3.5
4 G004 Dragon Slayer Switch RPG Nintendo 2021 3.0 2.0 1.5 0.5 7.0
5 G005 Eternal Night PC Horror Capcom 2017 0.9 0.7 0.2 0.1 1.9

/*Statistical Analysis with PROC MEANS*/

proc means data=GameSales mean median min max std;

    var NA_Sales EU_Sales JP_Sales Other_Sales Global_Sales;

run;

Output:

                                                               The MEANS Procedure

Variable Mean Median Minimum Maximum Std Dev
NA_Sales
EU_Sales
JP_Sales
Other_Sales
Global_Sales
1.8800000
1.2800000
0.6200000
0.3000000
4.0800000
1.8000000
1.2000000
0.5000000
0.3000000
3.5000000
0.9000000
0.7000000
0.2000000
0.1000000
1.9000000
3.0000000
2.0000000
1.5000000
0.5000000
7.0000000
0.8757854
0.5630275
0.5167204
0.1581139
2.0315019


/*Frequency Analysis with PROC FREQ*/

proc freq data=GameSales;

    tables Genre Platform;

run;

Output:

                                                                   The FREQ Procedure

Genre Frequency Percent Cumulative
Frequency
Cumulative
Percent
Adventure 1 20.00 1 20.00
Horror 1 20.00 2 40.00
RPG 1 20.00 3 60.00
Racing 1 20.00 4 80.00
Shooter 1 20.00 5 100.00

Platform Frequency Percent Cumulative
Frequency
Cumulative
Percent
PC 2 40.00 2 40.00
PS4 1 20.00 3 60.00
Switch 1 20.00 4 80.00
Xbox 1 20.00 5 100.00


/*Visualizing Sales Data with PROC SGPLOT*/

proc sgplot data=GameSales;

    vbar Title / response=Global_Sales stat=sum;

    title "Global Sales per Game";

run;

Log:

NOTE: PROCEDURE SGPLOT used (Total process time):
      real time           2.79 seconds
      cpu time            0.57 seconds

NOTE: Listing image output written to SGPlot1.png.
NOTE: There were 5 observations read from the data set WORK.GAMESALES.

/*Creating Custom Formats with PROC FORMAT*/

proc format;

    value $genre_fmt

        'Adventure' = 'Adventure Games'

        'Shooter' = 'Shooter Games'

        'Racing' = 'Racing Games'

        'RPG' = 'Role-Playing Games'

        'Horror' = 'Horror Games';

run;


data GameSales;

    set GameSales;

    format Genre $genre_fmt.;

run;

proc print;run;

Output:

Obs GameID Title Platform Genre Publisher ReleaseYear NA_Sales EU_Sales JP_Sales Other_Sales Global_Sales
1 G001 Adventure Quest PC Adventure Games Epic Games 2018 1.2 0.8 0.5 0.3 2.8
2 G002 Battle Zone PS4 Shooter Games Activision 2019 2.5 1.7 0.6 0.4 5.2
3 G003 Cyber Rally Xbox Racing Games Ubisoft 2020 1.8 1.2 0.3 0.2 3.5
4 G004 Dragon Slayer Switch Role-Playing Games Nintendo 2021 3.0 2.0 1.5 0.5 7.0
5 G005 Eternal Night PC Horror Games Capcom 2017 0.9 0.7 0.2 0.1 1.9

/*Sorting Data with PROC SORT*/

proc sort data=GameSales;

    by descending Global_Sales;

run;

proc print;run;

Output:

Obs GameID Title Platform Genre Publisher ReleaseYear NA_Sales EU_Sales JP_Sales Other_Sales Global_Sales
1 G004 Dragon Slayer Switch Role-Playing Games Nintendo 2021 3.0 2.0 1.5 0.5 7.0
2 G002 Battle Zone PS4 Shooter Games Activision 2019 2.5 1.7 0.6 0.4 5.2
3 G003 Cyber Rally Xbox Racing Games Ubisoft 2020 1.8 1.2 0.3 0.2 3.5
4 G001 Adventure Quest PC Adventure Games Epic Games 2018 1.2 0.8 0.5 0.3 2.8
5 G005 Eternal Night PC Horror Games Capcom 2017 0.9 0.7 0.2 0.1 1.9

/*Filtering Data with WHERE Clause*/

proc print data=GameSales;

    where ReleaseYear > 2018;

run;

Output:

Obs GameID Title Platform Genre Publisher ReleaseYear NA_Sales EU_Sales JP_Sales Other_Sales Global_Sales
1 G004 Dragon Slayer Switch Role-Playing Games Nintendo 2021 3.0 2.0 1.5 0.5 7.0
2 G002 Battle Zone PS4 Shooter Games Activision 2019 2.5 1.7 0.6 0.4 5.2
3 G003 Cyber Rally Xbox Racing Games Ubisoft 2020 1.8 1.2 0.3 0.2 3.5

/*Aggregating Sales by Genre with PROC SQL*/

proc sql;

    select Genre, sum(Global_Sales) as Total_Sales

    from GameSales

    group by Genre;

quit;

Output:

Genre Total_Sales
Adventure Games 2.8
Horror Games 1.9
Role-Playing Games 7
Racing Games 3.5
Shooter Games 5.2


/*Creating a Subset Dataset*/

data TopSellers;

    set GameSales;

    if Global_Sales > 3;

run;

proc print;run;

Output:

Obs GameID Title Platform Genre Publisher ReleaseYear NA_Sales EU_Sales JP_Sales Other_Sales Global_Sales
1 G004 Dragon Slayer Switch Role-Playing Games Nintendo 2021 3.0 2.0 1.5 0.5 7.0
2 G002 Battle Zone PS4 Shooter Games Activision 2019 2.5 1.7 0.6 0.4 5.2
3 G003 Cyber Rally Xbox Racing Games Ubisoft 2020 1.8 1.2 0.3 0.2 3.5

/*Exporting Data to CSV*/

proc export data=GameSales

    outfile="C:\SASData\GameSales.csv"

    dbms=csv

    replace;

run;


PRACTICE AND COMMENT YOUR CODE: 

-->PLEASE FOLLOW OUR BLOG FOR MORE UPDATES.

TO FOLLOW OUR TELEGRAM CHANNEL CLICK HERE