​164.BOOKSTORE SALES DATA ANALYSIS USING PROC SQL | PROC MEANS | PROC FREQ | PROC PRINT | PROC GCHART | PROC GPLOT | PROC EXPORT | MACROS

BOOKSTORE SALES DATA ANALYSIS USING PROC SQL | PROC MEANS | PROC FREQ | PROC PRINT | PROC GCHART | PROC GPLOT | PROC EXPORT | MACROS


/*Creating a unique dataset and applying various SAS procedures, including SQL and macros. We'll focus on a dataset related to bookstore sales.*/

/*Dataset Creation*/

data bookstore_sales;

    infile datalines dlm=',' dsd;

    input TransactionID : $10.

          CustomerID : $8.

          BookID : $10.

          AuthorID : $8.

          Genre : $15.

          Price : 8.2

          Quantity : 8.

          TransactionDate : date9.;

    format TransactionDate date9.;

datalines;

T001,C001,B001,A001,Fiction,299.99,1,01JAN2025

T002,C002,B002,A002,Non-Fiction,199.50,2,02JAN2025

T003,C001,B003,A003,Science,150.00,1,03JAN2025

T004,C003,B001,A001,Fiction,299.99,1,04JAN2025

T005,C004,B004,A004,History,250.75,1,05JAN2025

T006,C002,B005,A005,Science,175.00,2,06JAN2025

T007,C005,B006,A006,Children,120.00,3,07JAN2025

T008,C006,B007,A007,Fiction,310.00,1,08JAN2025

T009,C001,B008,A008,Non-Fiction,220.00,1,09JAN2025

T010,C007,B009,A009,History,260.00,2,10JAN2025

;

run;

proc print;run;

Output:

Obs TransactionID CustomerID BookID AuthorID Genre Price Quantity TransactionDate
1 T001 C001 B001 A001 Fiction 299.99 1 01JAN2025
2 T002 C002 B002 A002 Non-Fiction 199.50 2 02JAN2025
3 T003 C001 B003 A003 Science 150.00 1 03JAN2025
4 T004 C003 B001 A001 Fiction 299.99 1 04JAN2025
5 T005 C004 B004 A004 History 250.75 1 05JAN2025
6 T006 C002 B005 A005 Science 175.00 2 06JAN2025
7 T007 C005 B006 A006 Children 120.00 3 07JAN2025
8 T008 C006 B007 A007 Fiction 310.00 1 08JAN2025
9 T009 C001 B008 A008 Non-Fiction 220.00 1 09JAN2025
10 T010 C007 B009 A009 History 260.00 2 10JAN2025


/*View the Dataset*/

proc print data=bookstore_sales;

    title "Bookstore Sales Data";

run;

Output:

                                                                        Bookstore Sales Data

Obs TransactionID CustomerID BookID AuthorID Genre Price Quantity TransactionDate
1 T001 C001 B001 A001 Fiction 299.99 1 01JAN2025
2 T002 C002 B002 A002 Non-Fiction 199.50 2 02JAN2025
3 T003 C001 B003 A003 Science 150.00 1 03JAN2025
4 T004 C003 B001 A001 Fiction 299.99 1 04JAN2025
5 T005 C004 B004 A004 History 250.75 1 05JAN2025
6 T006 C002 B005 A005 Science 175.00 2 06JAN2025
7 T007 C005 B006 A006 Children 120.00 3 07JAN2025
8 T008 C006 B007 A007 Fiction 310.00 1 08JAN2025
9 T009 C001 B008 A008 Non-Fiction 220.00 1 09JAN2025
10 T010 C007 B009 A009 History 260.00 2 10JAN2025


/*Summary Statistics*/

proc means data=bookstore_sales n mean min max;

    var Price Quantity;

    title "Summary Statistics for Price and Quantity";

run;

Output:

                                               Summary Statistics for Price and Quantity

                                                          The MEANS Procedure

Variable N Mean Minimum Maximum
Price
Quantity
10
10
228.5230000
1.5000000
120.0000000
1.0000000
310.0000000
3.0000000

/*Frequency of Genres*/

proc freq data=bookstore_sales;

    tables Genre;

    title "Frequency of Book Genres Sold";

run;

Output:

                                                      Frequency of Book Genres Sold

                                                              The FREQ Procedure

Genre Frequency Percent Cumulative
Frequency
Cumulative
Percent
Children 1 10.00 1 10.00
Fiction 3 30.00 4 40.00
History 2 20.00 6 60.00
Non-Fiction 2 20.00 8 80.00
Science 2 20.00 10 100.00


/*Calculate Total Sale Amount*/

data bookstore_sales;

    set bookstore_sales;

    TotalAmount = Price * Quantity;

run;

proc print;run;

Output:

Obs TransactionID CustomerID BookID AuthorID Genre Price Quantity TransactionDate TotalAmount
1 T001 C001 B001 A001 Fiction 299.99 1 01JAN2025 299.99
2 T002 C002 B002 A002 Non-Fiction 199.50 2 02JAN2025 399.00
3 T003 C001 B003 A003 Science 150.00 1 03JAN2025 150.00
4 T004 C003 B001 A001 Fiction 299.99 1 04JAN2025 299.99
5 T005 C004 B004 A004 History 250.75 1 05JAN2025 250.75
6 T006 C002 B005 A005 Science 175.00 2 06JAN2025 350.00
7 T007 C005 B006 A006 Children 120.00 3 07JAN2025 360.00
8 T008 C006 B007 A007 Fiction 310.00 1 08JAN2025 310.00
9 T009 C001 B008 A008 Non-Fiction 220.00 1 09JAN2025 220.00
10 T010 C007 B009 A009 History 260.00 2 10JAN2025 520.00


/*Extract Month from Transaction Date*/

data bookstore_sales;

    set bookstore_sales;

    TransactionMonth = month(TransactionDate);

run;

proc print;run;

Output:

Obs TransactionID CustomerID BookID AuthorID Genre Price Quantity TransactionDate TotalAmount TransactionMonth
1 T001 C001 B001 A001 Fiction 299.99 1 01JAN2025 299.99 1
2 T002 C002 B002 A002 Non-Fiction 199.50 2 02JAN2025 399.00 1
3 T003 C001 B003 A003 Science 150.00 1 03JAN2025 150.00 1
4 T004 C003 B001 A001 Fiction 299.99 1 04JAN2025 299.99 1
5 T005 C004 B004 A004 History 250.75 1 05JAN2025 250.75 1
6 T006 C002 B005 A005 Science 175.00 2 06JAN2025 350.00 1
7 T007 C005 B006 A006 Children 120.00 3 07JAN2025 360.00 1
8 T008 C006 B007 A007 Fiction 310.00 1 08JAN2025 310.00 1
9 T009 C001 B008 A008 Non-Fiction 220.00 1 09JAN2025 220.00 1
10 T010 C007 B009 A009 History 260.00 2 10JAN2025 520.00 1


/*Total Sales per Genre*/

proc sql;

    select Genre, sum(TotalAmount) as TotalSales format=8.2

    from bookstore_sales

    group by Genre;

quit;

Output:

Genre TotalSales
Children 360.00
Fiction 909.98
History 770.75
Non-Fiction 619.00
Science 500.00

/*Top 3 Customers by Total Purchase*/

proc sql outobs=3;

    select CustomerID, sum(TotalAmount) as TotalSpent format=8.2

    from bookstore_sales

    group by CustomerID

    order by TotalSpent desc;

quit;

Output:

CustomerID TotalSpent
C002 749.00
C001 669.99
C007 520.00

/*Average Quantity Sold per Genre*/

proc sql;

    select Genre, avg(Quantity) as AvgQuantitySold format=8.2

    from bookstore_sales

    group by Genre;

quit;

Output:

Genre AvgQuantitySold
Children 3.00
Fiction 1.00
History 1.50
Non-Fiction 1.50
Science 1.50


/*Macro to Generate Sales Report for a Given Genre*/

%macro genre_sales_report(genre);

    proc sql;

        select * from bookstore_sales

        where Genre = "&genre";

    quit;

%mend genre_sales_report;


%genre_sales_report(Fiction)

Output:

TransactionID CustomerID BookID AuthorID Genre Price Quantity TransactionDate TotalAmount TransactionMonth
T001 C001 B001 A001 Fiction 299.99 1 01JAN2025 299.99 1
T004 C003 B001 A001 Fiction 299.99 1 04JAN2025 299.99 1
T008 C006 B007 A007 Fiction 310 1 08JAN2025 310 1


/*Macro to Identify High-Value Transactions*/

%macro high_value_transactions(threshold);

    proc sql;

        select * from bookstore_sales

        where TotalAmount > &threshold;

    quit;

%mend high_value_transactions;


%high_value_transactions(500)

Output:

TransactionID CustomerID BookID AuthorID Genre Price Quantity TransactionDate TotalAmount TransactionMonth
T010 C007 B009 A009 History 260 2 10JAN2025 520 1

/*Monthly Sales Trend*/

proc sql;

    select TransactionMonth, sum(TotalAmount) as MonthlySales format=8.2

    from bookstore_sales

    group by TransactionMonth

    order by TransactionMonth;

quit;

Output:

/*Identify Most Popular Book*/

proc sql outobs=1;

    select BookID, sum(Quantity) as TotalSold

    from bookstore_sales

    group by BookID

    order by TotalSold desc;

quit;

Output:

TransactionMonth MonthlySales
1 3159.73

/*Bar Chart of Sales per Genre*/

proc gchart data=bookstore_sales;

    vbar Genre / sumvar=TotalAmount type=sum

         subgroup=Genre

         patternid=midpoint;

    title "Total Sales per Genre";

run;

quit;

Log:

NOTE: There were 10 observations read from the data set WORK.BOOKSTORE_SALES.

NOTE: PROCEDURE GCHART used (Total process time):

      real time           2.01 seconds

      cpu time            0.50 seconds


/*Line Chart of Monthly Sales*/

proc gplot data=bookstore_sales;

    plot TotalAmount*TransactionMonth;

    title "Monthly Sales Trend";

run;

quit;

Log:

NOTE: There were 10 observations read from the data set WORK.BOOKSTORE_SALES.

NOTE: PROCEDURE GPLOT used (Total process time):

      real time           0.53 seconds

      cpu time            0.25 seconds


/*Creating Separate Datasets per Genre Using Macros*/

/* Step 1: Create a sanitized version of Genre for valid dataset names */

data bookstore_sales_sanitized;

    set bookstore_sales;

    /* Replace spaces and special characters with underscores */

    Genre_sanitized = compress(Genre, , 'kad'); /* 'kad' removes all but letters and digits */

    Genre_sanitized = translate(Genre_sanitized, '_', ' '); /* Replace spaces with underscores */

run;

proc print;run;


/* Step 2: Get distinct sanitized genres */

proc sql noprint;

    select distinct Genre_sanitized into :genre1-:genre999

    from bookstore_sales_sanitized;

    %let genre_count = &sqlobs;

quit;


/* Step 3: Macro to split dataset by sanitized genre */

%macro split_by_genre;

    %do i=1 %to &genre_count;

        data &&genre&i;

            set bookstore_sales_sanitized;

            if Genre_sanitized = "&&genre&i";

        run;

    %end;

%mend split_by_genre;


%split_by_genre

Log:

NOTE: There were 10 observations read from the data set WORK.BOOKSTORE_SALES_SANITIZED.

NOTE: The data set WORK.CHILDREN_______ has 1 observations and 11 variables.

NOTE: DATA statement used (Total process time):

      real time           0.01 seconds

      cpu time            0.01 seconds


NOTE: There were 10 observations read from the data set WORK.BOOKSTORE_SALES_SANITIZED.

NOTE: The data set WORK.FICTION________ has 3 observations and 11 variables.

NOTE: DATA statement used (Total process time):

      real time           0.00 seconds

      cpu time            0.00 seconds


NOTE: There were 10 observations read from the data set WORK.BOOKSTORE_SALES_SANITIZED.

NOTE: The data set WORK.HISTORY________ has 2 observations and 11 variables.

NOTE: DATA statement used (Total process time):

      real time           0.00 seconds

      cpu time            0.00 seconds


NOTE: There were 10 observations read from the data set WORK.BOOKSTORE_SALES_SANITIZED.

NOTE: The data set WORK.NONFICTION_____ has 2 observations and 11 variables.

NOTE: DATA statement used (Total process time):

      real time           0.01 seconds

      cpu time            0.01 seconds


NOTE: There were 10 observations read from the data set WORK.BOOKSTORE_SALES_SANITIZED.

NOTE: The data set WORK.SCIENCE________ has 2 observations and 11 variables.

NOTE: DATA statement used (Total process time):

      real time           0.00 seconds

      cpu time            0.00 seconds


/*Exporting to CSV*/

proc export data=bookstore_sales

    outfile='bookstore_sales.csv'

    dbms=csv

    replace;

run;

Log:

10 records created in C:\Users\Lenovo\Desktop\NEW\EXCEL EX OF SAS\bookstore_sales.sales from

BOOKSTORE_SALES.

NOTE: "bookstore_sales.sales" file was successfully created.

NOTE: PROCEDURE EXPORT used (Total process time):

      real time           1.60 seconds

      cpu time            0.20 seconds


/*Exporting Genre-wise Data*/

%macro export_genre_data;

    %do i=1 %to &genre_count;

        proc export data=&&genre&i

            outfile="&&genre&i..csv"

            dbms=csv

            replace;

        run;

    %end;

%mend export_genre_data;


%export_genre_data

Log:
2 records created in C:\Users\Lenovo\Desktop\NEW\EXCEL EX OF SAS\Science________csv from
SCIENCE________.


NOTE: "Science________csv" file was successfully created.
NOTE: PROCEDURE EXPORT used (Total process time):
      real time           0.10 seconds
      cpu time            0.06 seconds

PRACTICE AND COMMENT YOUR CODE: 

-->PLEASE FOLLOW OUR BLOG FOR MORE UPDATES.

TO FOLLOW OUR TELEGRAM CHANNEL CLICK HERE


Comments