176.SAS BOOKS DATASET ANALYSIS | PROC PRINT | PROC CONTENTS | PROC MEANS | PROC FREQ | PROC SGPLOT | PROC SQL | PROC EXPORT | MACRO PROGRAMMING | DATA MERGING | ADVANCED DATA MANIPULATION

SAS BOOKS DATASET ANALYSIS | PROC PRINT | PROC CONTENTS | PROC MEANS | PROC FREQ | PROC SGPLOT | PROC SQL | PROC EXPORT | MACRO PROGRAMMING | DATA MERGING | ADVANCED DATA MANIPULATION 

 /*Creation of a unique "Books" dataset and apply various SAS procedures*/

/*Creating the Books Dataset*/

data books;

    infile datalines dsd truncover;

    input 

        BookID : $5.

        Title : $50.

        Author : $30.

        Genre : $15.

        Price : 8.

        Pages : 8.

        PubYear : 4.

        Publisher : $30.

        Rating : 3.1;

    datalines;

B001,"The Silent Patient","Alex Michaelides","Thriller",15.99,336,2019,"Celadon Books",4.1

B002,"Educated","Tara Westover","Memoir",13.99,352,2018,"Random House",4.7

B003,"Becoming","Michelle Obama","Biography",17.99,448,2018,"Crown Publishing",4.9

B004,"Where the Crawdads Sing","Delia Owens","Fiction",14.99,384,2018,"G.P. Putnam's Sons",4.8

B005,"The Testaments","Margaret Atwood","Dystopian",16.99,432,2019,"Nan A. Talese",4.5

B006,"Normal People","Sally Rooney","Romance",13.50,273,2018,"Faber & Faber",4.2

B007,"The Midnight Library","Matt Haig","Fantasy",14.00,304,2020,"Canongate Books",4.3

B008,"Dune","Frank Herbert","Sci-Fi",18.00,412,1965,"Chilton Books",4.6

B009,"Sapiens","Yuval Noah Harari","History",19.99,498,2011,"Harvill Secker",4.7

B010,"The Alchemist","Paulo Coelho","Adventure",10.99,208,1988,"HarperTorch",4.0

B011,"Atomic Habits","James Clear","Self-Help",16.20,320,2018,"Avery",4.8

B012,"The Great Gatsby","F. Scott Fitzgerald","Classic",9.99,180,1925,"Scribner",4.4

;

run;

proc print;run;

Output:

Obs BookID Title Author Genre Price Pages PubYear Publisher Rating
1 B001 The Silent Patient Alex Michaelides Thriller 15.99 336 2019 Celadon Books 4.1
2 B002 Educated Tara Westover Memoir 13.99 352 2018 Random House 4.7
3 B003 Becoming Michelle Obama Biography 17.99 448 2018 Crown Publishing 4.9
4 B004 Where the Crawdads Sing Delia Owens Fiction 14.99 384 2018 G.P. Putnam's Sons 4.8
5 B005 The Testaments Margaret Atwood Dystopian 16.99 432 2019 Nan A. Talese 4.5
6 B006 Normal People Sally Rooney Romance 13.50 273 2018 Faber & Faber 4.2
7 B007 The Midnight Library Matt Haig Fantasy 14.00 304 2020 Canongate Books 4.3
8 B008 Dune Frank Herbert Sci-Fi 18.00 412 1965 Chilton Books 4.6
9 B009 Sapiens Yuval Noah Harari History 19.99 498 2011 Harvill Secker 4.7
10 B010 The Alchemist Paulo Coelho Adventure 10.99 208 1988 HarperTorch 4.0
11 B011 Atomic Habits James Clear Self-Help 16.20 320 2018 Avery 4.8
12 B012 The Great Gatsby F. Scott Fitzgerald Classic 9.99 180 1925 Scribner 4.4


/*Exploring the Dataset*/

proc contents data=books;

    title "Structure of Books Dataset";

run;

Output:

                                                            The CONTENTS Procedure

Data Set Name WORK.BOOKS Observations 12
Member Type DATA Variables 9
Engine V9 Indexes 0
Created 14/09/2015 00:04:41 Observation Length 168
Last Modified 14/09/2015 00:04:41 Deleted Observations 0
Protection   Compressed NO
Data Set Type   Sorted NO
Label      
Data Representation WINDOWS_64    
Encoding wlatin1 Western (Windows)    


Engine/Host Dependent Information
Data Set Page Size 65536
Number of Data Set Pages 1
First Data Page 1
Max Obs per Page 389
Obs in First Data Page 12
Number of Data Set Repairs 0
ExtendObsCounter YES
Filename C:\Users\Lenovo\AppData\Local\Temp\SAS Temporary Files\_TD7568_DESKTOP-QFAA4KV_\books.sas7bdat
Release Created 9.0401M2
Host Created X64_8HOME


Alphabetic List of Variables and Attributes
# Variable Type Len
3 Author Char 30
1 BookID Char 5
4 Genre Char 15
6 Pages Num 8
5 Price Num 8
7 PubYear Num 8
8 Publisher Char 30
9 Rating Num 8
2 Title Char 50


/*Summary Statistics*/

proc means data=books mean median min max std;

    var Price Pages Rating;

    title "Summary Statistics for Numeric Variables";

run;

Output:

                                            Summary Statistics for Numeric Variables

                                                        The MEANS Procedure

Variable Mean Median Minimum Maximum Std Dev
Price
Pages
Rating
15.2183333
345.5833333
4.5000000
15.4900000
344.0000000
4.5500000
9.9900000
180.0000000
4.0000000
19.9900000
498.0000000
4.9000000
2.9247963
95.9199082
0.2984810


/*Frequency of Genres*/

proc freq data=books;

    tables Genre;

    title "Frequency Distribution of Genres";

run;

Output:

                                                       Frequency Distribution of Genres
                                                                 The FREQ Procedure

Genre Frequency Percent Cumulative
Frequency
Cumulative
Percent
Adventure 1 8.33 1 8.33
Biography 1 8.33 2 16.67
Classic 1 8.33 3 25.00
Dystopian 1 8.33 4 33.33
Fantasy 1 8.33 5 41.67
Fiction 1 8.33 6 50.00
History 1 8.33 7 58.33
Memoir 1 8.33 8 66.67
Romance 1 8.33 9 75.00
Sci-Fi 1 8.33 10 83.33
Self-Help 1 8.33 11 91.67
Thriller 1 8.33 12 100.00


/*Histogram of Book Prices*/

proc sgplot data=books;

    histogram Price;

    density Price;

    title "Histogram of Book Prices";

run;

Log:

NOTE: PROCEDURE SGPLOT used (Total process time):

      real time           2.81 seconds

      cpu time            0.54 seconds


NOTE: Listing image output written to SGPlot1.png.

NOTE: There were 12 observations read from the data set WORK.BOOKS.


/*Bar Chart of Average Rating by Genre*/

proc sgplot data=books;

    vbar Genre / response=Rating stat=mean;

    title "Average Rating by Genre";

run;

Log:

NOTE: PROCEDURE SGPLOT used (Total process time):

      real time           0.57 seconds

      cpu time            0.04 seconds


NOTE: Listing image output written to SGPlot3.png.

NOTE: There were 12 observations read from the data set WORK.BOOKS.


/*Top 5 Highest Rated Books*/

proc sql outobs=5;

    title "Top 5 Highest Rated Books";

    select Title, Author, Rating

    from books

    order by Rating desc

    limit 5;

quit;

Output:

                                                            Top 5 Highest Rated Books

Title Author Rating
Becoming Michelle Obama 4.9
Atomic Habits James Clear 4.8
Where the Crawdads Sing Delia Owens 4.8
Sapiens Yuval Noah Harari 4.7
Educated Tara Westover 4.7


/*Average Price by Genre*/

proc sql;

    title "Average Price by Genre";

    select Genre, avg(Price) as AvgPrice format=6.2

    from books

    group by Genre;

quit;

Output:

                                                               Average Price by Genre

Genre AvgPrice
Adventure 10.99
Biography 17.99
Classic 9.99
Dystopian 16.99
Fantasy 14.00
Fiction 14.99
History 19.99
Memoir 13.99
Romance 13.50
Sci-Fi 18.00
Self-Help 16.20
Thriller 15.99


/*Books Published After 2015*/

proc sql;

    title "Books Published After 2015";

    select Title, PubYear

    from books

    where PubYear > 2015;

quit;

Output:

                                                               Books Published After 2015

Title PubYear
The Silent Patient 2019
Educated 2018
Becoming 2018
Where the Crawdads Sing 2018
The Testaments 2019
Normal People 2018
The Midnight Library 2020
Atomic Habits 2018


/*Macro to Generate Report for a Specific Genre*/

%macro genre_report(genre_name);

    proc sql;

        title "Books in Genre: &genre_name";

        select Title, Author, Price, Rating

        from books

        where Genre = "&genre_name";

    quit;

%mend;


%genre_report(Fiction);

Output:

                                                           Books in Genre: Fiction

Title Author Price Rating
Where the Crawdads Sing Delia Owens 14.99 4.8

%genre_report(Thriller);

Output:

                                                        Books in Genre: Thriller

Title Author Price Rating
The Silent Patient Alex Michaelides 15.99 4.1


/*Macro to Identify Books Above a Certain Rating*/

%macro high_rating(threshold);

    proc sql;

        title "Books with Rating Above &threshold";

        select Title, Rating

        from books

        where Rating > &threshold;

    quit;

%mend;


%high_rating(4.5);

Output:

                                                            Books with Rating Above 4.5

Title Rating
Educated 4.7
Becoming 4.9
Where the Crawdads Sing 4.8
Dune 4.6
Sapiens 4.7
Atomic Habits 4.8


/*Creating Separate Datasets for Each Genre*/

proc sql noprint;

    select distinct Genre into :genre1-:genre999

    from books;

    %let genre_count = &sqlobs;

quit;


%macro split_by_genre;

    %do i = 1 %to &genre_count;

        data genre_&&genre&i;

            set books;

            where Genre = "&&genre&i";

        run;

proc print;run;

    %end;

%mend;


%split_by_genre;

Output:

Obs BookID Title Author Genre Price Pages PubYear Publisher Rating
1 B010 The Alchemist Paulo Coelho Adventure 10.99 208 1988 HarperTorch 4

Obs BookID Title Author Genre Price Pages PubYear Publisher Rating
1 B003 Becoming Michelle Obama Biography 17.99 448 2018 Crown Publishing 4.9

Obs BookID Title Author Genre Price Pages PubYear Publisher Rating
1 B012 The Great Gatsby F. Scott Fitzgerald Classic 9.99 180 1925 Scribner 4.4

Obs BookID Title Author Genre Price Pages PubYear Publisher Rating
1 B005 The Testaments Margaret Atwood Dystopian 16.99 432 2019 Nan A. Talese 4.5

Obs BookID Title Author Genre Price Pages PubYear Publisher Rating
1 B007 The Midnight Library Matt Haig Fantasy 14 304 2020 Canongate Books 4.3

Obs BookID Title Author Genre Price Pages PubYear Publisher Rating
1 B004 Where the Crawdads Sing Delia Owens Fiction 14.99 384 2018 G.P. Putnam's Sons 4.8

Obs BookID Title Author Genre Price Pages PubYear Publisher Rating
1 B009 Sapiens Yuval Noah Harari History 19.99 498 2011 Harvill Secker 4.7

Obs BookID Title Author Genre Price Pages PubYear Publisher Rating
1 B002 Educated Tara Westover Memoir 13.99 352 2018 Random House 4.7

Obs BookID Title Author Genre Price Pages PubYear Publisher Rating
1 B006 Normal People Sally Rooney Romance 13.5 273 2018 Faber & Faber 4.2

Obs BookID Title Author Genre Price Pages PubYear Publisher Rating
1 B006 Normal People Sally Rooney Romance 13.5 273 2018 Faber & Faber 4.2

Obs BookID Title Author Genre Price Pages PubYear Publisher Rating
1 B006 Normal People Sally Rooney Romance 13.5 273 2018 Faber & Faber 4.2

Obs BookID Title Author Genre Price Pages PubYear Publisher Rating
1 B001 The Silent Patient Alex Michaelides Thriller 15.99 336 2019 Celadon Books 4.1

/*Creating a Sales Dataset*/

data sales;

    input BookID $ UnitsSold;

    datalines;

B001 5000

B002 6000

B003 8000

B004 7500

B005 6200

B006 4300

B007 5400

B008 7000

B009 6700

B010 5800

B011 7200

B012 6900

;

run;

proc print;run;

Output:

Obs BookID UnitsSold
1 B001 5000
2 B002 6000
3 B003 8000
4 B004 7500
5 B005 6200
6 B006 4300
7 B007 5400
8 B008 7000
9 B009 6700
10 B010 5800
11 B011 7200
12 B012 6900


/*Merging Datasets*/

proc sql;

    create table books_sales as

    select a.*, b.UnitsSold

    from books as a

    left join sales as b

    on a.BookID = b.BookID;

quit;

proc print;run;

Output:

Obs BookID Title Author Genre Price Pages PubYear Publisher Rating UnitsSold
1 B001 The Silent Patient Alex Michaelides Thriller 15.99 336 2019 Celadon Books 4.1 5000
2 B002 Educated Tara Westover Memoir 13.99 352 2018 Random House 4.7 6000
3 B003 Becoming Michelle Obama Biography 17.99 448 2018 Crown Publishing 4.9 8000
4 B004 Where the Crawdads Sing Delia Owens Fiction 14.99 384 2018 G.P. Putnam's Sons 4.8 7500
5 B005 The Testaments Margaret Atwood Dystopian 16.99 432 2019 Nan A. Talese 4.5 6200
6 B006 Normal People Sally Rooney Romance 13.50 273 2018 Faber & Faber 4.2 4300
7 B007 The Midnight Library Matt Haig Fantasy 14.00 304 2020 Canongate Books 4.3 5400
8 B008 Dune Frank Herbert Sci-Fi 18.00 412 1965 Chilton Books 4.6 7000
9 B009 Sapiens Yuval Noah Harari History 19.99 498 2011 Harvill Secker 4.7 6700
10 B010 The Alchemist Paulo Coelho Adventure 10.99 208 1988 HarperTorch 4.0 5800
11 B011 Atomic Habits James Clear Self-Help 16.20 320 2018 Avery 4.8 7200
12 B012 The Great Gatsby F. Scott Fitzgerald Classic 9.99 180 1925 Scribner 4.4 6900


/*Total Revenue per Book*/

data books_sales;

    set books_sales;

    Revenue = Price * UnitsSold;

run;


proc print data=books_sales;

    var Title UnitsSold Revenue;

    title "Revenue per Book";

run;

Output:

                                                              Revenue per Book

Obs Title UnitsSold Revenue
1 The Silent Patient 5000 79950
2 Educated 6000 83940
3 Becoming 8000 143920
4 Where the Crawdads Sing 7500 112425
5 The Testaments 6200 105338
6 Normal People 4300 58050
7 The Midnight Library 5400 75600
8 Dune 7000 126000
9 Sapiens 6700 133933
10 The Alchemist 5800 63742
11 Atomic Habits 7200 116640
12 The Great Gatsby 6900 68931


/*Top 3 Bestselling Books*/

proc sort data=books_sales out=sorted_sales;

    by descending UnitsSold;

run;


proc print data=sorted_sales(obs=3);

    var Title UnitsSold;

    title "Top 3 Bestselling Books";

run;

Output:

                                                               Top 3 Bestselling Books

Obs Title UnitsSold
1 Becoming 8000
2 Where the Crawdads Sing 7500
3 Atomic Habits 7200


/*Exporting to CSV*/

proc export data=books_sales

    outfile="books_sales.csv"

    dbms=csv

    replace;

run;


/*Exporting to Excel*/

proc export data=books_sales

    outfile="books_sales.xlsx"

    dbms=xlsx

    replace;

run;



Find A Mistake and Comment It:

%macro high_rating(threshold);

    proc sql;

        title "Books with Rating Above &threshold";

        select Title, Rating

        from books

        where Rating > threshold;

    quit;

%mend;


%high_rating(4.5);


PRACTICE AND COMMENT YOUR CODE: 

--->PLEASE FOLLOW OUR BLOG FOR MORE UPDATES.

PLEASE FOLLOW OUR TELEGRAM CHANNEL CLICK HERE

PLEASE FOLLOW OUR FACEBOOK PAGE  CLICK HERE

Comments