SAS BOOKS DATASET ANALYSIS | PROC PRINT | PROC CONTENTS | PROC MEANS | PROC FREQ | PROC SGPLOT | PROC SQL | PROC EXPORT | MACRO PROGRAMMING | DATA MERGING | ADVANCED DATA MANIPULATION
/*Creation of a unique "Books" dataset and apply various SAS procedures*/
/*Creating the Books Dataset*/
data books;
infile datalines dsd truncover;
input
BookID : $5.
Title : $50.
Author : $30.
Genre : $15.
Price : 8.
Pages : 8.
PubYear : 4.
Publisher : $30.
Rating : 3.1;
datalines;
B001,"The Silent Patient","Alex Michaelides","Thriller",15.99,336,2019,"Celadon Books",4.1
B002,"Educated","Tara Westover","Memoir",13.99,352,2018,"Random House",4.7
B003,"Becoming","Michelle Obama","Biography",17.99,448,2018,"Crown Publishing",4.9
B004,"Where the Crawdads Sing","Delia Owens","Fiction",14.99,384,2018,"G.P. Putnam's Sons",4.8
B005,"The Testaments","Margaret Atwood","Dystopian",16.99,432,2019,"Nan A. Talese",4.5
B006,"Normal People","Sally Rooney","Romance",13.50,273,2018,"Faber & Faber",4.2
B007,"The Midnight Library","Matt Haig","Fantasy",14.00,304,2020,"Canongate Books",4.3
B008,"Dune","Frank Herbert","Sci-Fi",18.00,412,1965,"Chilton Books",4.6
B009,"Sapiens","Yuval Noah Harari","History",19.99,498,2011,"Harvill Secker",4.7
B010,"The Alchemist","Paulo Coelho","Adventure",10.99,208,1988,"HarperTorch",4.0
B011,"Atomic Habits","James Clear","Self-Help",16.20,320,2018,"Avery",4.8
B012,"The Great Gatsby","F. Scott Fitzgerald","Classic",9.99,180,1925,"Scribner",4.4
;
run;
proc print;run;
Output:
| Obs | BookID | Title | Author | Genre | Price | Pages | PubYear | Publisher | Rating |
|---|---|---|---|---|---|---|---|---|---|
| 1 | B001 | The Silent Patient | Alex Michaelides | Thriller | 15.99 | 336 | 2019 | Celadon Books | 4.1 |
| 2 | B002 | Educated | Tara Westover | Memoir | 13.99 | 352 | 2018 | Random House | 4.7 |
| 3 | B003 | Becoming | Michelle Obama | Biography | 17.99 | 448 | 2018 | Crown Publishing | 4.9 |
| 4 | B004 | Where the Crawdads Sing | Delia Owens | Fiction | 14.99 | 384 | 2018 | G.P. Putnam's Sons | 4.8 |
| 5 | B005 | The Testaments | Margaret Atwood | Dystopian | 16.99 | 432 | 2019 | Nan A. Talese | 4.5 |
| 6 | B006 | Normal People | Sally Rooney | Romance | 13.50 | 273 | 2018 | Faber & Faber | 4.2 |
| 7 | B007 | The Midnight Library | Matt Haig | Fantasy | 14.00 | 304 | 2020 | Canongate Books | 4.3 |
| 8 | B008 | Dune | Frank Herbert | Sci-Fi | 18.00 | 412 | 1965 | Chilton Books | 4.6 |
| 9 | B009 | Sapiens | Yuval Noah Harari | History | 19.99 | 498 | 2011 | Harvill Secker | 4.7 |
| 10 | B010 | The Alchemist | Paulo Coelho | Adventure | 10.99 | 208 | 1988 | HarperTorch | 4.0 |
| 11 | B011 | Atomic Habits | James Clear | Self-Help | 16.20 | 320 | 2018 | Avery | 4.8 |
| 12 | B012 | The Great Gatsby | F. Scott Fitzgerald | Classic | 9.99 | 180 | 1925 | Scribner | 4.4 |
/*Exploring the Dataset*/
proc contents data=books;
title "Structure of Books Dataset";
run;
Output:
| Data Set Name | WORK.BOOKS | Observations | 12 |
|---|---|---|---|
| Member Type | DATA | Variables | 9 |
| Engine | V9 | Indexes | 0 |
| Created | 14/09/2015 00:04:41 | Observation Length | 168 |
| Last Modified | 14/09/2015 00:04:41 | Deleted Observations | 0 |
| Protection | Compressed | NO | |
| Data Set Type | Sorted | NO | |
| Label | |||
| Data Representation | WINDOWS_64 | ||
| Encoding | wlatin1 Western (Windows) |
| Engine/Host Dependent Information | |
|---|---|
| Data Set Page Size | 65536 |
| Number of Data Set Pages | 1 |
| First Data Page | 1 |
| Max Obs per Page | 389 |
| Obs in First Data Page | 12 |
| Number of Data Set Repairs | 0 |
| ExtendObsCounter | YES |
| Filename | C:\Users\Lenovo\AppData\Local\Temp\SAS Temporary Files\_TD7568_DESKTOP-QFAA4KV_\books.sas7bdat |
| Release Created | 9.0401M2 |
| Host Created | X64_8HOME |
| Alphabetic List of Variables and Attributes | |||
|---|---|---|---|
| # | Variable | Type | Len |
| 3 | Author | Char | 30 |
| 1 | BookID | Char | 5 |
| 4 | Genre | Char | 15 |
| 6 | Pages | Num | 8 |
| 5 | Price | Num | 8 |
| 7 | PubYear | Num | 8 |
| 8 | Publisher | Char | 30 |
| 9 | Rating | Num | 8 |
| 2 | Title | Char | 50 |
/*Summary Statistics*/
proc means data=books mean median min max std;
var Price Pages Rating;
title "Summary Statistics for Numeric Variables";
run;
Output:
| Summary Statistics for Numeric
Variables |
| Variable | Mean | Median | Minimum | Maximum | Std Dev | ||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
|
|
|
|
/*Frequency of Genres*/
proc freq data=books;
tables Genre;
title "Frequency Distribution of Genres";
run;
Output:
| Frequency Distribution of Genres |
| Genre | Frequency | Percent | Cumulative Frequency |
Cumulative Percent |
|---|---|---|---|---|
| Adventure | 1 | 8.33 | 1 | 8.33 |
| Biography | 1 | 8.33 | 2 | 16.67 |
| Classic | 1 | 8.33 | 3 | 25.00 |
| Dystopian | 1 | 8.33 | 4 | 33.33 |
| Fantasy | 1 | 8.33 | 5 | 41.67 |
| Fiction | 1 | 8.33 | 6 | 50.00 |
| History | 1 | 8.33 | 7 | 58.33 |
| Memoir | 1 | 8.33 | 8 | 66.67 |
| Romance | 1 | 8.33 | 9 | 75.00 |
| Sci-Fi | 1 | 8.33 | 10 | 83.33 |
| Self-Help | 1 | 8.33 | 11 | 91.67 |
| Thriller | 1 | 8.33 | 12 | 100.00 |
/*Histogram of Book Prices*/
proc sgplot data=books;
histogram Price;
density Price;
title "Histogram of Book Prices";
run;
Log:
NOTE: PROCEDURE SGPLOT used (Total process time):
real time 2.81 seconds
cpu time 0.54 seconds
NOTE: Listing image output written to SGPlot1.png.
NOTE: There were 12 observations read from the data set WORK.BOOKS.
/*Bar Chart of Average Rating by Genre*/
proc sgplot data=books;
vbar Genre / response=Rating stat=mean;
title "Average Rating by Genre";
run;
Log:
NOTE: PROCEDURE SGPLOT used (Total process time):
real time 0.57 seconds
cpu time 0.04 seconds
NOTE: Listing image output written to SGPlot3.png.
NOTE: There were 12 observations read from the data set WORK.BOOKS.
/*Top 5 Highest Rated Books*/
proc sql outobs=5;
title "Top 5 Highest Rated Books";
select Title, Author, Rating
from books
order by Rating desc
limit 5;
quit;
Output:
| Top 5 Highest Rated
Books |
| Title | Author | Rating |
|---|---|---|
| Becoming | Michelle Obama | 4.9 |
| Atomic Habits | James Clear | 4.8 |
| Where the Crawdads Sing | Delia Owens | 4.8 |
| Sapiens | Yuval Noah Harari | 4.7 |
| Educated | Tara Westover | 4.7 |
/*Average Price by Genre*/
proc sql;
title "Average Price by Genre";
select Genre, avg(Price) as AvgPrice format=6.2
from books
group by Genre;
quit;
Output:
| Average Price by Genre |
| Genre | AvgPrice |
|---|---|
| Adventure | 10.99 |
| Biography | 17.99 |
| Classic | 9.99 |
| Dystopian | 16.99 |
| Fantasy | 14.00 |
| Fiction | 14.99 |
| History | 19.99 |
| Memoir | 13.99 |
| Romance | 13.50 |
| Sci-Fi | 18.00 |
| Self-Help | 16.20 |
| Thriller | 15.99 |
/*Books Published After 2015*/
proc sql;
title "Books Published After 2015";
select Title, PubYear
from books
where PubYear > 2015;
quit;
Output:
| Books Published After
2015 |
| Title | PubYear |
|---|---|
| The Silent Patient | 2019 |
| Educated | 2018 |
| Becoming | 2018 |
| Where the Crawdads Sing | 2018 |
| The Testaments | 2019 |
| Normal People | 2018 |
| The Midnight Library | 2020 |
| Atomic Habits | 2018 |
/*Macro to Generate Report for a Specific Genre*/
%macro genre_report(genre_name);
proc sql;
title "Books in Genre: &genre_name";
select Title, Author, Price, Rating
from books
where Genre = "&genre_name";
quit;
%mend;
%genre_report(Fiction);
Output:
| Books in Genre: Fiction |
| Title | Author | Price | Rating |
|---|---|---|---|
| Where the Crawdads Sing | Delia Owens | 14.99 | 4.8 |
%genre_report(Thriller);
Output:
| Books in Genre: Thriller |
| Title | Author | Price | Rating |
|---|---|---|---|
| The Silent Patient | Alex Michaelides | 15.99 | 4.1 |
/*Macro to Identify Books Above a Certain Rating*/
%macro high_rating(threshold);
proc sql;
title "Books with Rating Above &threshold";
select Title, Rating
from books
where Rating > &threshold;
quit;
%mend;
%high_rating(4.5);
Output:
| Books with Rating Above 4.5 |
| Title | Rating |
|---|---|
| Educated | 4.7 |
| Becoming | 4.9 |
| Where the Crawdads Sing | 4.8 |
| Dune | 4.6 |
| Sapiens | 4.7 |
| Atomic Habits | 4.8 |
/*Creating Separate Datasets for Each Genre*/
proc sql noprint;
select distinct Genre into :genre1-:genre999
from books;
%let genre_count = &sqlobs;
quit;
%macro split_by_genre;
%do i = 1 %to &genre_count;
data genre_&&genre&i;
set books;
where Genre = "&&genre&i";
run;
proc print;run;
%end;
%mend;
%split_by_genre;
Output:
| Obs | BookID | Title | Author | Genre | Price | Pages | PubYear | Publisher | Rating |
|---|---|---|---|---|---|---|---|---|---|
| 1 | B010 | The Alchemist | Paulo Coelho | Adventure | 10.99 | 208 | 1988 | HarperTorch | 4 |
| Obs | BookID | Title | Author | Genre | Price | Pages | PubYear | Publisher | Rating |
|---|---|---|---|---|---|---|---|---|---|
| 1 | B003 | Becoming | Michelle Obama | Biography | 17.99 | 448 | 2018 | Crown Publishing | 4.9 |
| Obs | BookID | Title | Author | Genre | Price | Pages | PubYear | Publisher | Rating |
|---|---|---|---|---|---|---|---|---|---|
| 1 | B012 | The Great Gatsby | F. Scott Fitzgerald | Classic | 9.99 | 180 | 1925 | Scribner | 4.4 |
| Obs | BookID | Title | Author | Genre | Price | Pages | PubYear | Publisher | Rating |
|---|---|---|---|---|---|---|---|---|---|
| 1 | B005 | The Testaments | Margaret Atwood | Dystopian | 16.99 | 432 | 2019 | Nan A. Talese | 4.5 |
| Obs | BookID | Title | Author | Genre | Price | Pages | PubYear | Publisher | Rating |
|---|---|---|---|---|---|---|---|---|---|
| 1 | B007 | The Midnight Library | Matt Haig | Fantasy | 14 | 304 | 2020 | Canongate Books | 4.3 |
| Obs | BookID | Title | Author | Genre | Price | Pages | PubYear | Publisher | Rating |
|---|---|---|---|---|---|---|---|---|---|
| 1 | B004 | Where the Crawdads Sing | Delia Owens | Fiction | 14.99 | 384 | 2018 | G.P. Putnam's Sons | 4.8 |
| Obs | BookID | Title | Author | Genre | Price | Pages | PubYear | Publisher | Rating |
|---|---|---|---|---|---|---|---|---|---|
| 1 | B009 | Sapiens | Yuval Noah Harari | History | 19.99 | 498 | 2011 | Harvill Secker | 4.7 |
| Obs | BookID | Title | Author | Genre | Price | Pages | PubYear | Publisher | Rating |
|---|---|---|---|---|---|---|---|---|---|
| 1 | B002 | Educated | Tara Westover | Memoir | 13.99 | 352 | 2018 | Random House | 4.7 |
| Obs | BookID | Title | Author | Genre | Price | Pages | PubYear | Publisher | Rating |
|---|---|---|---|---|---|---|---|---|---|
| 1 | B006 | Normal People | Sally Rooney | Romance | 13.5 | 273 | 2018 | Faber & Faber | 4.2 |
| Obs | BookID | Title | Author | Genre | Price | Pages | PubYear | Publisher | Rating |
|---|---|---|---|---|---|---|---|---|---|
| 1 | B006 | Normal People | Sally Rooney | Romance | 13.5 | 273 | 2018 | Faber & Faber | 4.2 |
| Obs | BookID | Title | Author | Genre | Price | Pages | PubYear | Publisher | Rating |
|---|---|---|---|---|---|---|---|---|---|
| 1 | B006 | Normal People | Sally Rooney | Romance | 13.5 | 273 | 2018 | Faber & Faber | 4.2 |
| Obs | BookID | Title | Author | Genre | Price | Pages | PubYear | Publisher | Rating |
|---|---|---|---|---|---|---|---|---|---|
| 1 | B001 | The Silent Patient | Alex Michaelides | Thriller | 15.99 | 336 | 2019 | Celadon Books | 4.1 |
/*Creating a Sales Dataset*/
data sales;
input BookID $ UnitsSold;
datalines;
B001 5000
B002 6000
B003 8000
B004 7500
B005 6200
B006 4300
B007 5400
B008 7000
B009 6700
B010 5800
B011 7200
B012 6900
;
run;
proc print;run;
Output:
| Obs | BookID | UnitsSold |
|---|---|---|
| 1 | B001 | 5000 |
| 2 | B002 | 6000 |
| 3 | B003 | 8000 |
| 4 | B004 | 7500 |
| 5 | B005 | 6200 |
| 6 | B006 | 4300 |
| 7 | B007 | 5400 |
| 8 | B008 | 7000 |
| 9 | B009 | 6700 |
| 10 | B010 | 5800 |
| 11 | B011 | 7200 |
| 12 | B012 | 6900 |
/*Merging Datasets*/
proc sql;
create table books_sales as
select a.*, b.UnitsSold
from books as a
left join sales as b
on a.BookID = b.BookID;
quit;
proc print;run;
Output:
| Obs | BookID | Title | Author | Genre | Price | Pages | PubYear | Publisher | Rating | UnitsSold |
|---|---|---|---|---|---|---|---|---|---|---|
| 1 | B001 | The Silent Patient | Alex Michaelides | Thriller | 15.99 | 336 | 2019 | Celadon Books | 4.1 | 5000 |
| 2 | B002 | Educated | Tara Westover | Memoir | 13.99 | 352 | 2018 | Random House | 4.7 | 6000 |
| 3 | B003 | Becoming | Michelle Obama | Biography | 17.99 | 448 | 2018 | Crown Publishing | 4.9 | 8000 |
| 4 | B004 | Where the Crawdads Sing | Delia Owens | Fiction | 14.99 | 384 | 2018 | G.P. Putnam's Sons | 4.8 | 7500 |
| 5 | B005 | The Testaments | Margaret Atwood | Dystopian | 16.99 | 432 | 2019 | Nan A. Talese | 4.5 | 6200 |
| 6 | B006 | Normal People | Sally Rooney | Romance | 13.50 | 273 | 2018 | Faber & Faber | 4.2 | 4300 |
| 7 | B007 | The Midnight Library | Matt Haig | Fantasy | 14.00 | 304 | 2020 | Canongate Books | 4.3 | 5400 |
| 8 | B008 | Dune | Frank Herbert | Sci-Fi | 18.00 | 412 | 1965 | Chilton Books | 4.6 | 7000 |
| 9 | B009 | Sapiens | Yuval Noah Harari | History | 19.99 | 498 | 2011 | Harvill Secker | 4.7 | 6700 |
| 10 | B010 | The Alchemist | Paulo Coelho | Adventure | 10.99 | 208 | 1988 | HarperTorch | 4.0 | 5800 |
| 11 | B011 | Atomic Habits | James Clear | Self-Help | 16.20 | 320 | 2018 | Avery | 4.8 | 7200 |
| 12 | B012 | The Great Gatsby | F. Scott Fitzgerald | Classic | 9.99 | 180 | 1925 | Scribner | 4.4 | 6900 |
/*Total Revenue per Book*/
data books_sales;
set books_sales;
Revenue = Price * UnitsSold;
run;
proc print data=books_sales;
var Title UnitsSold Revenue;
title "Revenue per Book";
run;
Output:
| Revenue per Book |
| Obs | Title | UnitsSold | Revenue |
|---|---|---|---|
| 1 | The Silent Patient | 5000 | 79950 |
| 2 | Educated | 6000 | 83940 |
| 3 | Becoming | 8000 | 143920 |
| 4 | Where the Crawdads Sing | 7500 | 112425 |
| 5 | The Testaments | 6200 | 105338 |
| 6 | Normal People | 4300 | 58050 |
| 7 | The Midnight Library | 5400 | 75600 |
| 8 | Dune | 7000 | 126000 |
| 9 | Sapiens | 6700 | 133933 |
| 10 | The Alchemist | 5800 | 63742 |
| 11 | Atomic Habits | 7200 | 116640 |
| 12 | The Great Gatsby | 6900 | 68931 |
/*Top 3 Bestselling Books*/
proc sort data=books_sales out=sorted_sales;
by descending UnitsSold;
run;
proc print data=sorted_sales(obs=3);
var Title UnitsSold;
title "Top 3 Bestselling Books";
run;
Output:
| Top 3 Bestselling Books |
| Obs | Title | UnitsSold |
|---|---|---|
| 1 | Becoming | 8000 |
| 2 | Where the Crawdads Sing | 7500 |
| 3 | Atomic Habits | 7200 |
/*Exporting to CSV*/
proc export data=books_sales
outfile="books_sales.csv"
dbms=csv
replace;
run;
/*Exporting to Excel*/
proc export data=books_sales
outfile="books_sales.xlsx"
dbms=xlsx
replace;
run;
Find A Mistake and Comment It:
%macro high_rating(threshold);
proc sql;
title "Books with Rating Above &threshold";
select Title, Rating
from books
where Rating > threshold;
quit;
%mend;
%high_rating(4.5);
No comments:
Post a Comment