SAS PROJECT 11-20 QUESTIONS: DATASTEP | PROC SQL | PROC PRINT | PROC FORMAT | PROC MEANS | PROC REPORT | MACRO | COMPLETE RETAIL SALES ANALYSIS
data sample;
input SalesID Name$ Region$ Product:$10. UnitsSold UnitPrice
SaleDate:date9. Gender$ JoiningDate:date9.;
format SaleDate joiningDate Date9.;
cards;
201 Ajay North Laptop 20 50000 05JAN2025 M 10JAN2020
202 Neha East Smartphone 50 15000 10JAN2025 F 15JUL2019
203 Ravi South Tablet 10 25000 15JAN2025 M 01JAN2018
204 Ayesha West Laptop 30 48000 20JAN2025 F 12DEC2020
205 Raj North Smartphone 70 14000 22JAN2025 M 20FEB2019
206 Naresh South Tablet 20 30000 09JAN2025 F 08FEB2021
;
run;
proc print;run;
Output:
| Obs | SalesID | Name | Region | Product | UnitsSold | UnitPrice | SaleDate | Gender | JoiningDate |
|---|---|---|---|---|---|---|---|---|---|
| 1 | 201 | Ajay | North | Laptop | 20 | 50000 | 05JAN2025 | M | 10JAN2020 |
| 2 | 202 | Neha | East | Smartphone | 50 | 15000 | 10JAN2025 | F | 15JUL2019 |
| 3 | 203 | Ravi | South | Tablet | 10 | 25000 | 15JAN2025 | M | 01JAN2018 |
| 4 | 204 | Ayesha | West | Laptop | 30 | 48000 | 20JAN2025 | F | 12DEC2020 |
| 5 | 205 | Raj | North | Smartphone | 70 | 14000 | 22JAN2025 | M | 20FEB2019 |
| 6 | 206 | Naresh | South | Tablet | 20 | 30000 | 09JAN2025 | F | 08FEB2021 |
/*Q11. Use PROC TRANSPOSE to transpose the total sales of each product by region.*/
proc transpose data=sales out=transale;
by region notsorted;
var totalsale;
run;
proc print;run;
Output:
| Obs | Region | _NAME_ | COL1 |
|---|---|---|---|
| 1 | North | TotalSale | Medium |
| 2 | East | TotalSale | Medium |
| 3 | South | TotalSale | Low |
| 4 | West | TotalSale | High |
| 5 | North | TotalSale | Medium |
| 6 | South | TotalSale | Medium |
*/alternate solution:*/
proc transpose data=sale out=transaleI;
by product notsorted;
var totalsale;
id region;
run;
proc print;run;
Output:
| Obs | Product | _NAME_ | North | East | South | West |
|---|---|---|---|---|---|---|
| 1 | Laptop | TotalSale | Medium | . | . | . |
| 2 | Smartphone | TotalSale | . | Medium | . | . |
| 3 | Tablet | TotalSale | . | . | Low | . |
| 4 | Laptop | TotalSale | . | . | . | High |
| 5 | Smartphone | TotalSale | Medium | . | . | . |
| 6 | Tablet | TotalSale | . | . | Medium | . |
/*Q12. Write a macro that adds a Commission column based on this logic:*/
/*If TotalSale > 1,000,000, commission = 10%*/
/*Else if TotalSale > 500,000, commission = 5%*/
/*Else commission = 2%*/
%macro comm(data,set);
data &data;
set &set;
if totalsale > 1000000 then commission = totalsale*0.10;
else if totalsale > 500000 then commission = totalsale*0.05;
else commission = totalsale*0.02;
run;
proc print;run;
%mend;
%comm(newcom,sales)
Output:
| Obs | SalesID | Name | Region | Product | UnitsSold | UnitPrice | SaleDate | Gender | JoiningDate | TotalSale | commission |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 201 | Ajay | North | Laptop | 20 | 50000 | 05JAN2025 | M | 10JAN2020 | Medium | 50000 |
| 2 | 202 | Neha | East | Smartphone | 50 | 15000 | 10JAN2025 | F | 15JUL2019 | Medium | 37500 |
| 3 | 203 | Ravi | South | Tablet | 10 | 25000 | 15JAN2025 | M | 01JAN2018 | Low | 5000 |
| 4 | 204 | Ayesha | West | Laptop | 30 | 48000 | 20JAN2025 | F | 12DEC2020 | High | 144000 |
| 5 | 205 | Raj | North | Smartphone | 70 | 14000 | 22JAN2025 | M | 20FEB2019 | Medium | 49000 |
| 6 | 206 | Naresh | South | Tablet | 20 | 30000 | 09JAN2025 | F | 08FEB2021 | Medium | 30000 |
/*Q13. Identify duplicate entries based on SalesID using PROC SORT and NODUPKEY.*/
proc sort data=sample out=sortdata nodupkey;
by salesid;
run;
proc print;run;
Output:
| Obs | SalesID | Name | Region | Product | UnitsSold | UnitPrice | SaleDate | Gender | JoiningDate |
|---|---|---|---|---|---|---|---|---|---|
| 1 | 201 | Ajay | North | Laptop | 20 | 50000 | 05JAN2025 | M | 10JAN2020 |
| 2 | 202 | Neha | East | Smartphone | 50 | 15000 | 10JAN2025 | F | 15JUL2019 |
| 3 | 203 | Ravi | South | Tablet | 10 | 25000 | 15JAN2025 | M | 01JAN2018 |
| 4 | 204 | Ayesha | West | Laptop | 30 | 48000 | 20JAN2025 | F | 12DEC2020 |
| 5 | 205 | Raj | North | Smartphone | 70 | 14000 | 22JAN2025 | M | 20FEB2019 |
| 6 | 206 | Naresh | South | Tablet | 20 | 30000 | 09JAN2025 | F | 08FEB2021 |
/*Q14. Use PROC SGPLOT to create a vertical bar chart for average units sold by product.*/
proc sql;
create table avg as
select product , mean(unitssold) as avgunitsold
from sample
group by product;
quit;
proc print;run;
Output:
| Obs | Product | avgunitsold |
|---|---|---|
| 1 | Laptop | 25 |
| 2 | Smartphone | 60 |
| 3 | Tablet | 15 |
proc sgplot data=avg;
vbar product/response=avgunitsold datalabel;
yaxis label="Average Units Sold";
xaxis label="Product";
run;
Log:
NOTE: PROCEDURE SGPLOT used (Total process time):
real time 2.34 seconds
cpu time 0.54 seconds
NOTE: Listing image output written to SGPlot1.png.
NOTE: There were 3 observations read from the data set WORK.AVG.
/*Q15. Using PROC SQL, get the salesperson who sold the maximum units in a single sale.*/
proc sql ;
select salesid,name,unitssold
from sample
where unitssold = (select max(unitssold)
from sample);
quit;
Output:
| SalesID | Name | UnitsSold |
|---|---|---|
| 205 | Raj | 70 |
/*Q16. Create a new variable Experience as years between JoiningDate and today, rounded to 2 decimals.*/
data exp;
set sample;
experience=intck('year',joiningdate,'15may2025'd);
run;
proc print;run;
Output:
| Obs | SalesID | Name | Region | Product | UnitsSold | UnitPrice | SaleDate | Gender | JoiningDate | experience |
|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 201 | Ajay | North | Laptop | 20 | 50000 | 05JAN2025 | M | 10JAN2020 | 5 |
| 2 | 202 | Neha | East | Smartphone | 50 | 15000 | 10JAN2025 | F | 15JUL2019 | 6 |
| 3 | 203 | Ravi | South | Tablet | 10 | 25000 | 15JAN2025 | M | 01JAN2018 | 7 |
| 4 | 204 | Ayesha | West | Laptop | 30 | 48000 | 20JAN2025 | F | 12DEC2020 | 5 |
| 5 | 205 | Raj | North | Smartphone | 70 | 14000 | 22JAN2025 | M | 20FEB2019 | 6 |
| 6 | 206 | Naresh | South | Tablet | 20 | 30000 | 09JAN2025 | F | 08FEB2021 | 4 |
*/alternate solution:*/
data expI;
set sample;
experience=round(("15may2025"d-joiningdate)/365.25,0.01);
run;
proc print;run;
Output:
| Obs | SalesID | Name | Region | Product | UnitsSold | UnitPrice | SaleDate | Gender | JoiningDate | experience |
|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 201 | Ajay | North | Laptop | 20 | 50000 | 05JAN2025 | M | 10JAN2020 | 5.34 |
| 2 | 202 | Neha | East | Smartphone | 50 | 15000 | 10JAN2025 | F | 15JUL2019 | 5.83 |
| 3 | 203 | Ravi | South | Tablet | 10 | 25000 | 15JAN2025 | M | 01JAN2018 | 7.37 |
| 4 | 204 | Ayesha | West | Laptop | 30 | 48000 | 20JAN2025 | F | 12DEC2020 | 4.42 |
| 5 | 205 | Raj | North | Smartphone | 70 | 14000 | 22JAN2025 | M | 20FEB2019 | 6.23 |
| 6 | 206 | Naresh | South | Tablet | 20 | 30000 | 09JAN2025 | F | 08FEB2021 | 4.26 |
/*Q17. Use PROC FREQ to calculate % of salespeople by region.*/
proc freq data=sample;
table region / out=percent ;
run;
proc print data=percent;
var region count percent;
run;
Output:
| Region | Frequency | Percent | Cumulative Frequency |
Cumulative Percent |
|---|---|---|---|---|
| East | 1 | 16.67 | 1 | 16.67 |
| North | 2 | 33.33 | 3 | 50.00 |
| South | 2 | 33.33 | 5 | 83.33 |
| West | 1 | 16.67 | 6 | 100.00 |
/*Q18. Generate a summary table showing average sale value per month using SALEDATE.*/
data monthsale;
set sample;
total=unitssold*unitprice;
format salemonth monyy7.;
salemonth=intnx('month',saledate,0,'b');
run;
proc print;run;
Output:
| Obs | SalesID | Name | Region | Product | UnitsSold | UnitPrice | SaleDate | Gender | JoiningDate | total | salemonth |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 201 | Ajay | North | Laptop | 20 | 50000 | 05JAN2025 | M | 10JAN2020 | 1000000 | JAN2025 |
| 2 | 202 | Neha | East | Smartphone | 50 | 15000 | 10JAN2025 | F | 15JUL2019 | 750000 | JAN2025 |
| 3 | 203 | Ravi | South | Tablet | 10 | 25000 | 15JAN2025 | M | 01JAN2018 | 250000 | JAN2025 |
| 4 | 204 | Ayesha | West | Laptop | 30 | 48000 | 20JAN2025 | F | 12DEC2020 | 1440000 | JAN2025 |
| 5 | 205 | Raj | North | Smartphone | 70 | 14000 | 22JAN2025 | M | 20FEB2019 | 980000 | JAN2025 |
| 6 | 206 | Naresh | South | Tablet | 20 | 30000 | 09JAN2025 | F | 08FEB2021 | 600000 | JAN2025 |
proc sql;
create table new1 as
select salemonth format=monyy7.,
mean(total) as avgtotal format=comma12.2
from monthsale
group by salemonth
order by salemonth;
quit;
proc print;run;
Output:
| Obs | salemonth | avgtotal |
|---|---|---|
| 1 | JAN2025 | 836,666.67 |
/*Q19. Create a gender-wise and product-wise cross-tab using PROC FREQ.*/
proc freq data=sample;
table gender*product/norow nocol nopercent;
run;
Output:
|
| |||||||||||||||||||||||||||||||||||||||||||
*/alternate solution:*/
proc freq data=sample;
table gender*product/chisq;
run;
Output:
|
| ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| Statistics for Table of Gender by Product |
| Statistic | DF | Value | Prob |
|---|---|---|---|
| Chi-Square | 2 | 0.0000 | 1.0000 |
| Likelihood Ratio Chi-Square | 2 | 0.0000 | 1.0000 |
| Mantel-Haenszel Chi-Square | 1 | 0.0000 | 1.0000 |
| Phi Coefficient | 0.0000 | ||
| Contingency Coefficient | 0.0000 | ||
| Cramer's V | 0.0000 | ||
| WARNING: 100% of the cells have expected counts
less than 5. Chi-Square may not be a valid test. | |||
| Sample Size = 6 |
/*Q20. Create a listing of employees who sold a Laptop with UnitPrice > 45,000 AND joined before 2021.*/
data list;
set sample;
year = year(joiningdate);
where product = 'Laptop' and unitprice > 45000 and joiningdate > 2021;
run;
proc print;run;
Output:
| Obs | SalesID | Name | Region | Product | UnitsSold | UnitPrice | SaleDate | Gender | JoiningDate | year |
|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 201 | Ajay | North | Laptop | 20 | 50000 | 05JAN2025 | M | 10JAN2020 | 2020 |
| 2 | 204 | Ayesha | West | Laptop | 30 | 48000 | 20JAN2025 | F | 12DEC2020 | 2020 |
*/alternate solution:*/
proc print data=sample noobs;
where Product = 'Laptop'
and UnitPrice > 45000
and JoiningDate < '01JAN2021'd;
var Name Product UnitPrice JoiningDate;
format JoiningDate date9.;
title "Employees who sold Laptop > 45,000 and joined before 2021";
run;
Output:
| Employees who sold Laptop > 45,000 and joined
before 2021 |
| Name | Product | UnitPrice | JoiningDate |
|---|---|---|---|
| Ajay | Laptop | 50000 | 10JAN2020 |
| Ayesha | Laptop | 48000 | 12DEC2020 |
No comments:
Post a Comment