181.SAS PROJECT 11-20 QUESTIONS: DATASTEP | PROC SQL | PROC PRINT | PROC FORMAT | PROC MEANS | PROC REPORT | MACRO | COMPLETE RETAIL SALES ANALYSIS
- Get link
- X
- Other Apps
SAS PROJECT 11-20 QUESTIONS: DATASTEP | PROC SQL | PROC PRINT | PROC FORMAT | PROC MEANS | PROC REPORT | MACRO | COMPLETE RETAIL SALES ANALYSIS
data sample;
input SalesID Name$ Region$ Product:$10. UnitsSold UnitPrice
SaleDate:date9. Gender$ JoiningDate:date9.;
format SaleDate joiningDate Date9.;
cards;
201 Ajay North Laptop 20 50000 05JAN2025 M 10JAN2020
202 Neha East Smartphone 50 15000 10JAN2025 F 15JUL2019
203 Ravi South Tablet 10 25000 15JAN2025 M 01JAN2018
204 Ayesha West Laptop 30 48000 20JAN2025 F 12DEC2020
205 Raj North Smartphone 70 14000 22JAN2025 M 20FEB2019
206 Naresh South Tablet 20 30000 09JAN2025 F 08FEB2021
;
run;
proc print;run;
Output:
Obs | SalesID | Name | Region | Product | UnitsSold | UnitPrice | SaleDate | Gender | JoiningDate |
---|---|---|---|---|---|---|---|---|---|
1 | 201 | Ajay | North | Laptop | 20 | 50000 | 05JAN2025 | M | 10JAN2020 |
2 | 202 | Neha | East | Smartphone | 50 | 15000 | 10JAN2025 | F | 15JUL2019 |
3 | 203 | Ravi | South | Tablet | 10 | 25000 | 15JAN2025 | M | 01JAN2018 |
4 | 204 | Ayesha | West | Laptop | 30 | 48000 | 20JAN2025 | F | 12DEC2020 |
5 | 205 | Raj | North | Smartphone | 70 | 14000 | 22JAN2025 | M | 20FEB2019 |
6 | 206 | Naresh | South | Tablet | 20 | 30000 | 09JAN2025 | F | 08FEB2021 |
/*Q11. Use PROC TRANSPOSE to transpose the total sales of each product by region.*/
proc transpose data=sales out=transale;
by region notsorted;
var totalsale;
run;
proc print;run;
Output:
Obs | Region | _NAME_ | COL1 |
---|---|---|---|
1 | North | TotalSale | Medium |
2 | East | TotalSale | Medium |
3 | South | TotalSale | Low |
4 | West | TotalSale | High |
5 | North | TotalSale | Medium |
6 | South | TotalSale | Medium |
*/alternate solution:*/
proc transpose data=sale out=transaleI;
by product notsorted;
var totalsale;
id region;
run;
proc print;run;
Output:
Obs | Product | _NAME_ | North | East | South | West |
---|---|---|---|---|---|---|
1 | Laptop | TotalSale | Medium | . | . | . |
2 | Smartphone | TotalSale | . | Medium | . | . |
3 | Tablet | TotalSale | . | . | Low | . |
4 | Laptop | TotalSale | . | . | . | High |
5 | Smartphone | TotalSale | Medium | . | . | . |
6 | Tablet | TotalSale | . | . | Medium | . |
/*Q12. Write a macro that adds a Commission column based on this logic:*/
/*If TotalSale > 1,000,000, commission = 10%*/
/*Else if TotalSale > 500,000, commission = 5%*/
/*Else commission = 2%*/
%macro comm(data,set);
data &data;
set &set;
if totalsale > 1000000 then commission = totalsale*0.10;
else if totalsale > 500000 then commission = totalsale*0.05;
else commission = totalsale*0.02;
run;
proc print;run;
%mend;
%comm(newcom,sales)
Output:
Obs | SalesID | Name | Region | Product | UnitsSold | UnitPrice | SaleDate | Gender | JoiningDate | TotalSale | commission |
---|---|---|---|---|---|---|---|---|---|---|---|
1 | 201 | Ajay | North | Laptop | 20 | 50000 | 05JAN2025 | M | 10JAN2020 | Medium | 50000 |
2 | 202 | Neha | East | Smartphone | 50 | 15000 | 10JAN2025 | F | 15JUL2019 | Medium | 37500 |
3 | 203 | Ravi | South | Tablet | 10 | 25000 | 15JAN2025 | M | 01JAN2018 | Low | 5000 |
4 | 204 | Ayesha | West | Laptop | 30 | 48000 | 20JAN2025 | F | 12DEC2020 | High | 144000 |
5 | 205 | Raj | North | Smartphone | 70 | 14000 | 22JAN2025 | M | 20FEB2019 | Medium | 49000 |
6 | 206 | Naresh | South | Tablet | 20 | 30000 | 09JAN2025 | F | 08FEB2021 | Medium | 30000 |
/*Q13. Identify duplicate entries based on SalesID using PROC SORT and NODUPKEY.*/
proc sort data=sample out=sortdata nodupkey;
by salesid;
run;
proc print;run;
Output:
Obs | SalesID | Name | Region | Product | UnitsSold | UnitPrice | SaleDate | Gender | JoiningDate |
---|---|---|---|---|---|---|---|---|---|
1 | 201 | Ajay | North | Laptop | 20 | 50000 | 05JAN2025 | M | 10JAN2020 |
2 | 202 | Neha | East | Smartphone | 50 | 15000 | 10JAN2025 | F | 15JUL2019 |
3 | 203 | Ravi | South | Tablet | 10 | 25000 | 15JAN2025 | M | 01JAN2018 |
4 | 204 | Ayesha | West | Laptop | 30 | 48000 | 20JAN2025 | F | 12DEC2020 |
5 | 205 | Raj | North | Smartphone | 70 | 14000 | 22JAN2025 | M | 20FEB2019 |
6 | 206 | Naresh | South | Tablet | 20 | 30000 | 09JAN2025 | F | 08FEB2021 |
/*Q14. Use PROC SGPLOT to create a vertical bar chart for average units sold by product.*/
proc sql;
create table avg as
select product , mean(unitssold) as avgunitsold
from sample
group by product;
quit;
proc print;run;
Output:
Obs | Product | avgunitsold |
---|---|---|
1 | Laptop | 25 |
2 | Smartphone | 60 |
3 | Tablet | 15 |
proc sgplot data=avg;
vbar product/response=avgunitsold datalabel;
yaxis label="Average Units Sold";
xaxis label="Product";
run;
Log:
NOTE: PROCEDURE SGPLOT used (Total process time):
real time 2.34 seconds
cpu time 0.54 seconds
NOTE: Listing image output written to SGPlot1.png.
NOTE: There were 3 observations read from the data set WORK.AVG.
/*Q15. Using PROC SQL, get the salesperson who sold the maximum units in a single sale.*/
proc sql ;
select salesid,name,unitssold
from sample
where unitssold = (select max(unitssold)
from sample);
quit;
Output:
SalesID | Name | UnitsSold |
---|---|---|
205 | Raj | 70 |
/*Q16. Create a new variable Experience as years between JoiningDate and today, rounded to 2 decimals.*/
data exp;
set sample;
experience=intck('year',joiningdate,'15may2025'd);
run;
proc print;run;
Output:
Obs | SalesID | Name | Region | Product | UnitsSold | UnitPrice | SaleDate | Gender | JoiningDate | experience |
---|---|---|---|---|---|---|---|---|---|---|
1 | 201 | Ajay | North | Laptop | 20 | 50000 | 05JAN2025 | M | 10JAN2020 | 5 |
2 | 202 | Neha | East | Smartphone | 50 | 15000 | 10JAN2025 | F | 15JUL2019 | 6 |
3 | 203 | Ravi | South | Tablet | 10 | 25000 | 15JAN2025 | M | 01JAN2018 | 7 |
4 | 204 | Ayesha | West | Laptop | 30 | 48000 | 20JAN2025 | F | 12DEC2020 | 5 |
5 | 205 | Raj | North | Smartphone | 70 | 14000 | 22JAN2025 | M | 20FEB2019 | 6 |
6 | 206 | Naresh | South | Tablet | 20 | 30000 | 09JAN2025 | F | 08FEB2021 | 4 |
*/alternate solution:*/
data expI;
set sample;
experience=round(("15may2025"d-joiningdate)/365.25,0.01);
run;
proc print;run;
Output:
Obs | SalesID | Name | Region | Product | UnitsSold | UnitPrice | SaleDate | Gender | JoiningDate | experience |
---|---|---|---|---|---|---|---|---|---|---|
1 | 201 | Ajay | North | Laptop | 20 | 50000 | 05JAN2025 | M | 10JAN2020 | 5.34 |
2 | 202 | Neha | East | Smartphone | 50 | 15000 | 10JAN2025 | F | 15JUL2019 | 5.83 |
3 | 203 | Ravi | South | Tablet | 10 | 25000 | 15JAN2025 | M | 01JAN2018 | 7.37 |
4 | 204 | Ayesha | West | Laptop | 30 | 48000 | 20JAN2025 | F | 12DEC2020 | 4.42 |
5 | 205 | Raj | North | Smartphone | 70 | 14000 | 22JAN2025 | M | 20FEB2019 | 6.23 |
6 | 206 | Naresh | South | Tablet | 20 | 30000 | 09JAN2025 | F | 08FEB2021 | 4.26 |
/*Q17. Use PROC FREQ to calculate % of salespeople by region.*/
proc freq data=sample;
table region / out=percent ;
run;
proc print data=percent;
var region count percent;
run;
Output:
Region | Frequency | Percent | Cumulative Frequency |
Cumulative Percent |
---|---|---|---|---|
East | 1 | 16.67 | 1 | 16.67 |
North | 2 | 33.33 | 3 | 50.00 |
South | 2 | 33.33 | 5 | 83.33 |
West | 1 | 16.67 | 6 | 100.00 |
/*Q18. Generate a summary table showing average sale value per month using SALEDATE.*/
data monthsale;
set sample;
total=unitssold*unitprice;
format salemonth monyy7.;
salemonth=intnx('month',saledate,0,'b');
run;
proc print;run;
Output:
Obs | SalesID | Name | Region | Product | UnitsSold | UnitPrice | SaleDate | Gender | JoiningDate | total | salemonth |
---|---|---|---|---|---|---|---|---|---|---|---|
1 | 201 | Ajay | North | Laptop | 20 | 50000 | 05JAN2025 | M | 10JAN2020 | 1000000 | JAN2025 |
2 | 202 | Neha | East | Smartphone | 50 | 15000 | 10JAN2025 | F | 15JUL2019 | 750000 | JAN2025 |
3 | 203 | Ravi | South | Tablet | 10 | 25000 | 15JAN2025 | M | 01JAN2018 | 250000 | JAN2025 |
4 | 204 | Ayesha | West | Laptop | 30 | 48000 | 20JAN2025 | F | 12DEC2020 | 1440000 | JAN2025 |
5 | 205 | Raj | North | Smartphone | 70 | 14000 | 22JAN2025 | M | 20FEB2019 | 980000 | JAN2025 |
6 | 206 | Naresh | South | Tablet | 20 | 30000 | 09JAN2025 | F | 08FEB2021 | 600000 | JAN2025 |
proc sql;
create table new1 as
select salemonth format=monyy7.,
mean(total) as avgtotal format=comma12.2
from monthsale
group by salemonth
order by salemonth;
quit;
proc print;run;
Output:
Obs | salemonth | avgtotal |
---|---|---|
1 | JAN2025 | 836,666.67 |
/*Q19. Create a gender-wise and product-wise cross-tab using PROC FREQ.*/
proc freq data=sample;
table gender*product/norow nocol nopercent;
run;
Output:
|
|
*/alternate solution:*/
proc freq data=sample;
table gender*product/chisq;
run;
Output:
|
|
Statistics for Table of Gender by Product |
Statistic | DF | Value | Prob |
---|---|---|---|
Chi-Square | 2 | 0.0000 | 1.0000 |
Likelihood Ratio Chi-Square | 2 | 0.0000 | 1.0000 |
Mantel-Haenszel Chi-Square | 1 | 0.0000 | 1.0000 |
Phi Coefficient | 0.0000 | ||
Contingency Coefficient | 0.0000 | ||
Cramer's V | 0.0000 | ||
WARNING: 100% of the cells have expected counts
less than 5. Chi-Square may not be a valid test. |
Sample Size = 6 |
/*Q20. Create a listing of employees who sold a Laptop with UnitPrice > 45,000 AND joined before 2021.*/
data list;
set sample;
year = year(joiningdate);
where product = 'Laptop' and unitprice > 45000 and joiningdate > 2021;
run;
proc print;run;
Output:
Obs | SalesID | Name | Region | Product | UnitsSold | UnitPrice | SaleDate | Gender | JoiningDate | year |
---|---|---|---|---|---|---|---|---|---|---|
1 | 201 | Ajay | North | Laptop | 20 | 50000 | 05JAN2025 | M | 10JAN2020 | 2020 |
2 | 204 | Ayesha | West | Laptop | 30 | 48000 | 20JAN2025 | F | 12DEC2020 | 2020 |
*/alternate solution:*/
proc print data=sample noobs;
where Product = 'Laptop'
and UnitPrice > 45000
and JoiningDate < '01JAN2021'd;
var Name Product UnitPrice JoiningDate;
format JoiningDate date9.;
title "Employees who sold Laptop > 45,000 and joined before 2021";
run;
Output:
Employees who sold Laptop > 45,000 and joined
before 2021 |
Name | Product | UnitPrice | JoiningDate |
---|---|---|---|
Ajay | Laptop | 50000 | 10JAN2020 |
Ayesha | Laptop | 48000 | 12DEC2020 |
- Get link
- X
- Other Apps
Comments
Post a Comment