180.SAS PROJECT 1-10 QUESTIONS: DATASTEP | PROC SQL | PROC PRINT | PROC FORMAT | PROC MEANS | PROC REPORT | MACRO | COMPLETE RETAIL SALES ANALYSIS
- Get link
- X
- Other Apps
SAS PROJECT 1-10 QUESTIONS: DATASTEP | PROC SQL | PROC PRINT | PROC FORMAT | PROC MEANS | PROC REPORT | MACRO | COMPLETE RETAIL SALES ANALYSIS
data sample;
input SalesID Name$ Region$ Product:$10. UnitsSold UnitPrice
SaleDate:date9. Gender$ JoiningDate:date9.;
format SaleDate joiningDate Date9.;
cards;
201 Ajay North Laptop 20 50000 05JAN2025 M 10JAN2020
202 Neha East Smartphone 50 15000 10JAN2025 F 15JUL2019
203 Ravi South Tablet 10 25000 15JAN2025 M 01JAN2018
204 Ayesha West Laptop 30 48000 20JAN2025 F 12DEC2020
205 Raj North Smartphone 70 14000 22JAN2025 M 20FEB2019
206 Naresh South Tablet 20 30000 09JAN2025 F 08FEB2021
;
run;
proc print;run;
Output:
Obs | SalesID | Name | Region | Product | UnitsSold | UnitPrice | SaleDate | Gender | JoiningDate |
---|---|---|---|---|---|---|---|---|---|
1 | 201 | Ajay | North | Laptop | 20 | 50000 | 05JAN2025 | M | 10JAN2020 |
2 | 202 | Neha | East | Smartphone | 50 | 15000 | 10JAN2025 | F | 15JUL2019 |
3 | 203 | Ravi | South | Tablet | 10 | 25000 | 15JAN2025 | M | 01JAN2018 |
4 | 204 | Ayesha | West | Laptop | 30 | 48000 | 20JAN2025 | F | 12DEC2020 |
5 | 205 | Raj | North | Smartphone | 70 | 14000 | 22JAN2025 | M | 20FEB2019 |
6 | 206 | Naresh | South | Tablet | 20 | 30000 | 09JAN2025 | F | 08FEB2021 |
/*Q1. Create a new variable TotalSale = UnitsSold * UnitPrice using a DATA step*/
data Sale;
set sample;
TotalSale = UnitsSold * UnitPrice;
run;
proc print;run;
Output:
Obs | SalesID | Name | Region | Product | UnitsSold | UnitPrice | SaleDate | Gender | JoiningDate | TotalSale |
---|---|---|---|---|---|---|---|---|---|---|
1 | 201 | Ajay | North | Laptop | 20 | 50000 | 05JAN2025 | M | 10JAN2020 | 1000000 |
2 | 202 | Neha | East | Smartphone | 50 | 15000 | 10JAN2025 | F | 15JUL2019 | 750000 |
3 | 203 | Ravi | South | Tablet | 10 | 25000 | 15JAN2025 | M | 01JAN2018 | 250000 |
4 | 204 | Ayesha | West | Laptop | 30 | 48000 | 20JAN2025 | F | 12DEC2020 | 1440000 |
5 | 205 | Raj | North | Smartphone | 70 | 14000 | 22JAN2025 | M | 20FEB2019 | 980000 |
6 | 206 | Naresh | South | Tablet | 20 | 30000 | 09JAN2025 | F | 08FEB2021 | 600000 |
/*Q2. Using PROC SQL, calculate the total revenue by each Region.*/
proc sql;
select region,sum(UnitPrice) as TotalRevenue
from sample
group by region;
quit;
Output:
Region | TotalRevenue |
---|---|
East | 15000 |
North | 64000 |
South | 55000 |
West | 48000 |
proc sql;
select region,sum(UnitsSold*UnitPrice) as TotalRevenue
from sample
group by region;
quit;
Output:
Region | TotalRevenue |
---|---|
East | 750000 |
North | 1980000 |
South | 850000 |
West | 1440000 |
/*Q3. Find the top 3 salespersons based on total revenue (TotalSale).*/
proc sql outobs=3;
select name,sum(UnitsSold*UnitPrice) as TotalRevenue
from sample
group by region
order by Name desc;
quit;
Output:
Name | TotalRevenue |
---|---|
Ravi | 850000 |
Raj | 1980000 |
Neha | 750000 |
/*Q4. Add a variable SalesLevel:*/
/*<500000 → 'Low'*/
/*500000–1000000 → 'Medium'*/
/*>1000000 → 'High'*/
proc format;
value sales Low - < 500000 = "Low"
500000 - 1000000 = "Medium"
1000000<- High = "High";
run;
data sales;
set sample;
TotalSale = UnitsSold * UnitPrice;
Format TotalSale sales.;
run;
proc print;run;
Output:
Obs | SalesID | Name | Region | Product | UnitsSold | UnitPrice | SaleDate | Gender | JoiningDate | TotalSale |
---|---|---|---|---|---|---|---|---|---|---|
1 | 201 | Ajay | North | Laptop | 20 | 50000 | 05JAN2025 | M | 10JAN2020 | Medium |
2 | 202 | Neha | East | Smartphone | 50 | 15000 | 10JAN2025 | F | 15JUL2019 | Medium |
3 | 203 | Ravi | South | Tablet | 10 | 25000 | 15JAN2025 | M | 01JAN2018 | Low |
4 | 204 | Ayesha | West | Laptop | 30 | 48000 | 20JAN2025 | F | 12DEC2020 | High |
5 | 205 | Raj | North | Smartphone | 70 | 14000 | 22JAN2025 | M | 20FEB2019 | Medium |
6 | 206 | Naresh | South | Tablet | 20 | 30000 | 09JAN2025 | F | 08FEB2021 | Medium |
*/alternate solution:*/
data salesI;
set sample;
TotalSale = UnitsSold*UnitPrice;
Retain SalesID Name Region Product UnitsSold UnitPrice SaleDate Gender
JoiningDate TotalSale ;
length SalesLevel $10.;
if TotalSale < 500000 then SalesLevel = "Low";
else if TotalSale <= 1000000 then SalesLevel = "Medium";
else SalesLevel = "High";
run;
proc print;run;
Output:
Obs | SalesID | Name | Region | Product | UnitsSold | UnitPrice | SaleDate | Gender | JoiningDate | TotalSale | SalesLevel |
---|---|---|---|---|---|---|---|---|---|---|---|
1 | 201 | Ajay | North | Laptop | 20 | 50000 | 05JAN2025 | M | 10JAN2020 | 1000000 | Medium |
2 | 202 | Neha | East | Smartphone | 50 | 15000 | 10JAN2025 | F | 15JUL2019 | 750000 | Medium |
3 | 203 | Ravi | South | Tablet | 10 | 25000 | 15JAN2025 | M | 01JAN2018 | 250000 | Low |
4 | 204 | Ayesha | West | Laptop | 30 | 48000 | 20JAN2025 | F | 12DEC2020 | 1440000 | High |
5 | 205 | Raj | North | Smartphone | 70 | 14000 | 22JAN2025 | M | 20FEB2019 | 980000 | Medium |
6 | 206 | Naresh | South | Tablet | 20 | 30000 | 09JAN2025 | F | 08FEB2021 | 600000 | Medium |
/*Q5. Use PROC FORMAT to define the above SalesLevel and apply it in a PROC PRINT.*/
proc format;
value sales Low - < 500000 = "Low"
500000 - 1000000 = "Medium"
1000000<- High = "High";
run;
data salesII;
set sample;
TotalSale = UnitsSold * UnitPrice;
run;
proc print data=sales;
format TotalSale sales.;
run;
Output:
Obs | SalesID | Name | Region | Product | UnitsSold | UnitPrice | SaleDate | Gender | JoiningDate | TotalSale |
---|---|---|---|---|---|---|---|---|---|---|
1 | 201 | Ajay | North | Laptop | 20 | 50000 | 05JAN2025 | M | 10JAN2020 | Medium |
2 | 202 | Neha | East | Smartphone | 50 | 15000 | 10JAN2025 | F | 15JUL2019 | Medium |
3 | 203 | Ravi | South | Tablet | 10 | 25000 | 15JAN2025 | M | 01JAN2018 | Low |
4 | 204 | Ayesha | West | Laptop | 30 | 48000 | 20JAN2025 | F | 12DEC2020 | High |
5 | 205 | Raj | North | Smartphone | 70 | 14000 | 22JAN2025 | M | 20FEB2019 | Medium |
6 | 206 | Naresh | South | Tablet | 20 | 30000 | 09JAN2025 | F | 08FEB2021 | Medium |
/*Q6. Create a macro that accepts a Region and gives summary stats (avg, min, max) of sales.*/
%macro Region (region);
proc means data=sales mean min max;
where region = "®ion";
var totalsale;
title "TotalSale Of :&Region";
run;
%mend;
%Region(East)
Output:
TotalSale Of :East |
Analysis Variable : TotalSale | ||
---|---|---|
Mean | Minimum | Maximum |
750000.00 | 750000.00 | 750000.00 |
%Region(South)
Output:
The MEANS Procedure
|
/*alternate solutions:*/
%macro regioni (reg,region);
data ®
set sample;
totalsales = unitssold * unitprice;
run;
proc means data=® mean min max;
where region = "®ion";
var totalsales;
run;
%mend;
%regioni(samplei,North)
Output:
Analysis Variable : totalsales | ||
---|---|---|
Mean | Minimum | Maximum |
990000.00 | 980000.00 | 1000000.00 |
*/alternate solution:*/
%macro regionii(reg,region);
proc sql;
select ®,
avg(unitssold*unitprice) as avg,
min(unitssold*unitprice) as min,
max(unitssold*unitprice) as max
from sample
where ® = "®ion"
group by ®
quit;
%mend;
%regionii(region,South)
Output:
Region | avg | min | max |
---|---|---|---|
South | 425000 | 250000 | 600000 |
%regionii(region,North)
Output:
Region | avg | min | max |
---|---|---|---|
North | 990000 | 980000 | 1000000 |
/*Q7. Create a variable SaleMonth (e.g., JAN2025) from SaleDate.*/
data month;
set sample;
SaleMonth=month(SaleDate);
run;
proc print;run;
Output:
Obs | SalesID | Name | Region | Product | UnitsSold | UnitPrice | SaleDate | Gender | JoiningDate | SaleMonth |
---|---|---|---|---|---|---|---|---|---|---|
1 | 201 | Ajay | North | Laptop | 20 | 50000 | 05JAN2025 | M | 10JAN2020 | 1 |
2 | 202 | Neha | East | Smartphone | 50 | 15000 | 10JAN2025 | F | 15JUL2019 | 1 |
3 | 203 | Ravi | South | Tablet | 10 | 25000 | 15JAN2025 | M | 01JAN2018 | 1 |
4 | 204 | Ayesha | West | Laptop | 30 | 48000 | 20JAN2025 | F | 12DEC2020 | 1 |
5 | 205 | Raj | North | Smartphone | 70 | 14000 | 22JAN2025 | M | 20FEB2019 | 1 |
6 | 206 | Naresh | South | Tablet | 20 | 30000 | 09JAN2025 | F | 08FEB2021 | 1 |
data monthI;
set sample;
SaleMonth = put(SaleDate,monyy7.);
run;
proc print;run;
Output:
Obs | SalesID | Name | Region | Product | UnitsSold | UnitPrice | SaleDate | Gender | JoiningDate | SaleMonth |
---|---|---|---|---|---|---|---|---|---|---|
1 | 201 | Ajay | North | Laptop | 20 | 50000 | 05JAN2025 | M | 10JAN2020 | JAN2025 |
2 | 202 | Neha | East | Smartphone | 50 | 15000 | 10JAN2025 | F | 15JUL2019 | JAN2025 |
3 | 203 | Ravi | South | Tablet | 10 | 25000 | 15JAN2025 | M | 01JAN2018 | JAN2025 |
4 | 204 | Ayesha | West | Laptop | 30 | 48000 | 20JAN2025 | F | 12DEC2020 | JAN2025 |
5 | 205 | Raj | North | Smartphone | 70 | 14000 | 22JAN2025 | M | 20FEB2019 | JAN2025 |
6 | 206 | Naresh | South | Tablet | 20 | 30000 | 09JAN2025 | F | 08FEB2021 | JAN2025 |
/*Q8. For each product, compute average sale price using PROC MEANS.*/
proc means data=sample mean;
class product;
var UnitPrice;
run;
Output:
Analysis Variable : UnitPrice | ||
---|---|---|
Product | N Obs | Mean |
Laptop | 2 | 49000.00 |
Smartphone | 2 | 14500.00 |
Tablet | 2 | 27500.00 |
/*alternate solution*/
proc means data=sample noprint;
class product;
var UnitPrice;
output out=NewSample
mean=Avg_UnitPrice;
run;
proc print data=newsample noobs;
where _type_ = 1;
var product Avg_UnitPrice;
run;
Output:
Product | Avg_UnitPrice |
---|---|
Laptop | 49000 |
Smartphone | 14500 |
Tablet | 27500 |
/*Q9. List employees who joined more than 4 years ago from today.*/
data ago;
set sample;
if intck('Year' , jOiningDate , '15may2025'd) > 4 then output;
run;
proc print;run;
Output:
Obs | SalesID | Name | Region | Product | UnitsSold | UnitPrice | SaleDate | Gender | JoiningDate |
---|---|---|---|---|---|---|---|---|---|
1 | 201 | Ajay | North | Laptop | 20 | 50000 | 05JAN2025 | M | 10JAN2020 |
2 | 202 | Neha | East | Smartphone | 50 | 15000 | 10JAN2025 | F | 15JUL2019 |
3 | 203 | Ravi | South | Tablet | 10 | 25000 | 15JAN2025 | M | 01JAN2018 |
4 | 204 | Ayesha | West | Laptop | 30 | 48000 | 20JAN2025 | F | 12DEC2020 |
5 | 205 | Raj | North | Smartphone | 70 | 14000 | 22JAN2025 | M | 20FEB2019 |
/*alternate solution*/
proc sql;
select *
from sample
where intck('year',joiningdate, '15may2025'd) > 4;
quit;
Output:
SalesID | Name | Region | Product | UnitsSold | UnitPrice | SaleDate | Gender | JoiningDate |
---|---|---|---|---|---|---|---|---|
201 | Ajay | North | Laptop | 20 | 50000 | 05JAN2025 | M | 10JAN2020 |
202 | Neha | East | Smartphone | 50 | 15000 | 10JAN2025 | F | 15JUL2019 |
203 | Ravi | South | Tablet | 10 | 25000 | 15JAN2025 | M | 01JAN2018 |
204 | Ayesha | West | Laptop | 30 | 48000 | 20JAN2025 | F | 12DEC2020 |
205 | Raj | North | Smartphone | 70 | 14000 | 22JAN2025 | M | 20FEB2019 |
/*Q10. Create a report showing total units sold by product and gender using PROC REPORT.*/
proc report data=sample nowd;
column name product unitssold gender;
define name / display;
define product / group;
define gender /group;
define unitssold / analysis format=comma10.2;
run;
Output:
Name | Product | UnitsSold | Gender |
---|---|---|---|
Ayesha | Laptop | 30.00 | F |
Ajay | 20.00 | M | |
Neha | Smartphone | 50.00 | F |
Raj | 70.00 | M | |
Naresh | Tablet | 20.00 | F |
Ravi | 10.00 | M |
/*alternate solution*/
proc report data=sample nowd;
column name Product Gender UnitsSold;
define name / display;
define Product / group;
define Gender / across;
define UnitsSold / analysis sum format=comma10.;
run;
Output:
Gender | ||||
---|---|---|---|---|
Name | Product | F | M | UnitsSold |
Ajay | Laptop | . | 1 | 20 |
Ayesha | 1 | . | 30 | |
Neha | Smartphone | 1 | . | 50 |
Raj | . | 1 | 70 | |
Ravi | Tablet | . | 1 | 10 |
Naresh | 1 | . | 20 |
- Get link
- X
- Other Apps
Comments
Post a Comment