180.SAS PROJECT 1-10 QUESTIONS: DATASTEP | PROC SQL | PROC PRINT | PROC FORMAT | PROC MEANS | PROC REPORT | MACRO | COMPLETE RETAIL SALES ANALYSIS

SAS PROJECT 1-10 QUESTIONS: DATASTEP | PROC SQL | PROC PRINT | PROC FORMAT | PROC MEANS | PROC REPORT | MACRO | COMPLETE RETAIL SALES ANALYSIS

data sample;

 input SalesID Name$ Region$ Product:$10. UnitsSold UnitPrice 

        SaleDate:date9. Gender$ JoiningDate:date9.;

        format SaleDate joiningDate Date9.;

 cards;

201 Ajay North Laptop 20 50000 05JAN2025 M 10JAN2020

202 Neha East Smartphone 50 15000 10JAN2025 F 15JUL2019

203 Ravi South Tablet 10 25000 15JAN2025 M 01JAN2018

204 Ayesha West Laptop 30 48000 20JAN2025 F 12DEC2020

205 Raj     North Smartphone 70 14000 22JAN2025 M 20FEB2019

206 Naresh South Tablet  20 30000 09JAN2025 08FEB2021

;

run;

proc print;run;

Output:

Obs SalesID Name Region Product UnitsSold UnitPrice SaleDate Gender JoiningDate
1 201 Ajay North Laptop 20 50000 05JAN2025 M 10JAN2020
2 202 Neha East Smartphone 50 15000 10JAN2025 F 15JUL2019
3 203 Ravi South Tablet 10 25000 15JAN2025 M 01JAN2018
4 204 Ayesha West Laptop 30 48000 20JAN2025 F 12DEC2020
5 205 Raj North Smartphone 70 14000 22JAN2025 M 20FEB2019
6 206 Naresh South Tablet 20 30000 09JAN2025 F 08FEB2021


/*Q1. Create a new variable TotalSale = UnitsSold * UnitPrice using a DATA step*/

data Sale;

 set sample;

 TotalSale = UnitsSold * UnitPrice;

run;

proc print;run;

Output:

Obs SalesID Name Region Product UnitsSold UnitPrice SaleDate Gender JoiningDate TotalSale
1 201 Ajay North Laptop 20 50000 05JAN2025 M 10JAN2020 1000000
2 202 Neha East Smartphone 50 15000 10JAN2025 F 15JUL2019 750000
3 203 Ravi South Tablet 10 25000 15JAN2025 M 01JAN2018 250000
4 204 Ayesha West Laptop 30 48000 20JAN2025 F 12DEC2020 1440000
5 205 Raj North Smartphone 70 14000 22JAN2025 M 20FEB2019 980000
6 206 Naresh South Tablet 20 30000 09JAN2025 F 08FEB2021 600000


/*Q2. Using PROC SQL, calculate the total revenue by each Region.*/

proc sql;

 select region,sum(UnitPrice) as TotalRevenue

  from sample

  group by region;

quit;

Output:

Region TotalRevenue
East 15000
North 64000
South 55000
West 48000


proc sql;

 select region,sum(UnitsSold*UnitPrice) as TotalRevenue

  from sample

  group by region;

quit;

Output:

Region TotalRevenue
East 750000
North 1980000
South 850000
West 1440000


/*Q3. Find the top 3 salespersons based on total revenue (TotalSale).*/

proc sql outobs=3;

 select name,sum(UnitsSold*UnitPrice) as TotalRevenue

  from sample 

  group by region

  order by Name desc;

quit;

Output:

Name TotalRevenue
Ravi 850000
Raj 1980000
Neha 750000


/*Q4. Add a variable SalesLevel:*/

/*<500000 → 'Low'*/

/*500000–1000000 → 'Medium'*/

/*>1000000 → 'High'*/

proc format; 

  value   sales      Low     - < 500000   = "Low"

                            500000  - 1000000   = "Medium"

    1000000<- High      = "High";

run;


data sales;

 set sample;

 TotalSale = UnitsSold * UnitPrice;

 Format TotalSale sales.;

run;

proc print;run;

Output:

Obs SalesID Name Region Product UnitsSold UnitPrice SaleDate Gender JoiningDate TotalSale
1 201 Ajay North Laptop 20 50000 05JAN2025 M 10JAN2020 Medium
2 202 Neha East Smartphone 50 15000 10JAN2025 F 15JUL2019 Medium
3 203 Ravi South Tablet 10 25000 15JAN2025 M 01JAN2018 Low
4 204 Ayesha West Laptop 30 48000 20JAN2025 F 12DEC2020 High
5 205 Raj North Smartphone 70 14000 22JAN2025 M 20FEB2019 Medium
6 206 Naresh South Tablet 20 30000 09JAN2025 F 08FEB2021 Medium

*/alternate solution:*/

data salesI;

 set sample;

 TotalSale = UnitsSold*UnitPrice;

 Retain SalesID Name Region Product UnitsSold UnitPrice SaleDate Gender 

 JoiningDate TotalSale ;

 length SalesLevel $10.;

 if TotalSale < 500000 then SalesLevel = "Low";

 else if TotalSale <= 1000000 then SalesLevel = "Medium";

 else SalesLevel = "High";

run;

proc print;run;

Output:

Obs SalesID Name Region Product UnitsSold UnitPrice SaleDate Gender JoiningDate TotalSale SalesLevel
1 201 Ajay North Laptop 20 50000 05JAN2025 M 10JAN2020 1000000 Medium
2 202 Neha East Smartphone 50 15000 10JAN2025 F 15JUL2019 750000 Medium
3 203 Ravi South Tablet 10 25000 15JAN2025 M 01JAN2018 250000 Low
4 204 Ayesha West Laptop 30 48000 20JAN2025 F 12DEC2020 1440000 High
5 205 Raj North Smartphone 70 14000 22JAN2025 M 20FEB2019 980000 Medium
6 206 Naresh South Tablet 20 30000 09JAN2025 F 08FEB2021 600000 Medium


/*Q5. Use PROC FORMAT to define the above SalesLevel and apply it in a PROC PRINT.*/

proc format; 

  value   sales     Low     - < 500000   = "Low"

                           500000  - 1000000   = "Medium"

   1000000<- High      = "High";

run;


data salesII;

 set sample;

 TotalSale = UnitsSold * UnitPrice;

run;

proc print data=sales;

 format TotalSale sales.;

run;

Output:

Obs SalesID Name Region Product UnitsSold UnitPrice SaleDate Gender JoiningDate TotalSale
1 201 Ajay North Laptop 20 50000 05JAN2025 M 10JAN2020 Medium
2 202 Neha East Smartphone 50 15000 10JAN2025 F 15JUL2019 Medium
3 203 Ravi South Tablet 10 25000 15JAN2025 M 01JAN2018 Low
4 204 Ayesha West Laptop 30 48000 20JAN2025 F 12DEC2020 High
5 205 Raj North Smartphone 70 14000 22JAN2025 M 20FEB2019 Medium
6 206 Naresh South Tablet 20 30000 09JAN2025 F 08FEB2021 Medium


/*Q6. Create a macro that accepts a Region and gives summary stats (avg, min, max) of sales.*/

%macro Region (region);

proc means data=sales mean min max;

 where region = "&region";

 var totalsale;

 title "TotalSale Of :&Region";

run;

%mend;


%Region(East)

Output:

                                                                TotalSale Of :East

                                                             The MEANS Procedure

Analysis Variable : TotalSale
Mean Minimum Maximum
750000.00 750000.00 750000.00

%Region(South)

Output:

                                                                   TotalSale Of :South

                                                               The MEANS Procedure

Analysis Variable : TotalSale
Mean Minimum Maximum
425000.00 250000.00 600000.00


/*alternate solutions:*/

%macro regioni (reg,region);

data &reg;

 set sample;

 totalsales = unitssold * unitprice;

run;

proc means data=&reg mean min max;

 where region = "&region";

 var totalsales;

run;

%mend;


%regioni(samplei,North)

Output:

                                                                  The MEANS Procedure

Analysis Variable : totalsales
Mean Minimum Maximum
990000.00 980000.00 1000000.00


*/alternate solution:*/

%macro regionii(reg,region);

proc sql;

    select   &reg,

                avg(unitssold*unitprice) as avg,

min(unitssold*unitprice) as min,

max(unitssold*unitprice) as max

  from sample

  where &reg = "&region"

  group by &reg;

quit;

%mend;


%regionii(region,South)

Output:

Region avg min max
South 425000 250000 600000

%regionii(region,North)

Output:

Region avg min max
North 990000 980000 1000000


/*Q7. Create a variable SaleMonth (e.g., JAN2025) from SaleDate.*/

data month;

 set sample;

 SaleMonth=month(SaleDate);

run;

proc print;run;

Output:

Obs SalesID Name Region Product UnitsSold UnitPrice SaleDate Gender JoiningDate SaleMonth
1 201 Ajay North Laptop 20 50000 05JAN2025 M 10JAN2020 1
2 202 Neha East Smartphone 50 15000 10JAN2025 F 15JUL2019 1
3 203 Ravi South Tablet 10 25000 15JAN2025 M 01JAN2018 1
4 204 Ayesha West Laptop 30 48000 20JAN2025 F 12DEC2020 1
5 205 Raj North Smartphone 70 14000 22JAN2025 M 20FEB2019 1
6 206 Naresh South Tablet 20 30000 09JAN2025 F 08FEB2021 1


data monthI;

 set sample;

 SaleMonth = put(SaleDate,monyy7.);

run;

proc print;run;

Output:

Obs SalesID Name Region Product UnitsSold UnitPrice SaleDate Gender JoiningDate SaleMonth
1 201 Ajay North Laptop 20 50000 05JAN2025 M 10JAN2020 JAN2025
2 202 Neha East Smartphone 50 15000 10JAN2025 F 15JUL2019 JAN2025
3 203 Ravi South Tablet 10 25000 15JAN2025 M 01JAN2018 JAN2025
4 204 Ayesha West Laptop 30 48000 20JAN2025 F 12DEC2020 JAN2025
5 205 Raj North Smartphone 70 14000 22JAN2025 M 20FEB2019 JAN2025
6 206 Naresh South Tablet 20 30000 09JAN2025 F 08FEB2021 JAN2025


/*Q8. For each product, compute average sale price using PROC MEANS.*/

proc means data=sample  mean;

 class product;

 var UnitPrice;

run;

Output:

                                                               The MEANS Procedure

Analysis Variable : UnitPrice
Product N Obs Mean
Laptop 2 49000.00
Smartphone 2 14500.00
Tablet 2 27500.00

/*alternate solution*/

proc means data=sample noprint;

 class product;

 var UnitPrice;

 output out=NewSample 

              mean=Avg_UnitPrice;

run;


proc print data=newsample noobs;

 where _type_ = 1;

 var product Avg_UnitPrice;

run;

Output:

Product Avg_UnitPrice
Laptop 49000
Smartphone 14500
Tablet 27500


/*Q9. List employees who joined more than 4 years ago from today.*/

data ago;

 set sample;

 if intck('Year' , jOiningDate , '15may2025'd) > 4 then output;

run;

proc  print;run;

Output:

Obs SalesID Name Region Product UnitsSold UnitPrice SaleDate Gender JoiningDate
1 201 Ajay North Laptop 20 50000 05JAN2025 M 10JAN2020
2 202 Neha East Smartphone 50 15000 10JAN2025 F 15JUL2019
3 203 Ravi South Tablet 10 25000 15JAN2025 M 01JAN2018
4 204 Ayesha West Laptop 30 48000 20JAN2025 F 12DEC2020
5 205 Raj North Smartphone 70 14000 22JAN2025 M 20FEB2019


/*alternate solution*/

proc sql;

 select *

  from sample

   where intck('year',joiningdate, '15may2025'd) > 4;

quit;

Output:

SalesID Name Region Product UnitsSold UnitPrice SaleDate Gender JoiningDate
201 Ajay North Laptop 20 50000 05JAN2025 M 10JAN2020
202 Neha East Smartphone 50 15000 10JAN2025 F 15JUL2019
203 Ravi South Tablet 10 25000 15JAN2025 M 01JAN2018
204 Ayesha West Laptop 30 48000 20JAN2025 F 12DEC2020
205 Raj North Smartphone 70 14000 22JAN2025 M 20FEB2019


/*Q10. Create a report showing total units sold by product and gender using PROC REPORT.*/

proc report data=sample nowd;

 column name product unitssold gender;

 define name / display;

 define product / group; 

 define gender /group;

 define unitssold / analysis format=comma10.2;

run;

Output:

Name Product UnitsSold Gender
Ayesha Laptop 30.00 F
Ajay   20.00 M
Neha Smartphone 50.00 F
Raj   70.00 M
Naresh Tablet 20.00 F
Ravi   10.00 M


/*alternate solution*/

proc report data=sample nowd;

     column name Product Gender UnitsSold;

     define name / display;

    define Product / group;

    define Gender / across;

    define UnitsSold / analysis sum format=comma10.;

run;

Output:

  Gender  
Name Product F M UnitsSold
Ajay Laptop . 1 20
Ayesha   1 . 30
Neha Smartphone 1 . 50
Raj   . 1 70
Ravi Tablet . 1 10
Naresh   1 . 20


PRACTICE AND COMMENT YOUR CODE: 

--->PLEASE FOLLOW OUR BLOG FOR MORE UPDATES.

PLEASE FOLLOW OUR TELEGRAM CHANNEL CLICK HERE

PLEASE FOLLOW OUR FACEBOOK PAGE  CLICK HERE

PLEASE FOLLOW OUR INSTAGRAM PAGE CLICK HERE

Comments