181.SAS PROJECT 11-20 QUESTIONS: DATASTEP | PROC SQL | PROC PRINT | PROC FORMAT | PROC MEANS | PROC REPORT | MACRO | COMPLETE RETAIL SALES ANALYSIS

SAS PROJECT 11-20 QUESTIONS: DATASTEP | PROC SQL | PROC PRINT | PROC FORMAT | PROC MEANS | PROC REPORT | MACRO | COMPLETE RETAIL SALES ANALYSIS


data sample;

 input SalesID Name$ Region$ Product:$10. UnitsSold UnitPrice 

        SaleDate:date9. Gender$ JoiningDate:date9.;

        format SaleDate joiningDate Date9.;

 cards;

201 Ajay North Laptop 20 50000 05JAN2025 M 10JAN2020

202 Neha East Smartphone 50 15000 10JAN2025 F 15JUL2019

203 Ravi South Tablet 10 25000 15JAN2025 M 01JAN2018

204 Ayesha West Laptop 30 48000 20JAN2025 F 12DEC2020

205 Raj     North Smartphone 70 14000 22JAN2025 M 20FEB2019

206 Naresh  South  Tablet  20  30000  09JAN2025  F   08FEB2021

;

run;

proc print;run;

Output:

ObsSalesIDNameRegionProductUnitsSoldUnitPriceSaleDateGenderJoiningDate
1201AjayNorthLaptop205000005JAN2025M10JAN2020
2202NehaEastSmartphone501500010JAN2025F15JUL2019
3203RaviSouthTablet102500015JAN2025M01JAN2018
4204AyeshaWestLaptop304800020JAN2025F12DEC2020
5205RajNorthSmartphone701400022JAN2025M20FEB2019
6206NareshSouthTablet203000009JAN2025F08FEB2021


/*Q11. Use PROC TRANSPOSE to transpose the total sales of each product by region.*/

proc transpose data=sales out=transale;

  by region notsorted;

  var totalsale;

run;

proc print;run;

Output:

Obs Region _NAME_ COL1
1 North TotalSale Medium
2 East TotalSale Medium
3 South TotalSale Low
4 West TotalSale High
5 North TotalSale Medium
6 South TotalSale Medium


*/alternate solution:*/

proc transpose data=sale out=transaleI;

  by product notsorted;

  var totalsale;

  id region;

run;

proc print;run;

Output:

Obs Product _NAME_ North East South West
1 Laptop TotalSale Medium . . .
2 Smartphone TotalSale . Medium . .
3 Tablet TotalSale . . Low .
4 Laptop TotalSale . . . High
5 Smartphone TotalSale Medium . . .
6 Tablet TotalSale . . Medium .


/*Q12. Write a macro that adds a Commission column based on this logic:*/

/*If TotalSale > 1,000,000, commission = 10%*/

/*Else if TotalSale > 500,000, commission = 5%*/

/*Else commission = 2%*/

%macro comm(data,set);

data &data;

 set &set;

 if totalsale > 1000000 then commission = totalsale*0.10;

 else if totalsale > 500000 then commission = totalsale*0.05;

 else commission = totalsale*0.02;

run;

proc print;run;

%mend;


%comm(newcom,sales)

Output:

Obs SalesID Name Region Product UnitsSold UnitPrice SaleDate Gender JoiningDate TotalSale commission
1 201 Ajay North Laptop 20 50000 05JAN2025 M 10JAN2020 Medium 50000
2 202 Neha East Smartphone 50 15000 10JAN2025 F 15JUL2019 Medium 37500
3 203 Ravi South Tablet 10 25000 15JAN2025 M 01JAN2018 Low 5000
4 204 Ayesha West Laptop 30 48000 20JAN2025 F 12DEC2020 High 144000
5 205 Raj North Smartphone 70 14000 22JAN2025 M 20FEB2019 Medium 49000
6 206 Naresh South Tablet 20 30000 09JAN2025 F 08FEB2021 Medium 30000


/*Q13. Identify duplicate entries based on SalesID using PROC SORT and NODUPKEY.*/

proc sort data=sample out=sortdata nodupkey;

 by salesid;

run;

proc print;run;

Output:

Obs SalesID Name Region Product UnitsSold UnitPrice SaleDate Gender JoiningDate
1 201 Ajay North Laptop 20 50000 05JAN2025 M 10JAN2020
2 202 Neha East Smartphone 50 15000 10JAN2025 F 15JUL2019
3 203 Ravi South Tablet 10 25000 15JAN2025 M 01JAN2018
4 204 Ayesha West Laptop 30 48000 20JAN2025 F 12DEC2020
5 205 Raj North Smartphone 70 14000 22JAN2025 M 20FEB2019
6 206 Naresh South Tablet 20 30000 09JAN2025 F 08FEB2021


/*Q14. Use PROC SGPLOT to create a vertical bar chart for average units sold by product.*/

proc sql;

 create table avg as

 select product , mean(unitssold) as avgunitsold

 from sample

 group by product;

quit;

proc print;run;

Output:

Obs Product avgunitsold
1 Laptop 25
2 Smartphone 60
3 Tablet 15


proc sgplot data=avg;

 vbar product/response=avgunitsold datalabel;

 yaxis label="Average Units Sold";

 xaxis label="Product";

run;

Log:

NOTE: PROCEDURE SGPLOT used (Total process time):

      real time           2.34 seconds

      cpu time            0.54 seconds


NOTE: Listing image output written to SGPlot1.png.

NOTE: There were 3 observations read from the data set WORK.AVG.


/*Q15. Using PROC SQL, get the salesperson who sold the maximum units in a single sale.*/

proc sql ;

 select salesid,name,unitssold

  from sample

   where unitssold = (select  max(unitssold)

                                   from sample);

quit;

Output:

SalesID Name UnitsSold
205 Raj 70


/*Q16. Create a new variable Experience as years between JoiningDate and today, rounded to 2 decimals.*/

data exp;

 set sample;

 experience=intck('year',joiningdate,'15may2025'd);

run;

proc print;run;

Output:

Obs SalesID Name Region Product UnitsSold UnitPrice SaleDate Gender JoiningDate experience
1 201 Ajay North Laptop 20 50000 05JAN2025 M 10JAN2020 5
2 202 Neha East Smartphone 50 15000 10JAN2025 F 15JUL2019 6
3 203 Ravi South Tablet 10 25000 15JAN2025 M 01JAN2018 7
4 204 Ayesha West Laptop 30 48000 20JAN2025 F 12DEC2020 5
5 205 Raj North Smartphone 70 14000 22JAN2025 M 20FEB2019 6
6 206 Naresh South Tablet 20 30000 09JAN2025 F 08FEB2021 4


*/alternate solution:*/

data expI;

 set sample;

 experience=round(("15may2025"d-joiningdate)/365.25,0.01);

run;

proc print;run;

Output:

Obs SalesID Name Region Product UnitsSold UnitPrice SaleDate Gender JoiningDate experience
1 201 Ajay North Laptop 20 50000 05JAN2025 M 10JAN2020 5.34
2 202 Neha East Smartphone 50 15000 10JAN2025 F 15JUL2019 5.83
3 203 Ravi South Tablet 10 25000 15JAN2025 M 01JAN2018 7.37
4 204 Ayesha West Laptop 30 48000 20JAN2025 F 12DEC2020 4.42
5 205 Raj North Smartphone 70 14000 22JAN2025 M 20FEB2019 6.23
6 206 Naresh South Tablet 20 30000 09JAN2025 F 08FEB2021 4.26


/*Q17. Use PROC FREQ to calculate % of salespeople by region.*/

proc freq data=sample;

 table region / out=percent ;

run;

proc print data=percent;

 var region count percent;

run;

Output:

                                                                   The FREQ Procedure

Region Frequency Percent Cumulative
Frequency
Cumulative
Percent
East 1 16.67 1 16.67
North 2 33.33 3 50.00
South 2 33.33 5 83.33
West 1 16.67 6 100.00

Obs Region COUNT PERCENT
1 East 1 16.6667
2 North 2 33.3333
3 South 2 33.3333
4 West 1 16.6667

/*Q18. Generate a summary table showing average sale value per month using SALEDATE.*/

data monthsale;

 set sample;

 total=unitssold*unitprice;

 format salemonth monyy7.;

 salemonth=intnx('month',saledate,0,'b');

run;

proc print;run;

Output:

Obs SalesID Name Region Product UnitsSold UnitPrice SaleDate Gender JoiningDate total salemonth
1 201 Ajay North Laptop 20 50000 05JAN2025 M 10JAN2020 1000000 JAN2025
2 202 Neha East Smartphone 50 15000 10JAN2025 F 15JUL2019 750000 JAN2025
3 203 Ravi South Tablet 10 25000 15JAN2025 M 01JAN2018 250000 JAN2025
4 204 Ayesha West Laptop 30 48000 20JAN2025 F 12DEC2020 1440000 JAN2025
5 205 Raj North Smartphone 70 14000 22JAN2025 M 20FEB2019 980000 JAN2025
6 206 Naresh South Tablet 20 30000 09JAN2025 F 08FEB2021 600000 JAN2025


proc sql;

 create table new1 as

 select salemonth format=monyy7.,

       mean(total) as avgtotal format=comma12.2

 from monthsale

 group by salemonth

 order by salemonth;

quit;

proc print;run;

Output:

Obs salemonth avgtotal
1 JAN2025 836,666.67


/*Q19. Create a gender-wise and product-wise cross-tab using PROC FREQ.*/

proc freq data=sample;

 table gender*product/norow nocol nopercent;

run;

Output:

                                                                          The FREQ Procedure

Frequency
Table of Gender by Product
Gender Product
Laptop Smartphone Tablet Total
F
1
1
1
3
M
1
1
1
3
Total
2
2
2
6


*/alternate solution:*/

proc freq data=sample;

 table gender*product/chisq;

run;

Output:

                                                                              The FREQ Procedure

Frequency
Percent
Row Pct
Col Pct
Table of Gender by Product
Gender Product
Laptop Smartphone Tablet Total
F
1
16.67
33.33
50.00
1
16.67
33.33
50.00
1
16.67
33.33
50.00
3
50.00
 
 
M
1
16.67
33.33
50.00
1
16.67
33.33
50.00
1
16.67
33.33
50.00
3
50.00
 
 
Total
2
33.33
2
33.33
2
33.33
6
100.00


Statistics for Table of Gender by Product

Statistic DF Value Prob
Chi-Square 2 0.0000 1.0000
Likelihood Ratio Chi-Square 2 0.0000 1.0000
Mantel-Haenszel Chi-Square 1 0.0000 1.0000
Phi Coefficient   0.0000  
Contingency Coefficient   0.0000  
Cramer's V   0.0000  
WARNING: 100% of the cells have expected counts less
than 5. Chi-Square may not be a valid test.


Sample Size = 6

/*Q20. Create a listing of employees who sold a Laptop with UnitPrice > 45,000 AND joined before 2021.*/

data list;

 set sample;

 year = year(joiningdate);

 where product = 'Laptop' and unitprice > 45000 and joiningdate > 2021;

run;

proc print;run;

Output:

Obs SalesID Name Region Product UnitsSold UnitPrice SaleDate Gender JoiningDate year
1 201 Ajay North Laptop 20 50000 05JAN2025 M 10JAN2020 2020
2 204 Ayesha West Laptop 30 48000 20JAN2025 F 12DEC2020 2020


*/alternate solution:*/

proc print data=sample noobs;

    where Product = 'Laptop' 

      and UnitPrice > 45000

      and JoiningDate < '01JAN2021'd;

    var  Name Product UnitPrice JoiningDate;

    format JoiningDate date9.;

    title "Employees who sold Laptop > 45,000 and joined before 2021";

run;

Output:

                                      Employees who sold Laptop > 45,000 and joined before 2021

Name Product UnitPrice JoiningDate
Ajay Laptop 50000 10JAN2020
Ayesha Laptop 48000 12DEC2020


PRACTICE AND COMMENT YOUR CODE: 

--->PLEASE FOLLOW OUR BLOG FOR MORE UPDATES.

PLEASE FOLLOW OUR TELEGRAM CHANNEL CLICK HERE

PLEASE FOLLOW OUR FACEBOOK PAGE  CLICK HERE

PLEASE FOLLOW OUR INSTAGRAM PAGE CLICK HERE

Comments