181.SAS PROJECT 11-20 QUESTIONS: DATASTEP | PROC SQL | PROC PRINT | PROC FORMAT | PROC MEANS | PROC REPORT | MACRO | COMPLETE RETAIL SALES ANALYSIS

SAS PROJECT 11-20 QUESTIONS: DATASTEP | PROC SQL | PROC PRINT | PROC FORMAT | PROC MEANS | PROC REPORT | MACRO | COMPLETE RETAIL SALES ANALYSIS


data sample;

 input SalesID Name$ Region$ Product:$10. UnitsSold UnitPrice 

        SaleDate:date9. Gender$ JoiningDate:date9.;

        format SaleDate joiningDate Date9.;

 cards;

201 Ajay North Laptop 20 50000 05JAN2025 M 10JAN2020

202 Neha East Smartphone 50 15000 10JAN2025 F 15JUL2019

203 Ravi South Tablet 10 25000 15JAN2025 M 01JAN2018

204 Ayesha West Laptop 30 48000 20JAN2025 F 12DEC2020

205 Raj     North Smartphone 70 14000 22JAN2025 M 20FEB2019

206 Naresh  South  Tablet  20  30000  09JAN2025  F   08FEB2021

;

run;

proc print;run;

Output:

ObsSalesIDNameRegionProductUnitsSoldUnitPriceSaleDateGenderJoiningDate
1201AjayNorthLaptop205000005JAN2025M10JAN2020
2202NehaEastSmartphone501500010JAN2025F15JUL2019
3203RaviSouthTablet102500015JAN2025M01JAN2018
4204AyeshaWestLaptop304800020JAN2025F12DEC2020
5205RajNorthSmartphone701400022JAN2025M20FEB2019
6206NareshSouthTablet203000009JAN2025F08FEB2021


/*Q11. Use PROC TRANSPOSE to transpose the total sales of each product by region.*/

proc transpose data=sales out=transale;

  by region notsorted;

  var totalsale;

run;

proc print;run;

Output:

Obs Region _NAME_ COL1
1 North TotalSale Medium
2 East TotalSale Medium
3 South TotalSale Low
4 West TotalSale High
5 North TotalSale Medium
6 South TotalSale Medium


*/alternate solution:*/

proc transpose data=sale out=transaleI;

  by product notsorted;

  var totalsale;

  id region;

run;

proc print;run;

Output:

Obs Product _NAME_ North East South West
1 Laptop TotalSale Medium . . .
2 Smartphone TotalSale . Medium . .
3 Tablet TotalSale . . Low .
4 Laptop TotalSale . . . High
5 Smartphone TotalSale Medium . . .
6 Tablet TotalSale . . Medium .


/*Q12. Write a macro that adds a Commission column based on this logic:*/

/*If TotalSale > 1,000,000, commission = 10%*/

/*Else if TotalSale > 500,000, commission = 5%*/

/*Else commission = 2%*/

%macro comm(data,set);

data &data;

 set &set;

 if totalsale > 1000000 then commission = totalsale*0.10;

 else if totalsale > 500000 then commission = totalsale*0.05;

 else commission = totalsale*0.02;

run;

proc print;run;

%mend;


%comm(newcom,sales)

Output:

Obs SalesID Name Region Product UnitsSold UnitPrice SaleDate Gender JoiningDate TotalSale commission
1 201 Ajay North Laptop 20 50000 05JAN2025 M 10JAN2020 Medium 50000
2 202 Neha East Smartphone 50 15000 10JAN2025 F 15JUL2019 Medium 37500
3 203 Ravi South Tablet 10 25000 15JAN2025 M 01JAN2018 Low 5000
4 204 Ayesha West Laptop 30 48000 20JAN2025 F 12DEC2020 High 144000
5 205 Raj North Smartphone 70 14000 22JAN2025 M 20FEB2019 Medium 49000
6 206 Naresh South Tablet 20 30000 09JAN2025 F 08FEB2021 Medium 30000


/*Q13. Identify duplicate entries based on SalesID using PROC SORT and NODUPKEY.*/

proc sort data=sample out=sortdata nodupkey;

 by salesid;

run;

proc print;run;

Output:

Obs SalesID Name Region Product UnitsSold UnitPrice SaleDate Gender JoiningDate
1 201 Ajay North Laptop 20 50000 05JAN2025 M 10JAN2020
2 202 Neha East Smartphone 50 15000 10JAN2025 F 15JUL2019
3 203 Ravi South Tablet 10 25000 15JAN2025 M 01JAN2018
4 204 Ayesha West Laptop 30 48000 20JAN2025 F 12DEC2020
5 205 Raj North Smartphone 70 14000 22JAN2025 M 20FEB2019
6 206 Naresh South Tablet 20 30000 09JAN2025 F 08FEB2021


/*Q14. Use PROC SGPLOT to create a vertical bar chart for average units sold by product.*/

proc sql;

 create table avg as

 select product , mean(unitssold) as avgunitsold

 from sample

 group by product;

quit;

proc print;run;

Output:

Obs Product avgunitsold
1 Laptop 25
2 Smartphone 60
3 Tablet 15


proc sgplot data=avg;

 vbar product/response=avgunitsold datalabel;

 yaxis label="Average Units Sold";

 xaxis label="Product";

run;

Log:

NOTE: PROCEDURE SGPLOT used (Total process time):

      real time           2.34 seconds

      cpu time            0.54 seconds


NOTE: Listing image output written to SGPlot1.png.

NOTE: There were 3 observations read from the data set WORK.AVG.


/*Q15. Using PROC SQL, get the salesperson who sold the maximum units in a single sale.*/

proc sql ;

 select salesid,name,unitssold

  from sample

   where unitssold = (select  max(unitssold)

                                   from sample);

quit;

Output:

SalesID Name UnitsSold
205 Raj 70


/*Q16. Create a new variable Experience as years between JoiningDate and today, rounded to 2 decimals.*/

data exp;

 set sample;

 experience=intck('year',joiningdate,'15may2025'd);

run;

proc print;run;

Output:

Obs SalesID Name Region Product UnitsSold UnitPrice SaleDate Gender JoiningDate experience
1 201 Ajay North Laptop 20 50000 05JAN2025 M 10JAN2020 5
2 202 Neha East Smartphone 50 15000 10JAN2025 F 15JUL2019 6
3 203 Ravi South Tablet 10 25000 15JAN2025 M 01JAN2018 7
4 204 Ayesha West Laptop 30 48000 20JAN2025 F 12DEC2020 5
5 205 Raj North Smartphone 70 14000 22JAN2025 M 20FEB2019 6
6 206 Naresh South Tablet 20 30000 09JAN2025 F 08FEB2021 4


*/alternate solution:*/

data expI;

 set sample;

 experience=round(("15may2025"d-joiningdate)/365.25,0.01);

run;

proc print;run;

Output:

Obs SalesID Name Region Product UnitsSold UnitPrice SaleDate Gender JoiningDate experience
1 201 Ajay North Laptop 20 50000 05JAN2025 M 10JAN2020 5.34
2 202 Neha East Smartphone 50 15000 10JAN2025 F 15JUL2019 5.83
3 203 Ravi South Tablet 10 25000 15JAN2025 M 01JAN2018 7.37
4 204 Ayesha West Laptop 30 48000 20JAN2025 F 12DEC2020 4.42
5 205 Raj North Smartphone 70 14000 22JAN2025 M 20FEB2019 6.23
6 206 Naresh South Tablet 20 30000 09JAN2025 F 08FEB2021 4.26


/*Q17. Use PROC FREQ to calculate % of salespeople by region.*/

proc freq data=sample;

 table region / out=percent ;

run;

proc print data=percent;

 var region count percent;

run;

Output:

                                                                   The FREQ Procedure

Region Frequency Percent Cumulative
Frequency
Cumulative
Percent
East 1 16.67 1 16.67
North 2 33.33 3 50.00
South 2 33.33 5 83.33
West 1 16.67 6 100.00

Obs Region COUNT PERCENT
1 East 1 16.6667
2 North 2 33.3333
3 South 2 33.3333
4 West 1 16.6667

/*Q18. Generate a summary table showing average sale value per month using SALEDATE.*/

data monthsale;

 set sample;

 total=unitssold*unitprice;

 format salemonth monyy7.;

 salemonth=intnx('month',saledate,0,'b');

run;

proc print;run;

Output:

Obs SalesID Name Region Product UnitsSold UnitPrice SaleDate Gender JoiningDate total salemonth
1 201 Ajay North Laptop 20 50000 05JAN2025 M 10JAN2020 1000000 JAN2025
2 202 Neha East Smartphone 50 15000 10JAN2025 F 15JUL2019 750000 JAN2025
3 203 Ravi South Tablet 10 25000 15JAN2025 M 01JAN2018 250000 JAN2025
4 204 Ayesha West Laptop 30 48000 20JAN2025 F 12DEC2020 1440000 JAN2025
5 205 Raj North Smartphone 70 14000 22JAN2025 M 20FEB2019 980000 JAN2025
6 206 Naresh South Tablet 20 30000 09JAN2025 F 08FEB2021 600000 JAN2025


proc sql;

 create table new1 as

 select salemonth format=monyy7.,

       mean(total) as avgtotal format=comma12.2

 from monthsale

 group by salemonth

 order by salemonth;

quit;

proc print;run;

Output:

Obs salemonth avgtotal
1 JAN2025 836,666.67


/*Q19. Create a gender-wise and product-wise cross-tab using PROC FREQ.*/

proc freq data=sample;

 table gender*product/norow nocol nopercent;

run;

Output:

                                                                          The FREQ Procedure

Frequency
Table of Gender by Product
Gender Product
Laptop Smartphone Tablet Total
F
1
1
1
3
M
1
1
1
3
Total
2
2
2
6


*/alternate solution:*/

proc freq data=sample;

 table gender*product/chisq;

run;

Output:

                                                                              The FREQ Procedure

Frequency
Percent
Row Pct
Col Pct
Table of Gender by Product
Gender Product
Laptop Smartphone Tablet Total
F
1
16.67
33.33
50.00
1
16.67
33.33
50.00
1
16.67
33.33
50.00
3
50.00
 
 
M
1
16.67
33.33
50.00
1
16.67
33.33
50.00
1
16.67
33.33
50.00
3
50.00
 
 
Total
2
33.33
2
33.33
2
33.33
6
100.00


Statistics for Table of Gender by Product

Statistic DF Value Prob
Chi-Square 2 0.0000 1.0000
Likelihood Ratio Chi-Square 2 0.0000 1.0000
Mantel-Haenszel Chi-Square 1 0.0000 1.0000
Phi Coefficient   0.0000  
Contingency Coefficient   0.0000  
Cramer's V   0.0000  
WARNING: 100% of the cells have expected counts less
than 5. Chi-Square may not be a valid test.


Sample Size = 6

/*Q20. Create a listing of employees who sold a Laptop with UnitPrice > 45,000 AND joined before 2021.*/

data list;

 set sample;

 year = year(joiningdate);

 where product = 'Laptop' and unitprice > 45000 and joiningdate > 2021;

run;

proc print;run;

Output:

Obs SalesID Name Region Product UnitsSold UnitPrice SaleDate Gender JoiningDate year
1 201 Ajay North Laptop 20 50000 05JAN2025 M 10JAN2020 2020
2 204 Ayesha West Laptop 30 48000 20JAN2025 F 12DEC2020 2020


*/alternate solution:*/

proc print data=sample noobs;

    where Product = 'Laptop' 

      and UnitPrice > 45000

      and JoiningDate < '01JAN2021'd;

    var  Name Product UnitPrice JoiningDate;

    format JoiningDate date9.;

    title "Employees who sold Laptop > 45,000 and joined before 2021";

run;

Output:

                                      Employees who sold Laptop > 45,000 and joined before 2021

Name Product UnitPrice JoiningDate
Ajay Laptop 50000 10JAN2020
Ayesha Laptop 48000 12DEC2020


PRACTICE AND COMMENT YOUR CODE: 

--->PLEASE FOLLOW OUR BLOG FOR MORE UPDATES.

PLEASE FOLLOW OUR TELEGRAM CHANNEL CLICK HERE

PLEASE FOLLOW OUR FACEBOOK PAGE  CLICK HERE

PLEASE FOLLOW OUR INSTAGRAM PAGE CLICK HERE

Comments

Popular posts from this blog

397.If a satellite has excellent signal strength but very high latency, can it still deliver good quality communication? Why or why not?A Sas Study

401.How Efficient Are Global Data Centers? A Complete SAS Analytics Study

383.Which Pharma Distributors Are High-Risk? Can SAS Detect Delays, Temperature Violations, and Fraud?