322.ADVANCED CAR COMPANIES SALES AND PERFORMANCE DATASET CREATION AND ANALYSIS USING PROC SQL | PROC MEANS | PROC STANDARD | PROC SGPLOT | MACROS | INTCK | INTNX
ADVANCED CAR COMPANIES SALES AND PERFORMANCE DATASET CREATION AND ANALYSIS USING PROC SQL | PROC MEANS | PROC STANDARD | PROC SGPLOT | MACROS | INTCK | INTNX
options nocenter;
1. CREATE RAW DATASET WITH DATE VARIABLES AND FORMATS
data work.cars_raw;
length Brand $15 Model $20 Engine_Type $10;
format Launch_Date Review_Date Analysis_Date Next_Service_Date Five_Year_Anniv date9.;
format Price comma10.2;
input Brand $ Model $ Engine_Type $
Sales_2024 Mileage Price Safety_Rating
Launch_Date :date9. Review_Date :date9.;
/* Set a common analysis date (end of 2024) */
Analysis_Date = '31DEC2024'd;
/* Date-based derived variables using INTCK and INTNX */
Years_Since_Launch = intck('year', Launch_Date, Analysis_Date);
Months_Between_Launch_Review = intck('month', Launch_Date, Review_Date);
/* 5-year anniversary from launch and next service due 6 months after last review */
Five_Year_Anniv = intnx('year', Launch_Date, 5, 'same');
Next_Service_Date = intnx('month', Review_Date, 6, 'same');
datalines;
Toyota Corolla Petrol 980000 18.5 18000 4.5 15JAN2018 10MAR2024
Honda City Petrol 620000 17.8 16000 4.3 10JUN2019 05FEB2024
Maruti Swift Petrol 850000 21.1 9000 4.2 01APR2017 20JAN2024
Hyundai Creta Diesel 740000 19.5 20000 4.6 20AUG2020 15MAR2024
Tata NexonEV EV 210000 320.0 22000 4.7 05DEC2021 01APR2024
Kia Seltos Diesel 530000 18.0 19000 4.4 12SEP2019 12FEB2024
Mahindra XUV700 Diesel 350000 16.5 25000 4.8 01NOV2021 28APR2024
Skoda Slavia Petrol 90000 18.7 23000 4.4 15MAR2022 10MAY2024
Volkswagen Virtus Petrol 80000 18.9 24000 4.3 20APR2022 18MAY2024
MG Hector Petrol 260000 15.8 21000 4.2 10JAN2020 22MAR2024
Renault Kiger Petrol 150000 19.0 11000 4.0 05FEB2021 10FEB2024
Hyundai Ioniq5 EV 30000 450.0 45000 4.9 01JUL2023 01JUN2024
;
run;
proc print data=work.cars_raw;
run;
OUTPUT:
| Obs | Brand | Model | Engine_Type | Launch_Date | Review_Date | Analysis_Date | Next_Service_Date | Five_Year_Anniv | Price | Sales_2024 | Mileage | Safety_Rating | Years_Since_Launch | Months_Between_Launch_Review |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | Toyota | Corolla | Petrol | 15JAN2018 | 10MAR2024 | 31DEC2024 | 10SEP2024 | 15JAN2023 | 18,000.00 | 980000 | 18.5 | 4.5 | 6 | 74 |
| 2 | Honda | City | Petrol | 10JUN2019 | 05FEB2024 | 31DEC2024 | 05AUG2024 | 10JUN2024 | 16,000.00 | 620000 | 17.8 | 4.3 | 5 | 56 |
| 3 | Maruti | Swift | Petrol | 01APR2017 | 20JAN2024 | 31DEC2024 | 20JUL2024 | 01APR2022 | 9,000.00 | 850000 | 21.1 | 4.2 | 7 | 81 |
| 4 | Hyundai | Creta | Diesel | 20AUG2020 | 15MAR2024 | 31DEC2024 | 15SEP2024 | 20AUG2025 | 20,000.00 | 740000 | 19.5 | 4.6 | 4 | 43 |
| 5 | Tata | NexonEV | EV | 05DEC2021 | 01APR2024 | 31DEC2024 | 01OCT2024 | 05DEC2026 | 22,000.00 | 210000 | 320.0 | 4.7 | 3 | 28 |
| 6 | Kia | Seltos | Diesel | 12SEP2019 | 12FEB2024 | 31DEC2024 | 12AUG2024 | 12SEP2024 | 19,000.00 | 530000 | 18.0 | 4.4 | 5 | 53 |
| 7 | Mahindra | XUV700 | Diesel | 01NOV2021 | 28APR2024 | 31DEC2024 | 28OCT2024 | 01NOV2026 | 25,000.00 | 350000 | 16.5 | 4.8 | 3 | 29 |
| 8 | Skoda | Slavia | Petrol | 15MAR2022 | 10MAY2024 | 31DEC2024 | 10NOV2024 | 15MAR2027 | 23,000.00 | 90000 | 18.7 | 4.4 | 2 | 26 |
| 9 | Volkswagen | Virtus | Petrol | 20APR2022 | 18MAY2024 | 31DEC2024 | 18NOV2024 | 20APR2027 | 24,000.00 | 80000 | 18.9 | 4.3 | 2 | 25 |
| 10 | MG | Hector | Petrol | 10JAN2020 | 22MAR2024 | 31DEC2024 | 22SEP2024 | 10JAN2025 | 21,000.00 | 260000 | 15.8 | 4.2 | 4 | 50 |
| 11 | Renault | Kiger | Petrol | 05FEB2021 | 10FEB2024 | 31DEC2024 | 10AUG2024 | 05FEB2026 | 11,000.00 | 150000 | 19.0 | 4.0 | 3 | 36 |
| 12 | Hyundai | Ioniq5 | EV | 01JUL2023 | 01JUN2024 | 31DEC2024 | 01DEC2024 | 01JUL2028 | 45,000.00 | 30000 | 450.0 | 4.9 | 1 | 11 |
2. PROC SQL SUMMARY BY BRAND AND ENGINE TYPE
proc sql;
create table work.cars_brand_summary as
select Brand,
Engine_Type,
count(*) as Models_Count,
sum(Sales_2024) as Total_Sales_2024,
mean(Mileage) as Avg_Mileage,
mean(Price) as Avg_Price,
mean(Safety_Rating) as Avg_Safety_Rating
from work.cars_raw
group by Brand, Engine_Type
order by Total_Sales_2024 desc;
quit;
proc print data=work.cars_brand_summary;
run;
OUTPUT:
| Obs | Brand | Engine_Type | Models_Count | Total_Sales_2024 | Avg_Mileage | Avg_Price | Avg_Safety_Rating |
|---|---|---|---|---|---|---|---|
| 1 | Toyota | Petrol | 1 | 980000 | 18.5 | 18000 | 4.5 |
| 2 | Maruti | Petrol | 1 | 850000 | 21.1 | 9000 | 4.2 |
| 3 | Hyundai | Diesel | 1 | 740000 | 19.5 | 20000 | 4.6 |
| 4 | Honda | Petrol | 1 | 620000 | 17.8 | 16000 | 4.3 |
| 5 | Kia | Diesel | 1 | 530000 | 18.0 | 19000 | 4.4 |
| 6 | Mahindra | Diesel | 1 | 350000 | 16.5 | 25000 | 4.8 |
| 7 | MG | Petrol | 1 | 260000 | 15.8 | 21000 | 4.2 |
| 8 | Tata | EV | 1 | 210000 | 320.0 | 22000 | 4.7 |
| 9 | Renault | Petrol | 1 | 150000 | 19.0 | 11000 | 4.0 |
| 10 | Skoda | Petrol | 1 | 90000 | 18.7 | 23000 | 4.4 |
| 11 | Volkswagen | Petrol | 1 | 80000 | 18.9 | 24000 | 4.3 |
| 12 | Hyundai | EV | 1 | 30000 | 450.0 | 45000 | 4.9 |
3. MACRO FOR SALES RANKING USING PROC SQL (MODEL LEVEL)
%macro rank_sales(in=work.cars_raw, out=work.cars_ranked);
proc sql;
create table &out as
select a.*,
/* Rank based on descending Sales_2024 using a correlated subquery */
(select count(*)
from &in b
where b.Sales_2024 >= a.Sales_2024) as Sales_Rank
from &in as a
order by Sales_2024 desc;
quit;
proc print data=&out;
run;
%mend rank_sales;
%rank_sales();
OUTPUT:
| Obs | Brand | Model | Engine_Type | Launch_Date | Review_Date | Analysis_Date | Next_Service_Date | Five_Year_Anniv | Price | Sales_2024 | Mileage | Safety_Rating | Years_Since_Launch | Months_Between_Launch_Review | Sales_Rank |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | Toyota | Corolla | Petrol | 15JAN2018 | 10MAR2024 | 31DEC2024 | 10SEP2024 | 15JAN2023 | 18,000.00 | 980000 | 18.5 | 4.5 | 6 | 74 | 1 |
| 2 | Maruti | Swift | Petrol | 01APR2017 | 20JAN2024 | 31DEC2024 | 20JUL2024 | 01APR2022 | 9,000.00 | 850000 | 21.1 | 4.2 | 7 | 81 | 2 |
| 3 | Hyundai | Creta | Diesel | 20AUG2020 | 15MAR2024 | 31DEC2024 | 15SEP2024 | 20AUG2025 | 20,000.00 | 740000 | 19.5 | 4.6 | 4 | 43 | 3 |
| 4 | Honda | City | Petrol | 10JUN2019 | 05FEB2024 | 31DEC2024 | 05AUG2024 | 10JUN2024 | 16,000.00 | 620000 | 17.8 | 4.3 | 5 | 56 | 4 |
| 5 | Kia | Seltos | Diesel | 12SEP2019 | 12FEB2024 | 31DEC2024 | 12AUG2024 | 12SEP2024 | 19,000.00 | 530000 | 18.0 | 4.4 | 5 | 53 | 5 |
| 6 | Mahindra | XUV700 | Diesel | 01NOV2021 | 28APR2024 | 31DEC2024 | 28OCT2024 | 01NOV2026 | 25,000.00 | 350000 | 16.5 | 4.8 | 3 | 29 | 6 |
| 7 | MG | Hector | Petrol | 10JAN2020 | 22MAR2024 | 31DEC2024 | 22SEP2024 | 10JAN2025 | 21,000.00 | 260000 | 15.8 | 4.2 | 4 | 50 | 7 |
| 8 | Tata | NexonEV | EV | 05DEC2021 | 01APR2024 | 31DEC2024 | 01OCT2024 | 05DEC2026 | 22,000.00 | 210000 | 320.0 | 4.7 | 3 | 28 | 8 |
| 9 | Renault | Kiger | Petrol | 05FEB2021 | 10FEB2024 | 31DEC2024 | 10AUG2024 | 05FEB2026 | 11,000.00 | 150000 | 19.0 | 4.0 | 3 | 36 | 9 |
| 10 | Skoda | Slavia | Petrol | 15MAR2022 | 10MAY2024 | 31DEC2024 | 10NOV2024 | 15MAR2027 | 23,000.00 | 90000 | 18.7 | 4.4 | 2 | 26 | 10 |
| 11 | Volkswagen | Virtus | Petrol | 20APR2022 | 18MAY2024 | 31DEC2024 | 18NOV2024 | 20APR2027 | 24,000.00 | 80000 | 18.9 | 4.3 | 2 | 25 | 11 |
| 12 | Hyundai | Ioniq5 | EV | 01JUL2023 | 01JUN2024 | 31DEC2024 | 01DEC2024 | 01JUL2028 | 45,000.00 | 30000 | 450.0 | 4.9 | 1 | 11 | 12 |
4. PROC MEANS: OVERALL NUMERIC SUMMARY
proc means data=work.cars_ranked n mean std min max maxdec=2;
var Sales_2024 Mileage Price Safety_Rating Years_Since_Launch;
run;
OUTPUT:
The MEANS Procedure
| Variable | N | Mean | Std Dev | Minimum | Maximum |
|---|---|---|---|---|---|
Sales_2024 Mileage Price Safety_Rating Years_Since_Launch | 12 12 12 12 12 | 407500.00 79.48 21083.33 4.44 3.75 | 326861.80 145.38 8989.47 0.27 1.76 | 30000.00 15.80 9000.00 4.00 1.00 | 980000.00 450.00 45000.00 4.90 7.00 |
5. PROC STANDARD: STANDARDIZE SALES, MILEAGE, AND PRICE
proc standard data=work.cars_ranked mean=0 std=1 out=work.cars_standardized;
var Sales_2024 Mileage Price;
run;
proc print data=work.cars_standardized;
var Sales_2024 Mileage Price;
run;
OUTPUT:
| Obs | Sales_2024 | Mileage | Price |
|---|---|---|---|
| 1 | 1.75150 | -0.41948 | -0.34 |
| 2 | 1.35378 | -0.40159 | -1.34 |
| 3 | 1.01725 | -0.41260 | -0.12 |
| 4 | 0.65012 | -0.42429 | -0.57 |
| 5 | 0.37478 | -0.42292 | -0.23 |
| 6 | -0.17592 | -0.43323 | 0.44 |
| 7 | -0.45126 | -0.43805 | -0.01 |
| 8 | -0.60423 | 1.65441 | 0.10 |
| 9 | -0.78779 | -0.41604 | -1.12 |
| 10 | -0.97136 | -0.41810 | 0.21 |
| 11 | -1.00195 | -0.41673 | 0.32 |
| 12 | -1.15492 | 2.54862 | 2.66 |
6. MACRO FOR AUTOMATED SUMMARIES BY ANY CLASS VARIABLE
%macro summary_by(classvar=);
title "SUMMARY OF CAR METRICS BY &classvar";
proc means data=work.cars_ranked mean std min max maxdec=2;
class &classvar;
var Sales_2024 Mileage Price Safety_Rating;
run;
title;
%mend summary_by;
%summary_by(classvar=Engine_Type);
OUTPUT:
The MEANS Procedure
| Engine_Type | N Obs | Variable | Mean | Std Dev | Minimum | Maximum |
|---|---|---|---|---|---|---|
| Diesel | 3 | Sales_2024 Mileage Price Safety_Rating | 540000.00 18.00 21333.33 4.60 | 195192.21 1.50 3214.55 0.20 | 350000.00 16.50 19000.00 4.40 | 740000.00 19.50 25000.00 4.80 |
| EV | 2 | Sales_2024 Mileage Price Safety_Rating | 120000.00 385.00 33500.00 4.80 | 127279.22 91.92 16263.46 0.14 | 30000.00 320.00 22000.00 4.70 | 210000.00 450.00 45000.00 4.90 |
| Petrol | 7 | Sales_2024 Mileage Price Safety_Rating | 432857.14 18.54 17428.57 4.27 | 378669.35 1.58 5798.19 0.16 | 80000.00 15.80 9000.00 4.00 | 980000.00 21.10 24000.00 4.50 |
%summary_by(classvar=Brand);
OUTPUT:
The MEANS Procedure
| Brand | N Obs | Variable | Mean | Std Dev | Minimum | Maximum |
|---|---|---|---|---|---|---|
| Honda | 1 | Sales_2024 Mileage Price Safety_Rating | 620000.00 17.80 16000.00 4.30 | . . . . | 620000.00 17.80 16000.00 4.30 | 620000.00 17.80 16000.00 4.30 |
| Hyundai | 2 | Sales_2024 Mileage Price Safety_Rating | 385000.00 234.75 32500.00 4.75 | 502045.81 304.41 17677.67 0.21 | 30000.00 19.50 20000.00 4.60 | 740000.00 450.00 45000.00 4.90 |
| Kia | 1 | Sales_2024 Mileage Price Safety_Rating | 530000.00 18.00 19000.00 4.40 | . . . . | 530000.00 18.00 19000.00 4.40 | 530000.00 18.00 19000.00 4.40 |
| MG | 1 | Sales_2024 Mileage Price Safety_Rating | 260000.00 15.80 21000.00 4.20 | . . . . | 260000.00 15.80 21000.00 4.20 | 260000.00 15.80 21000.00 4.20 |
| Mahindra | 1 | Sales_2024 Mileage Price Safety_Rating | 350000.00 16.50 25000.00 4.80 | . . . . | 350000.00 16.50 25000.00 4.80 | 350000.00 16.50 25000.00 4.80 |
| Maruti | 1 | Sales_2024 Mileage Price Safety_Rating | 850000.00 21.10 9000.00 4.20 | . . . . | 850000.00 21.10 9000.00 4.20 | 850000.00 21.10 9000.00 4.20 |
| Renault | 1 | Sales_2024 Mileage Price Safety_Rating | 150000.00 19.00 11000.00 4.00 | . . . . | 150000.00 19.00 11000.00 4.00 | 150000.00 19.00 11000.00 4.00 |
| Skoda | 1 | Sales_2024 Mileage Price Safety_Rating | 90000.00 18.70 23000.00 4.40 | . . . . | 90000.00 18.70 23000.00 4.40 | 90000.00 18.70 23000.00 4.40 |
| Tata | 1 | Sales_2024 Mileage Price Safety_Rating | 210000.00 320.00 22000.00 4.70 | . . . . | 210000.00 320.00 22000.00 4.70 | 210000.00 320.00 22000.00 4.70 |
| Toyota | 1 | Sales_2024 Mileage Price Safety_Rating | 980000.00 18.50 18000.00 4.50 | . . . . | 980000.00 18.50 18000.00 4.50 | 980000.00 18.50 18000.00 4.50 |
| Volkswagen | 1 | Sales_2024 Mileage Price Safety_Rating | 80000.00 18.90 24000.00 4.30 | . . . . | 80000.00 18.90 24000.00 4.30 | 80000.00 18.90 24000.00 4.30 |
7. PROC SGPLOT VISUALIZATIONS
/* Bar chart: Brand vs Sales_2024 */
proc sgplot data=work.cars_ranked;
vbar Brand / response=Sales_2024 datalabel;
xaxis discreteorder=data label="Brand";
yaxis label="Sales in 2024 (Units)";
run;
OUTPUT:
/* Scatter plot: Mileage vs Price, colored by Engine_Type */
proc sgplot data=work.cars_ranked;
scatter x=Mileage y=Price / group=Engine_Type datalabel=Model;
xaxis label="Mileage (km/L or km/charge)";
yaxis label="Price";
run;
OUTPUT:
Comments
Post a Comment