AGRICULTURAL MACHINERY DATASET CREATION WITH DATA STEP | PROC SQL | PROC MEANS | PROC FREQ | PROC CORR | MACROS FOR PERFORMANCE & DATE CALCULATIONS
options nocenter;
STEP 1: CREATE BASE DATASET WITH 12 AGRICULTURAL MACHINES
data agri_machines_raw;
length Machine_Name $30 Brand $20 Fuel_Type $10 Maintenance_Level $10;
input Machine_Name $ Brand $ Fuel_Type $ Efficiency Cost Usage_Hours
Purchase_Date_char:$10. Last_Service_char:$10. Maintenance_Level $;
datalines;
Tractor_MidRange JohnDeere Diesel 85 1200000 2200 15JAN2018 10JAN2024 High
Tractor_Compact Mahindra Diesel 78 800000 1800 20JUN2019 15DEC2023 Medium
Combine_Harvester NewHolland Diesel 90 3500000 2500 01APR2017 20NOV2023 High
Rotavator_Heavy Kubota Diesel 82 450000 1600 10SEP2020 05FEB2024 Medium
Seed_Drill_Pneumatic Mahindra Diesel 88 600000 1900 25NOV2020 01JAN2024 Medium
Boom_Sprayer_Trailed CLAAS Diesel 80 1100000 2000 12AUG2018 18OCT2023 Medium
Baler_Round JohnDeere Diesel 87 1500000 2100 05MAY2016 22DEC2023 High
Thresher_Stationary LocalBrand Electric 75 300000 1400 30JAN2021 27JAN2024 Medium
Power_Tiller_Compact VST Diesel 70 250000 1300 18JUL2022 03MAR2024 Low
Rice_Transplanter Kubota Diesel 83 900000 1750 09JUN2019 28NOV2023 Medium
Sugarcane_Harvester CaseIH Diesel 92 5500000 2700 21FEB2016 15SEP2023 High
Potato_Digger_Trailed LocalBrand Petrol 68 350000 1200 11OCT2022 10FEB2024 Low
;
run;
proc print data=agri_machines_raw;
run;
OUTPUT:
| Obs | Machine_Name | Brand | Fuel_Type | Maintenance_Level | Efficiency | Cost | Usage_Hours | Purchase_Date_char | Last_Service_char |
|---|---|---|---|---|---|---|---|---|---|
| 1 | Tractor_MidRange | JohnDeere | Diesel | High | 85 | 1200000 | 2200 | 15JAN2018 | 10JAN2024 |
| 2 | Tractor_Compact | Mahindra | Diesel | Medium | 78 | 800000 | 1800 | 20JUN2019 | 15DEC2023 |
| 3 | Combine_Harvester | NewHolland | Diesel | High | 90 | 3500000 | 2500 | 01APR2017 | 20NOV2023 |
| 4 | Rotavator_Heavy | Kubota | Diesel | Medium | 82 | 450000 | 1600 | 10SEP2020 | 05FEB2024 |
| 5 | Seed_Drill_Pneumatic | Mahindra | Diesel | Medium | 88 | 600000 | 1900 | 25NOV2020 | 01JAN2024 |
| 6 | Boom_Sprayer_Trailed | CLAAS | Diesel | Medium | 80 | 1100000 | 2000 | 12AUG2018 | 18OCT2023 |
| 7 | Baler_Round | JohnDeere | Diesel | High | 87 | 1500000 | 2100 | 05MAY2016 | 22DEC2023 |
| 8 | Thresher_Stationary | LocalBrand | Electric | Medium | 75 | 300000 | 1400 | 30JAN2021 | 27JAN2024 |
| 9 | Power_Tiller_Compact | VST | Diesel | Low | 70 | 250000 | 1300 | 18JUL2022 | 03MAR2024 |
| 10 | Rice_Transplanter | Kubota | Diesel | Medium | 83 | 900000 | 1750 | 09JUN2019 | 28NOV2023 |
| 11 | Sugarcane_Harvester | CaseIH | Diesel | High | 92 | 5500000 | 2700 | 21FEB2016 | 15SEP2023 |
| 12 | Potato_Digger_Trailed | LocalBrand | Petrol | Low | 68 | 350000 | 1200 | 11OCT2022 | 10FEB2024 |
STEP 2: USE PROC SQL TO CONVERT CHAR DATES TO SAS DATES AND KEEP MAINTENANCE_LEVEL
proc sql;
create table agri_machines as
select Machine_Name,Brand,Fuel_Type,Efficiency,Cost,Usage_Hours,
input(Purchase_Date_char, date9.) as Purchase_Date format=date9.,
input(Last_Service_char , date9.) as Last_Service_Date format=date9.,
Maintenance_Level
from agri_machines_raw;
quit;
proc print data=agri_machines;
run;
OUTPUT:
| Obs | Machine_Name | Brand | Fuel_Type | Efficiency | Cost | Usage_Hours | Purchase_Date | Last_Service_Date | Maintenance_Level |
|---|---|---|---|---|---|---|---|---|---|
| 1 | Tractor_MidRange | JohnDeere | Diesel | 85 | 1200000 | 2200 | 15JAN2018 | 10JAN2024 | High |
| 2 | Tractor_Compact | Mahindra | Diesel | 78 | 800000 | 1800 | 20JUN2019 | 15DEC2023 | Medium |
| 3 | Combine_Harvester | NewHolland | Diesel | 90 | 3500000 | 2500 | 01APR2017 | 20NOV2023 | High |
| 4 | Rotavator_Heavy | Kubota | Diesel | 82 | 450000 | 1600 | 10SEP2020 | 05FEB2024 | Medium |
| 5 | Seed_Drill_Pneumatic | Mahindra | Diesel | 88 | 600000 | 1900 | 25NOV2020 | 01JAN2024 | Medium |
| 6 | Boom_Sprayer_Trailed | CLAAS | Diesel | 80 | 1100000 | 2000 | 12AUG2018 | 18OCT2023 | Medium |
| 7 | Baler_Round | JohnDeere | Diesel | 87 | 1500000 | 2100 | 05MAY2016 | 22DEC2023 | High |
| 8 | Thresher_Stationary | LocalBrand | Electric | 75 | 300000 | 1400 | 30JAN2021 | 27JAN2024 | Medium |
| 9 | Power_Tiller_Compact | VST | Diesel | 70 | 250000 | 1300 | 18JUL2022 | 03MAR2024 | Low |
| 10 | Rice_Transplanter | Kubota | Diesel | 83 | 900000 | 1750 | 09JUN2019 | 28NOV2023 | Medium |
| 11 | Sugarcane_Harvester | CaseIH | Diesel | 92 | 5500000 | 2700 | 21FEB2016 | 15SEP2023 | High |
| 12 | Potato_Digger_Trailed | LocalBrand | Petrol | 68 | 350000 | 1200 | 11OCT2022 | 10FEB2024 | Low |
STEP 3: DEFINE MACROS FOR PERFORMANCE GRADING AND SERVICE SCHEDULING USING INTCK / INTNX
/* Macro to grade performance based on efficiency and usage */
%macro perf_grade(efficiency, usage);
length Performance_Grade $1;
if &efficiency >= 90 and &usage >= 2200 then Performance_Grade = 'A';
else if &efficiency >= 80 and &usage >= 1700 then Performance_Grade = 'B';
else if &efficiency >= 70 and &usage >= 1300 then Performance_Grade = 'C';
else Performance_Grade = 'D';
%mend;
/* Macro to compute date-based metrics using INTCK and INTNX */
%macro service_schedule(purchase, last_service);
/* Age in years from purchase date to today */
Age_Years = intck('year', &purchase, today());
/* Months since last service */
Months_Since_Service = intck('month', &last_service, today());
/* Next service due 6 months after last service */
Next_Service_Due = intnx('month', &last_service, 6, 'same');
format Next_Service_Due date9.;
%mend;
STEP 4: CREATE FINAL ANALYTICAL DATASET WITH MACROS
data agri_machines_final;
set agri_machines;
/* Call macro for performance grading */
%perf_grade(Efficiency, Usage_Hours);
/* Call macro for date-based calculations */
%service_schedule(Purchase_Date, Last_Service_Date);
/* Simple numeric coding for Maintenance_Level */
length Maint_Level_Code 8;
if Maintenance_Level = '' then do;
/* Derive level based on Usage_Hours as example rule */
if Usage_Hours >= 2300 then Maintenance_Level = 'High';
else if Usage_Hours >= 1700 then Maintenance_Level = 'Medium';
else Maintenance_Level = 'Low';
end;
if Maintenance_Level = 'High' then Maint_Level_Code = 3;
else if Maintenance_Level = 'Medium' then Maint_Level_Code = 2;
else if Maintenance_Level = 'Low' then Maint_Level_Code = 1;
run;
proc print data=agri_machines_final;
run;
OUTPUT:
| Obs | Machine_Name | Brand | Fuel_Type | Efficiency | Cost | Usage_Hours | Purchase_Date | Last_Service_Date | Maintenance_Level | Performance_Grade | Age_Years | Months_Since_Service | Next_Service_Due | Maint_Level_Code |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | Tractor_MidRange | JohnDeere | Diesel | 85 | 1200000 | 2200 | 15JAN2018 | 10JAN2024 | High | B | 7 | 23 | 10JUL2024 | 3 |
| 2 | Tractor_Compact | Mahindra | Diesel | 78 | 800000 | 1800 | 20JUN2019 | 15DEC2023 | Medium | C | 6 | 24 | 15JUN2024 | 2 |
| 3 | Combine_Harvester | NewHolland | Diesel | 90 | 3500000 | 2500 | 01APR2017 | 20NOV2023 | High | A | 8 | 25 | 20MAY2024 | 3 |
| 4 | Rotavator_Heavy | Kubota | Diesel | 82 | 450000 | 1600 | 10SEP2020 | 05FEB2024 | Medium | C | 5 | 22 | 05AUG2024 | 2 |
| 5 | Seed_Drill_Pneumatic | Mahindra | Diesel | 88 | 600000 | 1900 | 25NOV2020 | 01JAN2024 | Medium | B | 5 | 23 | 01JUL2024 | 2 |
| 6 | Boom_Sprayer_Trailed | CLAAS | Diesel | 80 | 1100000 | 2000 | 12AUG2018 | 18OCT2023 | Medium | B | 7 | 26 | 18APR2024 | 2 |
| 7 | Baler_Round | JohnDeere | Diesel | 87 | 1500000 | 2100 | 05MAY2016 | 22DEC2023 | High | B | 9 | 24 | 22JUN2024 | 3 |
| 8 | Thresher_Stationary | LocalBrand | Electric | 75 | 300000 | 1400 | 30JAN2021 | 27JAN2024 | Medium | C | 4 | 23 | 27JUL2024 | 2 |
| 9 | Power_Tiller_Compact | VST | Diesel | 70 | 250000 | 1300 | 18JUL2022 | 03MAR2024 | Low | C | 3 | 21 | 03SEP2024 | 1 |
| 10 | Rice_Transplanter | Kubota | Diesel | 83 | 900000 | 1750 | 09JUN2019 | 28NOV2023 | Medium | B | 6 | 25 | 28MAY2024 | 2 |
| 11 | Sugarcane_Harvester | CaseIH | Diesel | 92 | 5500000 | 2700 | 21FEB2016 | 15SEP2023 | High | A | 9 | 27 | 15MAR2024 | 3 |
| 12 | Potato_Digger_Trailed | LocalBrand | Petrol | 68 | 350000 | 1200 | 11OCT2022 | 10FEB2024 | Low | D | 3 | 22 | 10AUG2024 | 1 |
STEP 5: PROC MEANS – SUMMARY STATISTICS
proc means data=agri_machines_final n mean std min max;
var Efficiency Cost Usage_Hours Age_Years Months_Since_Service;
class Fuel_Type;
run;
OUTPUT:
The MEANS Procedure
| Fuel_Type | N Obs | Variable | N | Mean | Std Dev | Minimum | Maximum |
|---|---|---|---|---|---|---|---|
| Diesel | 10 | Efficiency Cost Usage_Hours Age_Years Months_Since_Service | 10 10 10 10 10 | 83.5000000 1580000.00 1985.00 6.5000000 24.0000000 | 6.4678693 1650286.17 415.0301194 1.9002924 1.8257419 | 70.0000000 250000.00 1300.00 3.0000000 21.0000000 | 92.0000000 5500000.00 2700.00 9.0000000 27.0000000 |
| Electric | 1 | Efficiency Cost Usage_Hours Age_Years Months_Since_Service | 1 1 1 1 1 | 75.0000000 300000.00 1400.00 4.0000000 23.0000000 | . . . . . | 75.0000000 300000.00 1400.00 4.0000000 23.0000000 | 75.0000000 300000.00 1400.00 4.0000000 23.0000000 |
| Petrol | 1 | Efficiency Cost Usage_Hours Age_Years Months_Since_Service | 1 1 1 1 1 | 68.0000000 350000.00 1200.00 3.0000000 22.0000000 | . . . . . | 68.0000000 350000.00 1200.00 3.0000000 22.0000000 | 68.0000000 350000.00 1200.00 3.0000000 22.0000000 |
STEP 6: PROC FREQ – CATEGORICAL DISTRIBUTIONS
proc freq data=agri_machines_final;
tables Brand Fuel_Type Maintenance_Level Performance_Grade / nocum nopercent;
run;
OUTPUT:
The FREQ Procedure
| Brand | Frequency |
|---|---|
| CLAAS | 1 |
| CaseIH | 1 |
| JohnDeere | 2 |
| Kubota | 2 |
| LocalBrand | 2 |
| Mahindra | 2 |
| NewHolland | 1 |
| VST | 1 |
| Fuel_Type | Frequency |
|---|---|
| Diesel | 10 |
| Electric | 1 |
| Petrol | 1 |
| Maintenance_Level | Frequency |
|---|---|
| High | 4 |
| Low | 2 |
| Medium | 6 |
| Performance_Grade | Frequency |
|---|---|
| A | 2 |
| B | 5 |
| C | 4 |
| D | 1 |
STEP 7: PROC CORR – CORRELATION BETWEEN NUMERIC VARIABLES
proc corr data=agri_machines_final nosimple;
var Efficiency Cost Usage_Hours Age_Years Maint_Level_Code;
run;
OUTPUT:
The CORR Procedure
| 5 Variables: | Efficiency Cost Usage_Hours Age_Years Maint_Level_Code |
|---|
| Pearson Correlation Coefficients, N = 12 Prob > |r| under H0: Rho=0 | |||||
|---|---|---|---|---|---|
| Efficiency | Cost | Usage_Hours | Age_Years | Maint_Level_Code | |
| Efficiency | 1.00000 | 0.69319 0.0124 | 0.90328 <.0001 | 0.84375 0.0006 | 0.87939 0.0002 |
| Cost | 0.69319 0.0124 | 1.00000 | 0.85796 0.0004 | 0.74394 0.0055 | 0.67402 0.0162 |
| Usage_Hours | 0.90328 <.0001 | 0.85796 0.0004 | 1.00000 | 0.92227 <.0001 | 0.89245 <.0001 |
| Age_Years | 0.84375 0.0006 | 0.74394 0.0055 | 0.92227 <.0001 | 1.00000 | 0.90951 <.0001 |
| Maint_Level_Code | 0.87939 0.0002 | 0.67402 0.0162 | 0.89245 <.0001 | 0.90951 <.0001 | 1.00000 |
STEP 8: PROC SQL – EXAMPLE QUERY FOR HIGH PERFORMANCE MACHINES
proc sql;
select Machine_Name,Brand,Fuel_Type,Efficiency,Usage_Hours,Performance_Grade,
Age_Years,Months_Since_Service,Next_Service_Due
from agri_machines_final
where Performance_Grade in ('A', 'B')
order by Efficiency desc, Usage_Hours desc;
quit;
| Machine_Name | Brand | Fuel_Type | Efficiency | Usage_Hours | Performance_Grade | Age_Years | Months_Since_Service | Next_Service_Due |
|---|---|---|---|---|---|---|---|---|
| Sugarcane_Harvester | CaseIH | Diesel | 92 | 2700 | A | 9 | 27 | 15MAR2024 |
| Combine_Harvester | NewHolland | Diesel | 90 | 2500 | A | 8 | 25 | 20MAY2024 |
| Seed_Drill_Pneumatic | Mahindra | Diesel | 88 | 1900 | B | 5 | 23 | 01JUL2024 |
| Baler_Round | JohnDeere | Diesel | 87 | 2100 | B | 9 | 24 | 22JUN2024 |
| Tractor_MidRange | JohnDeere | Diesel | 85 | 2200 | B | 7 | 23 | 10JUL2024 |
| Rice_Transplanter | Kubota | Diesel | 83 | 1750 | B | 6 | 25 | 28MAY2024 |
| Boom_Sprayer_Trailed | CLAAS | Diesel | 80 | 2000 | B | 7 | 26 | 18APR2024 |
No comments:
Post a Comment