POLLUTION SOURCES DATA ANALYSIS USING SAS DATA STEP | PROC SQL | PROC MEANS | PROC REG | PROC SGPLOT | MACROS | DATE FUNCTIONS | APPEND | MERGE | TRANSPOSE
options nocenter;
1.POLLUTION MAIN DATASET CREATION
data pollution_main;
length Source_Type $20 City $15;
format Report_Date date9.;
input Source_Type $ City $ Volume Impact_Level Reduction_Cost Risk_Score
Report_Date : date9.;
datalines;
Vehicle_Emissions Delhi 85 9 1200 88 01JAN2024
Industrial_Waste Mumbai 92 10 1800 94 15JAN2024
Construction_Dust Hyderabad 70 7 900 72 01FEB2024
Power_Plants Chennai 88 9 1600 90 20FEB2024
Waste_Burning Kolkata 76 8 1000 80 05MAR2024
Vehicular_Traffic Bengaluru 82 8 1300 85 18MAR2024
Chemical_Leak Pune 90 10 2000 96 01APR2024
Mining_Activity Ranchi 78 7 1100 75 10APR2024
Oil_Refinery Jamnagar 95 10 2200 98 25APR2024
Plastic_Waste Surat 65 6 700 60 05MAY2024
Thermal_Plant Korba 89 9 1700 92 15MAY2024
Brick_Kilns Patna 72 7 950 74 01JUN2024
Traffic_Congestion Noida 84 8 1250 86 10JUN2024
;
run;
proc print data=pollution_main;
run;
OUTPUT:
| Obs | Source_Type | City | Report_Date | Volume | Impact_Level | Reduction_Cost | Risk_Score |
|---|---|---|---|---|---|---|---|
| 1 | Vehicle_Emissions | Delhi | 01JAN2024 | 85 | 9 | 1200 | 88 |
| 2 | Industrial_Waste | Mumbai | 15JAN2024 | 92 | 10 | 1800 | 94 |
| 3 | Construction_Dust | Hyderabad | 01FEB2024 | 70 | 7 | 900 | 72 |
| 4 | Power_Plants | Chennai | 20FEB2024 | 88 | 9 | 1600 | 90 |
| 5 | Waste_Burning | Kolkata | 05MAR2024 | 76 | 8 | 1000 | 80 |
| 6 | Vehicular_Traffic | Bengaluru | 18MAR2024 | 82 | 8 | 1300 | 85 |
| 7 | Chemical_Leak | Pune | 01APR2024 | 90 | 10 | 2000 | 96 |
| 8 | Mining_Activity | Ranchi | 10APR2024 | 78 | 7 | 1100 | 75 |
| 9 | Oil_Refinery | Jamnagar | 25APR2024 | 95 | 10 | 2200 | 98 |
| 10 | Plastic_Waste | Surat | 05MAY2024 | 65 | 6 | 700 | 60 |
| 11 | Thermal_Plant | Korba | 15MAY2024 | 89 | 9 | 1700 | 92 |
| 12 | Brick_Kilns | Patna | 01JUN2024 | 72 | 7 | 950 | 74 |
| 13 | Traffic_Congestion | Noida | 10JUN2024 | 84 | 8 | 1250 | 86 |
2.DATE FUNCTION DERIVATIONS (INTNX, INTCK, MDY)
data pollution_dates;
set pollution_main;
Next_Report = intnx('month', Report_Date, 1, 'same');
Days_Since_Report = intck('day', Report_Date, today());
Year_Start = mdy(1,1,year(Report_Date));
format Next_Report Year_Start date9.;
run;
proc print data=pollution_dates;
run;
OUTPUT:
| Obs | Source_Type | City | Report_Date | Volume | Impact_Level | Reduction_Cost | Risk_Score | Next_Report | Days_Since_Report | Year_Start |
|---|---|---|---|---|---|---|---|---|---|---|
| 1 | Vehicle_Emissions | Delhi | 01JAN2024 | 85 | 9 | 1200 | 88 | 01FEB2024 | 726 | 01JAN2024 |
| 2 | Industrial_Waste | Mumbai | 15JAN2024 | 92 | 10 | 1800 | 94 | 15FEB2024 | 712 | 01JAN2024 |
| 3 | Construction_Dust | Hyderabad | 01FEB2024 | 70 | 7 | 900 | 72 | 01MAR2024 | 695 | 01JAN2024 |
| 4 | Power_Plants | Chennai | 20FEB2024 | 88 | 9 | 1600 | 90 | 20MAR2024 | 676 | 01JAN2024 |
| 5 | Waste_Burning | Kolkata | 05MAR2024 | 76 | 8 | 1000 | 80 | 05APR2024 | 662 | 01JAN2024 |
| 6 | Vehicular_Traffic | Bengaluru | 18MAR2024 | 82 | 8 | 1300 | 85 | 18APR2024 | 649 | 01JAN2024 |
| 7 | Chemical_Leak | Pune | 01APR2024 | 90 | 10 | 2000 | 96 | 01MAY2024 | 635 | 01JAN2024 |
| 8 | Mining_Activity | Ranchi | 10APR2024 | 78 | 7 | 1100 | 75 | 10MAY2024 | 626 | 01JAN2024 |
| 9 | Oil_Refinery | Jamnagar | 25APR2024 | 95 | 10 | 2200 | 98 | 25MAY2024 | 611 | 01JAN2024 |
| 10 | Plastic_Waste | Surat | 05MAY2024 | 65 | 6 | 700 | 60 | 05JUN2024 | 601 | 01JAN2024 |
| 11 | Thermal_Plant | Korba | 15MAY2024 | 89 | 9 | 1700 | 92 | 15JUN2024 | 591 | 01JAN2024 |
| 12 | Brick_Kilns | Patna | 01JUN2024 | 72 | 7 | 950 | 74 | 01JUL2024 | 574 | 01JAN2024 |
| 13 | Traffic_Congestion | Noida | 10JUN2024 | 84 | 8 | 1250 | 86 | 10JUL2024 | 565 | 01JAN2024 |
3.PROC SQL – SUMMARY AND FILTERING
proc sql;
create table high_risk_sources as
select City,Source_Type,Volume,Risk_Score
from pollution_main
where Risk_Score >= 85
order by Risk_Score desc;
quit;
proc print data=high_risk_sources;
run;
OUTPUT:
| Obs | City | Source_Type | Volume | Risk_Score |
|---|---|---|---|---|
| 1 | Jamnagar | Oil_Refinery | 95 | 98 |
| 2 | Pune | Chemical_Leak | 90 | 96 |
| 3 | Mumbai | Industrial_Waste | 92 | 94 |
| 4 | Korba | Thermal_Plant | 89 | 92 |
| 5 | Chennai | Power_Plants | 88 | 90 |
| 6 | Delhi | Vehicle_Emissions | 85 | 88 |
| 7 | Noida | Traffic_Congestion | 84 | 86 |
| 8 | Bengaluru | Vehicular_Traffic | 82 | 85 |
4.PROC MEANS – STATISTICAL SUMMARY
proc means data=pollution_main mean min max;
var Volume Reduction_Cost Risk_Score;
run;
OUTPUT:
The MEANS Procedure
| Variable | Mean | Minimum | Maximum |
|---|---|---|---|
Volume Reduction_Cost Risk_Score | 82.0000000 1361.54 83.8461538 | 65.0000000 700.0000000 60.0000000 | 95.0000000 2200.00 98.0000000 |
5.MACRO FOR SEVERITY CLASSIFICATION
%macro classify_severity;
data pollution_severity;
set pollution_main;
length Severity $12;
if Risk_Score >= 90 then Severity = 'CRITICAL';
else if Risk_Score >= 75 then Severity = 'HIGH';
else if Risk_Score >= 60 then Severity = 'MODERATE';
else Severity = 'LOW';
run;
proc print data=pollution_severity;
run;
%mend;
%classify_severity;
OUTPUT:
| Obs | Source_Type | City | Report_Date | Volume | Impact_Level | Reduction_Cost | Risk_Score | Severity |
|---|---|---|---|---|---|---|---|---|
| 1 | Vehicle_Emissions | Delhi | 01JAN2024 | 85 | 9 | 1200 | 88 | HIGH |
| 2 | Industrial_Waste | Mumbai | 15JAN2024 | 92 | 10 | 1800 | 94 | CRITICAL |
| 3 | Construction_Dust | Hyderabad | 01FEB2024 | 70 | 7 | 900 | 72 | MODERATE |
| 4 | Power_Plants | Chennai | 20FEB2024 | 88 | 9 | 1600 | 90 | CRITICAL |
| 5 | Waste_Burning | Kolkata | 05MAR2024 | 76 | 8 | 1000 | 80 | HIGH |
| 6 | Vehicular_Traffic | Bengaluru | 18MAR2024 | 82 | 8 | 1300 | 85 | HIGH |
| 7 | Chemical_Leak | Pune | 01APR2024 | 90 | 10 | 2000 | 96 | CRITICAL |
| 8 | Mining_Activity | Ranchi | 10APR2024 | 78 | 7 | 1100 | 75 | HIGH |
| 9 | Oil_Refinery | Jamnagar | 25APR2024 | 95 | 10 | 2200 | 98 | CRITICAL |
| 10 | Plastic_Waste | Surat | 05MAY2024 | 65 | 6 | 700 | 60 | MODERATE |
| 11 | Thermal_Plant | Korba | 15MAY2024 | 89 | 9 | 1700 | 92 | CRITICAL |
| 12 | Brick_Kilns | Patna | 01JUN2024 | 72 | 7 | 950 | 74 | MODERATE |
| 13 | Traffic_Congestion | Noida | 10JUN2024 | 84 | 8 | 1250 | 86 | HIGH |
6.PROC REG – RELATIONSHIP ANALYSIS
proc reg data=pollution_main;
model Risk_Score = Volume Reduction_Cost;
run;
quit;
OUTPUT:
The REG Procedure
Model: MODEL1
Dependent Variable: Risk_Score
| Number of Observations Read | 13 |
|---|---|
| Number of Observations Used | 13 |
| Analysis of Variance | |||||
|---|---|---|---|---|---|
| Source | DF | Sum of Squares | Mean Square | F Value | Pr > F |
| Model | 2 | 1412.72842 | 706.36421 | 108.73 | <.0001 |
| Error | 10 | 64.96388 | 6.49639 | ||
| Corrected Total | 12 | 1477.69231 | |||
| Root MSE | 2.54880 | R-Square | 0.9560 |
|---|---|---|---|
| Dependent Mean | 83.84615 | Adj R-Sq | 0.9472 |
| Coeff Var | 3.03985 |
| Parameter Estimates | |||||
|---|---|---|---|---|---|
| Variable | DF | Parameter Estimate | Standard Error | t Value | Pr > |t| |
| Intercept | 1 | -11.25012 | 13.19922 | -0.85 | 0.4140 |
| Volume | 1 | 1.14905 | 0.23030 | 4.99 | 0.0005 |
| Reduction_Cost | 1 | 0.00064217 | 0.00461 | 0.14 | 0.8920 |
The REG Procedure
Model: MODEL1
Dependent Variable: Risk_Score
7.PROC SGPLOT – VISUALIZATION
proc sgplot data=pollution_main;
scatter x=Volume y=Risk_Score / group=Source_Type;
reg x=Volume y=Risk_Score;
title "Pollution Volume vs Risk Score";
run;
OUTPUT:
8.PROC APPEND – ADDITIONAL DATA
data pollution_new;
length Source_Type $20 City $15;
format Report_Date date9.;
Source_Type='E_Waste';
City='Gurgaon';
Volume=68;
Impact_Level=6;
Reduction_Cost=800;
Risk_Score=65;
Report_Date='20JUN2024'd;
run;
proc print data=pollution_new;
run;
OUTPUT:
| Obs | Source_Type | City | Report_Date | Volume | Impact_Level | Reduction_Cost | Risk_Score |
|---|---|---|---|---|---|---|---|
| 1 | E_Waste | Gurgaon | 20JUN2024 | 68 | 6 | 800 | 65 |
proc append base=pollution_main
data=pollution_new;
run;
proc print data=pollution_main;
run;
OUTPUT:
| Obs | Source_Type | City | Report_Date | Volume | Impact_Level | Reduction_Cost | Risk_Score |
|---|---|---|---|---|---|---|---|
| 1 | Vehicle_Emissions | Delhi | 01JAN2024 | 85 | 9 | 1200 | 88 |
| 2 | Industrial_Waste | Mumbai | 15JAN2024 | 92 | 10 | 1800 | 94 |
| 3 | Construction_Dust | Hyderabad | 01FEB2024 | 70 | 7 | 900 | 72 |
| 4 | Power_Plants | Chennai | 20FEB2024 | 88 | 9 | 1600 | 90 |
| 5 | Waste_Burning | Kolkata | 05MAR2024 | 76 | 8 | 1000 | 80 |
| 6 | Vehicular_Traffic | Bengaluru | 18MAR2024 | 82 | 8 | 1300 | 85 |
| 7 | Chemical_Leak | Pune | 01APR2024 | 90 | 10 | 2000 | 96 |
| 8 | Mining_Activity | Ranchi | 10APR2024 | 78 | 7 | 1100 | 75 |
| 9 | Oil_Refinery | Jamnagar | 25APR2024 | 95 | 10 | 2200 | 98 |
| 10 | Plastic_Waste | Surat | 05MAY2024 | 65 | 6 | 700 | 60 |
| 11 | Thermal_Plant | Korba | 15MAY2024 | 89 | 9 | 1700 | 92 |
| 12 | Brick_Kilns | Patna | 01JUN2024 | 72 | 7 | 950 | 74 |
| 13 | Traffic_Congestion | Noida | 10JUN2024 | 84 | 8 | 1250 | 86 |
| 14 | E_Waste | Gurgaon | 20JUN2024 | 68 | 6 | 800 | 65 |
9.PROC MERGE – COST CATEGORY DATA
data cost_category;
length Source_Type $20 Cost_Class $10;
input Source_Type $ Cost_Class $;
datalines;
Vehicle_Emissions HIGH
Industrial_Waste HIGH
Construction_Dust MEDIUM
Waste_Burning MEDIUM
Plastic_Waste LOW
;
run;
proc print data=cost_category;
run;
OUTPUT:
| Obs | Source_Type | Cost_Class |
|---|---|---|
| 1 | Vehicle_Emissions | HIGH |
| 2 | Industrial_Waste | HIGH |
| 3 | Construction_Dust | MEDIUM |
| 4 | Waste_Burning | MEDIUM |
| 5 | Plastic_Waste | LOW |
proc sort data=pollution_main; by Source_Type; run;
proc print data=pollution_main;
run;
OUTPUT:
| Obs | Source_Type | City | Report_Date | Volume | Impact_Level | Reduction_Cost | Risk_Score |
|---|---|---|---|---|---|---|---|
| 1 | Brick_Kilns | Patna | 01JUN2024 | 72 | 7 | 950 | 74 |
| 2 | Chemical_Leak | Pune | 01APR2024 | 90 | 10 | 2000 | 96 |
| 3 | Construction_Dust | Hyderabad | 01FEB2024 | 70 | 7 | 900 | 72 |
| 4 | E_Waste | Gurgaon | 20JUN2024 | 68 | 6 | 800 | 65 |
| 5 | Industrial_Waste | Mumbai | 15JAN2024 | 92 | 10 | 1800 | 94 |
| 6 | Mining_Activity | Ranchi | 10APR2024 | 78 | 7 | 1100 | 75 |
| 7 | Oil_Refinery | Jamnagar | 25APR2024 | 95 | 10 | 2200 | 98 |
| 8 | Plastic_Waste | Surat | 05MAY2024 | 65 | 6 | 700 | 60 |
| 9 | Power_Plants | Chennai | 20FEB2024 | 88 | 9 | 1600 | 90 |
| 10 | Thermal_Plant | Korba | 15MAY2024 | 89 | 9 | 1700 | 92 |
| 11 | Traffic_Congestion | Noida | 10JUN2024 | 84 | 8 | 1250 | 86 |
| 12 | Vehicle_Emissions | Delhi | 01JAN2024 | 85 | 9 | 1200 | 88 |
| 13 | Vehicular_Traffic | Bengaluru | 18MAR2024 | 82 | 8 | 1300 | 85 |
| 14 | Waste_Burning | Kolkata | 05MAR2024 | 76 | 8 | 1000 | 80 |
proc sort data=cost_category; by Source_Type; run;
proc print data=cost_category;
run;
OUTPUT:
| Obs | Source_Type | Cost_Class |
|---|---|---|
| 1 | Construction_Dust | MEDIUM |
| 2 | Industrial_Waste | HIGH |
| 3 | Plastic_Waste | LOW |
| 4 | Vehicle_Emissions | HIGH |
| 5 | Waste_Burning | MEDIUM |
data pollution_merged;
merge pollution_main(in=a) cost_category(in=b);
by Source_Type;
if a;
run;
proc print data=pollution_merged;
run;
OUTPUT:
| Obs | Source_Type | City | Report_Date | Volume | Impact_Level | Reduction_Cost | Risk_Score | Cost_Class |
|---|---|---|---|---|---|---|---|---|
| 1 | Brick_Kilns | Patna | 01JUN2024 | 72 | 7 | 950 | 74 | |
| 2 | Chemical_Leak | Pune | 01APR2024 | 90 | 10 | 2000 | 96 | |
| 3 | Construction_Dust | Hyderabad | 01FEB2024 | 70 | 7 | 900 | 72 | MEDIUM |
| 4 | E_Waste | Gurgaon | 20JUN2024 | 68 | 6 | 800 | 65 | |
| 5 | Industrial_Waste | Mumbai | 15JAN2024 | 92 | 10 | 1800 | 94 | HIGH |
| 6 | Mining_Activity | Ranchi | 10APR2024 | 78 | 7 | 1100 | 75 | |
| 7 | Oil_Refinery | Jamnagar | 25APR2024 | 95 | 10 | 2200 | 98 | |
| 8 | Plastic_Waste | Surat | 05MAY2024 | 65 | 6 | 700 | 60 | LOW |
| 9 | Power_Plants | Chennai | 20FEB2024 | 88 | 9 | 1600 | 90 | |
| 10 | Thermal_Plant | Korba | 15MAY2024 | 89 | 9 | 1700 | 92 | |
| 11 | Traffic_Congestion | Noida | 10JUN2024 | 84 | 8 | 1250 | 86 | |
| 12 | Vehicle_Emissions | Delhi | 01JAN2024 | 85 | 9 | 1200 | 88 | HIGH |
| 13 | Vehicular_Traffic | Bengaluru | 18MAR2024 | 82 | 8 | 1300 | 85 | |
| 14 | Waste_Burning | Kolkata | 05MAR2024 | 76 | 8 | 1000 | 80 | MEDIUM |
10.PROC TRANSPOSE – CITY-WISE VIEW
proc transpose data=pollution_main out=city_volume prefix=Volume_;
by City NotSorted;
var Volume;
run;
proc print data=city_volume;
run;
| Obs | City | _NAME_ | Volume_1 |
|---|---|---|---|
| 1 | Patna | Volume | 72 |
| 2 | Pune | Volume | 90 |
| 3 | Hyderabad | Volume | 70 |
| 4 | Gurgaon | Volume | 68 |
| 5 | Mumbai | Volume | 92 |
| 6 | Ranchi | Volume | 78 |
| 7 | Jamnagar | Volume | 95 |
| 8 | Surat | Volume | 65 |
| 9 | Chennai | Volume | 88 |
| 10 | Korba | Volume | 89 |
| 11 | Noida | Volume | 84 |
| 12 | Delhi | Volume | 85 |
| 13 | Bengaluru | Volume | 82 |
| 14 | Kolkata | Volume | 76 |
No comments:
Post a Comment