Friday, 26 December 2025

351.POLLUTION SOURCES DATA ANALYSIS USING SAS DATA STEP | PROC SQL | PROC MEANS | PROC REG | PROC SGPLOT | MACROS | DATE FUNCTIONS | APPEND | MERGE | TRANSPOSE

POLLUTION SOURCES DATA ANALYSIS USING SAS DATA STEP | PROC SQL | PROC MEANS | PROC REG | PROC SGPLOT | MACROS | DATE FUNCTIONS | APPEND | MERGE | TRANSPOSE 

options nocenter;

1.POLLUTION MAIN DATASET CREATION

data pollution_main;

    length Source_Type $20 City $15;

    format Report_Date date9.;

    input Source_Type $ City $ Volume Impact_Level Reduction_Cost Risk_Score 

          Report_Date : date9.;

    datalines;

Vehicle_Emissions Delhi 85 9 1200 88 01JAN2024

Industrial_Waste Mumbai 92 10 1800 94 15JAN2024

Construction_Dust Hyderabad 70 7 900 72 01FEB2024

Power_Plants Chennai 88 9 1600 90 20FEB2024

Waste_Burning Kolkata 76 8 1000 80 05MAR2024

Vehicular_Traffic Bengaluru 82 8 1300 85 18MAR2024

Chemical_Leak Pune 90 10 2000 96 01APR2024

Mining_Activity Ranchi 78 7 1100 75 10APR2024

Oil_Refinery Jamnagar 95 10 2200 98 25APR2024

Plastic_Waste Surat 65 6 700 60 05MAY2024

Thermal_Plant Korba 89 9 1700 92 15MAY2024

Brick_Kilns Patna 72 7 950 74 01JUN2024

Traffic_Congestion Noida 84 8 1250 86 10JUN2024

;

run;

proc print data=pollution_main;

run;

OUTPUT:

ObsSource_TypeCityReport_DateVolumeImpact_LevelReduction_CostRisk_Score
1Vehicle_EmissionsDelhi01JAN2024859120088
2Industrial_WasteMumbai15JAN20249210180094
3Construction_DustHyderabad01FEB202470790072
4Power_PlantsChennai20FEB2024889160090
5Waste_BurningKolkata05MAR2024768100080
6Vehicular_TrafficBengaluru18MAR2024828130085
7Chemical_LeakPune01APR20249010200096
8Mining_ActivityRanchi10APR2024787110075
9Oil_RefineryJamnagar25APR20249510220098
10Plastic_WasteSurat05MAY202465670060
11Thermal_PlantKorba15MAY2024899170092
12Brick_KilnsPatna01JUN202472795074
13Traffic_CongestionNoida10JUN2024848125086


2.DATE FUNCTION DERIVATIONS (INTNX, INTCK, MDY)

data pollution_dates;

    set pollution_main;

    Next_Report = intnx('month', Report_Date, 1, 'same');

    Days_Since_Report = intck('day', Report_Date, today());

    Year_Start = mdy(1,1,year(Report_Date));

    format Next_Report Year_Start date9.;

run;

proc print data=pollution_dates;

run;

OUTPUT:

ObsSource_TypeCityReport_DateVolumeImpact_LevelReduction_CostRisk_ScoreNext_ReportDays_Since_ReportYear_Start
1Vehicle_EmissionsDelhi01JAN202485912008801FEB202472601JAN2024
2Industrial_WasteMumbai15JAN2024921018009415FEB202471201JAN2024
3Construction_DustHyderabad01FEB20247079007201MAR202469501JAN2024
4Power_PlantsChennai20FEB202488916009020MAR202467601JAN2024
5Waste_BurningKolkata05MAR202476810008005APR202466201JAN2024
6Vehicular_TrafficBengaluru18MAR202482813008518APR202464901JAN2024
7Chemical_LeakPune01APR2024901020009601MAY202463501JAN2024
8Mining_ActivityRanchi10APR202478711007510MAY202462601JAN2024
9Oil_RefineryJamnagar25APR2024951022009825MAY202461101JAN2024
10Plastic_WasteSurat05MAY20246567006005JUN202460101JAN2024
11Thermal_PlantKorba15MAY202489917009215JUN202459101JAN2024
12Brick_KilnsPatna01JUN20247279507401JUL202457401JAN2024
13Traffic_CongestionNoida10JUN202484812508610JUL202456501JAN2024


3.PROC SQL – SUMMARY AND FILTERING

proc sql;

    create table high_risk_sources as

    select City,Source_Type,Volume,Risk_Score

    from pollution_main

    where Risk_Score >= 85

    order by Risk_Score desc;

quit;

proc print data=high_risk_sources;

run;

OUTPUT:

ObsCitySource_TypeVolumeRisk_Score
1JamnagarOil_Refinery9598
2PuneChemical_Leak9096
3MumbaiIndustrial_Waste9294
4KorbaThermal_Plant8992
5ChennaiPower_Plants8890
6DelhiVehicle_Emissions8588
7NoidaTraffic_Congestion8486
8BengaluruVehicular_Traffic8285


4.PROC MEANS – STATISTICAL SUMMARY

proc means data=pollution_main mean min max;

    var Volume Reduction_Cost Risk_Score;

run;

OUTPUT:

The MEANS Procedure

VariableMeanMinimumMaximum
Volume
Reduction_Cost
Risk_Score
82.0000000
1361.54
83.8461538
65.0000000
700.0000000
60.0000000
95.0000000
2200.00
98.0000000

5.MACRO FOR SEVERITY CLASSIFICATION

%macro classify_severity;

data pollution_severity;

    set pollution_main;

    length Severity $12;

    if Risk_Score >= 90 then Severity = 'CRITICAL';

    else if Risk_Score >= 75 then Severity = 'HIGH';

    else if Risk_Score >= 60 then Severity = 'MODERATE';

    else Severity = 'LOW';

run;

proc print data=pollution_severity;

run;

%mend;


%classify_severity;

OUTPUT:

ObsSource_TypeCityReport_DateVolumeImpact_LevelReduction_CostRisk_ScoreSeverity
1Vehicle_EmissionsDelhi01JAN2024859120088HIGH
2Industrial_WasteMumbai15JAN20249210180094CRITICAL
3Construction_DustHyderabad01FEB202470790072MODERATE
4Power_PlantsChennai20FEB2024889160090CRITICAL
5Waste_BurningKolkata05MAR2024768100080HIGH
6Vehicular_TrafficBengaluru18MAR2024828130085HIGH
7Chemical_LeakPune01APR20249010200096CRITICAL
8Mining_ActivityRanchi10APR2024787110075HIGH
9Oil_RefineryJamnagar25APR20249510220098CRITICAL
10Plastic_WasteSurat05MAY202465670060MODERATE
11Thermal_PlantKorba15MAY2024899170092CRITICAL
12Brick_KilnsPatna01JUN202472795074MODERATE
13Traffic_CongestionNoida10JUN2024848125086HIGH


6.PROC REG – RELATIONSHIP ANALYSIS

proc reg data=pollution_main;

    model Risk_Score = Volume Reduction_Cost;

run;

quit;

OUTPUT:

The REG Procedure

Model: MODEL1

Dependent Variable: Risk_Score

Number of Observations Read13
Number of Observations Used13
Analysis of Variance
SourceDFSum of
Squares
Mean
Square
F ValuePr > F
Model21412.72842706.36421108.73<.0001
Error1064.963886.49639  
Corrected Total121477.69231   
Root MSE2.54880R-Square0.9560
Dependent Mean83.84615Adj R-Sq0.9472
Coeff Var3.03985  
Parameter Estimates
VariableDFParameter
Estimate
Standard
Error
t ValuePr > |t|
Intercept1-11.2501213.19922-0.850.4140
Volume11.149050.230304.990.0005
Reduction_Cost10.000642170.004610.140.8920

The REG Procedure

Model: MODEL1

Dependent Variable: Risk_Score

Panel of fit diagnostics for Risk_Score.
Panel of scatterplots of residuals by regressors for Risk_Score.

7.PROC SGPLOT – VISUALIZATION

proc sgplot data=pollution_main;

    scatter x=Volume y=Risk_Score / group=Source_Type;

    reg x=Volume y=Risk_Score;

    title "Pollution Volume vs Risk Score";

run;

OUTPUT:

The SGPlot Procedure


8.PROC APPEND – ADDITIONAL DATA

data pollution_new;

    length Source_Type $20 City $15;

    format Report_Date date9.;

    Source_Type='E_Waste';

    City='Gurgaon';

    Volume=68;

    Impact_Level=6;

    Reduction_Cost=800;

    Risk_Score=65;

    Report_Date='20JUN2024'd;

run;

proc print data=pollution_new;

run;

OUTPUT:

ObsSource_TypeCityReport_DateVolumeImpact_LevelReduction_CostRisk_Score
1E_WasteGurgaon20JUN202468680065


proc append base=pollution_main 

            data=pollution_new;

run;

proc print data=pollution_main;

run;

OUTPUT:

ObsSource_TypeCityReport_DateVolumeImpact_LevelReduction_CostRisk_Score
1Vehicle_EmissionsDelhi01JAN2024859120088
2Industrial_WasteMumbai15JAN20249210180094
3Construction_DustHyderabad01FEB202470790072
4Power_PlantsChennai20FEB2024889160090
5Waste_BurningKolkata05MAR2024768100080
6Vehicular_TrafficBengaluru18MAR2024828130085
7Chemical_LeakPune01APR20249010200096
8Mining_ActivityRanchi10APR2024787110075
9Oil_RefineryJamnagar25APR20249510220098
10Plastic_WasteSurat05MAY202465670060
11Thermal_PlantKorba15MAY2024899170092
12Brick_KilnsPatna01JUN202472795074
13Traffic_CongestionNoida10JUN2024848125086
14E_WasteGurgaon20JUN202468680065


9.PROC MERGE – COST CATEGORY DATA

data cost_category;

    length Source_Type $20 Cost_Class $10;

    input Source_Type $ Cost_Class $;

    datalines;

Vehicle_Emissions HIGH

Industrial_Waste HIGH

Construction_Dust MEDIUM

Waste_Burning MEDIUM

Plastic_Waste LOW

;

run;

proc print data=cost_category;

run;

OUTPUT:

ObsSource_TypeCost_Class
1Vehicle_EmissionsHIGH
2Industrial_WasteHIGH
3Construction_DustMEDIUM
4Waste_BurningMEDIUM
5Plastic_WasteLOW


proc sort data=pollution_main; by Source_Type; run;

proc print data=pollution_main;

run;

OUTPUT:

ObsSource_TypeCityReport_DateVolumeImpact_LevelReduction_CostRisk_Score
1Brick_KilnsPatna01JUN202472795074
2Chemical_LeakPune01APR20249010200096
3Construction_DustHyderabad01FEB202470790072
4E_WasteGurgaon20JUN202468680065
5Industrial_WasteMumbai15JAN20249210180094
6Mining_ActivityRanchi10APR2024787110075
7Oil_RefineryJamnagar25APR20249510220098
8Plastic_WasteSurat05MAY202465670060
9Power_PlantsChennai20FEB2024889160090
10Thermal_PlantKorba15MAY2024899170092
11Traffic_CongestionNoida10JUN2024848125086
12Vehicle_EmissionsDelhi01JAN2024859120088
13Vehicular_TrafficBengaluru18MAR2024828130085
14Waste_BurningKolkata05MAR2024768100080


proc sort data=cost_category; by Source_Type; run;

proc print data=cost_category;

run;

OUTPUT:

ObsSource_TypeCost_Class
1Construction_DustMEDIUM
2Industrial_WasteHIGH
3Plastic_WasteLOW
4Vehicle_EmissionsHIGH
5Waste_BurningMEDIUM


data pollution_merged;

    merge pollution_main(in=a) cost_category(in=b);

    by Source_Type;

    if a;

run;

proc print data=pollution_merged;

run;

OUTPUT:

ObsSource_TypeCityReport_DateVolumeImpact_LevelReduction_CostRisk_ScoreCost_Class
1Brick_KilnsPatna01JUN202472795074 
2Chemical_LeakPune01APR20249010200096 
3Construction_DustHyderabad01FEB202470790072MEDIUM
4E_WasteGurgaon20JUN202468680065 
5Industrial_WasteMumbai15JAN20249210180094HIGH
6Mining_ActivityRanchi10APR2024787110075 
7Oil_RefineryJamnagar25APR20249510220098 
8Plastic_WasteSurat05MAY202465670060LOW
9Power_PlantsChennai20FEB2024889160090 
10Thermal_PlantKorba15MAY2024899170092 
11Traffic_CongestionNoida10JUN2024848125086 
12Vehicle_EmissionsDelhi01JAN2024859120088HIGH
13Vehicular_TrafficBengaluru18MAR2024828130085 
14Waste_BurningKolkata05MAR2024768100080MEDIUM


10.PROC TRANSPOSE – CITY-WISE VIEW

proc transpose data=pollution_main out=city_volume prefix=Volume_;

    by City NotSorted;

    var Volume;

run;

proc print data=city_volume;

run;

OUTPUT:
ObsCity_NAME_Volume_1
1PatnaVolume72
2PuneVolume90
3HyderabadVolume70
4GurgaonVolume68
5MumbaiVolume92
6RanchiVolume78
7JamnagarVolume95
8SuratVolume65
9ChennaiVolume88
10KorbaVolume89
11NoidaVolume84
12DelhiVolume85
13BengaluruVolume82
14KolkataVolume76




To Visit My Previous Different Types Of Oils Dataset:Click Here
To Visit My Previous Different Types Of Series 2025 Dataset:Click Here
To Visit My Previous Analyzing Yoga Asanas Worldwide Dataset:Click Here
To Visit My Previous Analyzing Indian Languages Dataset:Click Here  



Follow Us On : 


 


--- FOLLOW OUR BLOG FOR MORE INFORMATION.

--->PLEASE DO COMMENTS AND SHARE OUR BLOG.




No comments:

Post a Comment