WORLD ENERGY SOURCES DATASET CREATION AND SUSTAINABILITY RANKING ANALYSIS USING PROC SQL | PROC FORMAT | DATA STEP | MACRO | INTCK | INTNX | PROC PRINT | PROC FREQ | PROC MEANS | PROC CORR | PROC SGPLOT | PROC RANK
options nocenter;
STEP 1: CREATE WORLD ENERGY SOURCE DATASET USING PROC SQL
proc sql;
create table work.energy_sources as
select
"Solar PV" as Source length=25,
"Asia" as Region length=20,
5000 as Capacity_MW,
800000 as Cost_per_MW,
35 as Adoption_Rate,
90 as Sustainability_Index,
"01JAN2015"d as Deployment_Date format=date9.
from sashelp.class(obs=1)
union all
select
"Solar PV",
"Europe",
4200,
820000,
40,
92,
"15MAR2014"d
from sashelp.class(obs=1)
union all
select
"Wind Onshore",
"North America",
6100,
750000,
38,
88,
"10JUN2013"d
from sashelp.class(obs=1)
union all
select
"Wind Offshore",
"Europe",
3800,
1100000,
25,
86,
"20SEP2016"d
from sashelp.class(obs=1)
union all
select
"Nuclear",
"Europe",
9000,
6500000,
22,
70,
"01JAN2010"d
from sashelp.class(obs=1)
union all
select
"Nuclear",
"Asia",
11000,
6000000,
18,
68,
"15AUG2011"d
from sashelp.class(obs=1)
union all
select
"Coal",
"Asia",
15000,
500000,
55,
30,
"01JAN2005"d
from sashelp.class(obs=1)
union all
select
"Coal",
"North America",
8000,
520000,
28,
35,
"01JAN2000"d
from sashelp.class(obs=1)
union all
select
"Hydro",
"South America",
7000,
900000,
45,
82,
"05MAY2012"d
from sashelp.class(obs=1)
union all
select
"Hydro",
"Asia",
6500,
880000,
33,
80,
"20FEB2013"d
from sashelp.class(obs=1)
union all
select
"Geothermal",
"North America",
2400,
1200000,
15,
84,
"11NOV2017"d
from sashelp.class(obs=1)
union all
select
"Biomass",
"Africa",
1800,
700000,
20,
76,
"01JUL2018"d
from sashelp.class(obs=1)
;
quit;
proc print data=work.energy_sources;
run;
OUTPUT:
| Obs | Source | Region | Capacity_MW | Cost_per_MW | Adoption_Rate | Sustainability_Index | Deployment_Date |
|---|---|---|---|---|---|---|---|
| 1 | Solar PV | Asia | 5000 | 800000 | 35 | 90 | 01JAN2015 |
| 2 | Solar PV | Europe | 4200 | 820000 | 40 | 92 | 15MAR2014 |
| 3 | Wind Onshore | North America | 6100 | 750000 | 38 | 88 | 10JUN2013 |
| 4 | Wind Offshore | Europe | 3800 | 1100000 | 25 | 86 | 20SEP2016 |
| 5 | Nuclear | Europe | 9000 | 6500000 | 22 | 70 | 01JAN2010 |
| 6 | Nuclear | Asia | 11000 | 6000000 | 18 | 68 | 15AUG2011 |
| 7 | Coal | Asia | 15000 | 500000 | 55 | 30 | 01JAN2005 |
| 8 | Coal | North America | 8000 | 520000 | 28 | 35 | 01JAN2000 |
| 9 | Hydro | South America | 7000 | 900000 | 45 | 82 | 05MAY2012 |
| 10 | Hydro | Asia | 6500 | 880000 | 33 | 80 | 20FEB2013 |
| 11 | Geothermal | North America | 2400 | 1200000 | 15 | 84 | 11NOV2017 |
| 12 | Biomass | Africa | 1800 | 700000 | 20 | 76 | 01JUL2018 |
proc format;
value $srcfmt
"Solar PV" = "Solar Photovoltaic"
"Wind Onshore" = "Onshore Wind"
"Wind Offshore" = "Offshore Wind"
"Hydro" = "Hydropower"
"Geothermal" = "Geothermal Power"
"Biomass" = "Biomass Energy"
other = "Other/Conventional";
LOG:
NOTE: Format $SRCFMT has been output.
value sustain_rank_fmt
1 = "High Sustainability"
2 = "Medium Sustainability"
3 = "Low Sustainability";
run;
LOG:
STEP 3: MACRO TO ADD SUSTAINABILITY RANK VARIABLES
%macro add_sustainability_rank(in=work.energy_sources, out=work.energy_ranked);
data &out.;
set &in.;
length Sustain_Category $15;
length Sustain_Rank 8;
if Sustainability_Index >= 85 then do;
Sustain_Category = "High";
Sustain_Rank = 1;
end;
else if Sustainability_Index >= 70 then do;
Sustain_Category = "Medium";
Sustain_Rank = 2;
end;
else do;
Sustain_Category = "Low";
Sustain_Rank = 3;
end;
format Sustain_Rank sustain_rank_fmt.;
run;
Proc Print data=&out.;
run;
%mend add_sustainability_rank;
%add_sustainability_rank();
OUTPUT:
| Obs | Source | Region | Capacity_MW | Cost_per_MW | Adoption_Rate | Sustainability_Index | Deployment_Date | Sustain_Category | Sustain_Rank |
|---|---|---|---|---|---|---|---|---|---|
| 1 | Solar PV | Asia | 5000 | 800000 | 35 | 90 | 01JAN2015 | High | High Sustainability |
| 2 | Solar PV | Europe | 4200 | 820000 | 40 | 92 | 15MAR2014 | High | High Sustainability |
| 3 | Wind Onshore | North America | 6100 | 750000 | 38 | 88 | 10JUN2013 | High | High Sustainability |
| 4 | Wind Offshore | Europe | 3800 | 1100000 | 25 | 86 | 20SEP2016 | High | High Sustainability |
| 5 | Nuclear | Europe | 9000 | 6500000 | 22 | 70 | 01JAN2010 | Medium | Medium Sustainability |
| 6 | Nuclear | Asia | 11000 | 6000000 | 18 | 68 | 15AUG2011 | Low | Low Sustainability |
| 7 | Coal | Asia | 15000 | 500000 | 55 | 30 | 01JAN2005 | Low | Low Sustainability |
| 8 | Coal | North America | 8000 | 520000 | 28 | 35 | 01JAN2000 | Low | Low Sustainability |
| 9 | Hydro | South America | 7000 | 900000 | 45 | 82 | 05MAY2012 | Medium | Medium Sustainability |
| 10 | Hydro | Asia | 6500 | 880000 | 33 | 80 | 20FEB2013 | Medium | Medium Sustainability |
| 11 | Geothermal | North America | 2400 | 1200000 | 15 | 84 | 11NOV2017 | Medium | Medium Sustainability |
| 12 | Biomass | Africa | 1800 | 700000 | 20 | 76 | 01JUL2018 | Medium | Medium Sustainability |
STEP 4: USE INTNX AND INTCK FOR DATE-BASED ANALYSIS
data work.energy_dates;
set work.energy_ranked;
/* Assume current reference date is 01JAN2025 for analysis.
Compute years since deployment using INTCK. */
format Deployment_Date Review_Date date9.;
Years_Since_Deployment = intck('year', Deployment_Date, "01JAN2025"d);
/* Compute a 5-year review date using INTNX */
Review_Date = intnx('year', Deployment_Date, 5, 'same');
run;
proc print data=work.energy_dates noobs;
title "World Energy Sources Dataset with Sustainability Rank and Date Variables";
format Source $srcfmt.;
run;
OUTPUT:
| Source | Region | Capacity_MW | Cost_per_MW | Adoption_Rate | Sustainability_Index | Deployment_Date | Sustain_Category | Sustain_Rank | Review_Date | Years_Since_Deployment |
|---|---|---|---|---|---|---|---|---|---|---|
| Solar Photovoltaic | Asia | 5000 | 800000 | 35 | 90 | 01JAN2015 | High | High Sustainability | 01JAN2020 | 10 |
| Solar Photovoltaic | Europe | 4200 | 820000 | 40 | 92 | 15MAR2014 | High | High Sustainability | 15MAR2019 | 11 |
| Onshore Wind | North America | 6100 | 750000 | 38 | 88 | 10JUN2013 | High | High Sustainability | 10JUN2018 | 12 |
| Offshore Wind | Europe | 3800 | 1100000 | 25 | 86 | 20SEP2016 | High | High Sustainability | 20SEP2021 | 9 |
| Other/Conventional | Europe | 9000 | 6500000 | 22 | 70 | 01JAN2010 | Medium | Medium Sustainability | 01JAN2015 | 15 |
| Other/Conventional | Asia | 11000 | 6000000 | 18 | 68 | 15AUG2011 | Low | Low Sustainability | 15AUG2016 | 14 |
| Other/Conventional | Asia | 15000 | 500000 | 55 | 30 | 01JAN2005 | Low | Low Sustainability | 01JAN2010 | 20 |
| Other/Conventional | North America | 8000 | 520000 | 28 | 35 | 01JAN2000 | Low | Low Sustainability | 01JAN2005 | 25 |
| Hydropower | South America | 7000 | 900000 | 45 | 82 | 05MAY2012 | Medium | Medium Sustainability | 05MAY2017 | 13 |
| Hydropower | Asia | 6500 | 880000 | 33 | 80 | 20FEB2013 | Medium | Medium Sustainability | 20FEB2018 | 12 |
| Geothermal Power | North America | 2400 | 1200000 | 15 | 84 | 11NOV2017 | Medium | Medium Sustainability | 11NOV2022 | 8 |
| Biomass Energy | Africa | 1800 | 700000 | 20 | 76 | 01JUL2018 | Medium | Medium Sustainability | 01JUL2023 | 7 |
STEP 5: BASIC DATA CHECKS (PROC FREQ)
/* Frequency of sustainability category by region */
proc freq data=work.energy_dates;
tables Region*Sustain_Category / nopercent norow nocol;
title "Distribution of Sustainability Category by Region";
run;
OUTPUT:
The FREQ Procedure
|
| |||||||||||||||||||||||||||||||||||||||||||||
proc means data=work.energy_dates n mean std min max maxdec=1;
class Region;
var Capacity_MW Cost_per_MW Adoption_Rate Sustainability_Index Years_Since_Deployment;
title "Descriptive Statistics by Region for World Energy Sources";
run;
OUTPUT:
The MEANS Procedure
| Region | N Obs | Variable | N | Mean | Std Dev | Minimum | Maximum |
|---|---|---|---|---|---|---|---|
| Africa | 1 | Capacity_MW Cost_per_MW Adoption_Rate Sustainability_Index Years_Since_Deployment | 1 1 1 1 1 | 1800.0 700000.0 20.0 76.0 7.0 | . . . . . | 1800.0 700000.0 20.0 76.0 7.0 | 1800.0 700000.0 20.0 76.0 7.0 |
| Asia | 4 | Capacity_MW Cost_per_MW Adoption_Rate Sustainability_Index Years_Since_Deployment | 4 4 4 4 4 | 9375.0 2045000.0 35.3 67.0 14.0 | 4534.6 2641735.5 15.2 26.3 4.3 | 5000.0 500000.0 18.0 30.0 10.0 | 15000.0 6000000.0 55.0 90.0 20.0 |
| Europe | 3 | Capacity_MW Cost_per_MW Adoption_Rate Sustainability_Index Years_Since_Deployment | 3 3 3 3 3 | 5666.7 2806666.7 29.0 82.7 11.7 | 2893.7 3201582.9 9.6 11.4 3.1 | 3800.0 820000.0 22.0 70.0 9.0 | 9000.0 6500000.0 40.0 92.0 15.0 |
| North America | 3 | Capacity_MW Cost_per_MW Adoption_Rate Sustainability_Index Years_Since_Deployment | 3 3 3 3 3 | 5500.0 823333.3 27.0 69.0 15.0 | 2847.8 345880.5 11.5 29.5 8.9 | 2400.0 520000.0 15.0 35.0 8.0 | 8000.0 1200000.0 38.0 88.0 25.0 |
| South America | 1 | Capacity_MW Cost_per_MW Adoption_Rate Sustainability_Index Years_Since_Deployment | 1 1 1 1 1 | 7000.0 900000.0 45.0 82.0 13.0 | . . . . . | 7000.0 900000.0 45.0 82.0 13.0 | 7000.0 900000.0 45.0 82.0 13.0 |
STEP 7: CORRELATION ANALYSIS USING PROC CORR
proc corr data=work.energy_dates pearson nosimple;
var Capacity_MW Cost_per_MW Adoption_Rate Sustainability_Index Years_Since_Deployment;
title "Correlation Between Capacity, Cost, Adoption, Sustainability and Age of Assets";
run;
OUTPUT:
The CORR Procedure
| 5 Variables: | Capacity_MW Cost_per_MW Adoption_Rate Sustainability_Index Years_Since_Deployment |
|---|
| Pearson Correlation Coefficients, N = 12 Prob > |r| under H0: Rho=0 | |||||
|---|---|---|---|---|---|
| Capacity_MW | Cost_per_MW | Adoption_Rate | Sustainability_Index | Years_Since_Deployment | |
| Capacity_MW | 1.00000 | 0.35384 0.2592 | 0.46135 0.1311 | -0.73020 0.0070 | 0.73611 0.0063 |
| Cost_per_MW | 0.35384 0.2592 | 1.00000 | -0.47161 0.1217 | -0.03142 0.9228 | 0.07050 0.8276 |
| Adoption_Rate | 0.46135 0.1311 | -0.47161 0.1217 | 1.00000 | -0.23189 0.4683 | 0.34126 0.2777 |
| Sustainability_Index | -0.73020 0.0070 | -0.03142 0.9228 | -0.23189 0.4683 | 1.00000 | -0.86655 0.0003 |
| Years_Since_Deployment | 0.73611 0.0063 | 0.07050 0.8276 | 0.34126 0.2777 | -0.86655 0.0003 | 1.00000 |
STEP 8: VISUALIZATION USING PROC SGPLOT
/* Scatter plot: Cost vs Sustainability */
proc sgplot data=work.energy_dates;
scatter x=Cost_per_MW y=Sustainability_Index / group=Source;
xaxis label = "Cost per MW (USD)";
yaxis label = "Sustainability Index (0–100)";
title "Relationship Between Cost per MW and Sustainability Index by Energy Source";
run;
OUTPUT:
/* Bar chart: Total Capacity by Region and Source */
proc sgplot data=work.energy_dates;
vbar Region / response=Capacity_MW group=Source stat=sum;
yaxis label = "Total Installed Capacity (MW)";
title "Total Capacity by Region and Energy Source";
run;
OUTPUT:
STEP 9: EXTRA – USING PROC SQL FOR SUMMARY AND RANKING
/* Average sustainability index by source globally */
proc sql;
create table work.avg_sustain_by_source as
select
Source,
mean(Sustainability_Index) as Avg_Sustainability format=6.2,
mean(Adoption_Rate) as Avg_Adoption format=6.2
from work.energy_dates
group by Source
order by Avg_Sustainability desc;
quit;
proc print data=work.avg_sustain_by_source;
run;
OUTPUT:
| Obs | Source | Avg_Sustainability | Avg_Adoption |
|---|---|---|---|
| 1 | Solar PV | 91.00 | 37.50 |
| 2 | Wind Onshore | 88.00 | 38.00 |
| 3 | Wind Offshore | 86.00 | 25.00 |
| 4 | Geothermal | 84.00 | 15.00 |
| 5 | Hydro | 81.00 | 39.00 |
| 6 | Biomass | 76.00 | 20.00 |
| 7 | Nuclear | 69.00 | 20.00 |
| 8 | Coal | 32.50 | 41.50 |
/* Rank sources by average sustainability using PROC RANK */
proc rank data=work.avg_sustain_by_source out=work.source_ranks descending ties=low;
var Avg_Sustainability;
ranks Global_Sustain_Rank;
run;
proc print data=work.source_ranks noobs;
title "Global Sustainability Ranking of Energy Sources (Average Index)";
run;
OUTPUT:
| Source | Avg_Sustainability | Avg_Adoption | Global_Sustain_Rank |
|---|---|---|---|
| Solar PV | 91.00 | 37.50 | 1 |
| Wind Onshore | 88.00 | 38.00 | 2 |
| Wind Offshore | 86.00 | 25.00 | 3 |
| Geothermal | 84.00 | 15.00 | 4 |
| Hydro | 81.00 | 39.00 | 5 |
| Biomass | 76.00 | 20.00 | 6 |
| Nuclear | 69.00 | 20.00 | 7 |
| Coal | 32.50 | 41.50 | 8 |
No comments:
Post a Comment