Friday, 21 November 2025

316.WORLD ENERGY SOURCES DATASET CREATION AND SUSTAINABILITY RANKING ANALYSIS USING PROC SQL | PROC FORMAT | DATA STEP | MACRO | INTCK | INTNX | PROC PRINT | PROC FREQ | PROC MEANS | PROC CORR | PROC SGPLOT | PROC RANK

WORLD ENERGY SOURCES DATASET CREATION AND SUSTAINABILITY RANKING ANALYSIS USING PROC SQL | PROC FORMAT | DATA STEP | MACRO | INTCK | INTNX | PROC PRINT | PROC FREQ | PROC MEANS | PROC CORR | PROC SGPLOT | PROC RANK

options nocenter;

STEP 1: CREATE WORLD ENERGY SOURCE DATASET USING PROC SQL

proc sql;

  create table work.energy_sources as

  select 

      "Solar PV"      as Source          length=25,

      "Asia"          as Region          length=20,

      5000            as Capacity_MW,

      800000          as Cost_per_MW,

      35              as Adoption_Rate,

      90              as Sustainability_Index,

      "01JAN2015"d    as Deployment_Date format=date9.

  from sashelp.class(obs=1)

  union all

  select 

      "Solar PV",

      "Europe",

      4200,

      820000,

      40,

      92,

      "15MAR2014"d

  from sashelp.class(obs=1)

  union all

  select 

      "Wind Onshore",

      "North America",

      6100,

      750000,

      38,

      88,

      "10JUN2013"d

  from sashelp.class(obs=1)

  union all

  select 

      "Wind Offshore",

      "Europe",

      3800,

      1100000,

      25,

      86,

      "20SEP2016"d

  from sashelp.class(obs=1)

  union all

  select 

      "Nuclear",

      "Europe",

      9000,

      6500000,

      22,

      70,

      "01JAN2010"d

  from sashelp.class(obs=1)

  union all

  select 

      "Nuclear",

      "Asia",

      11000,

      6000000,

      18,

      68,

      "15AUG2011"d

  from sashelp.class(obs=1)

  union all

  select 

      "Coal",

      "Asia",

      15000,

      500000,

      55,

      30,

      "01JAN2005"d

  from sashelp.class(obs=1)

  union all

  select 

      "Coal",

      "North America",

      8000,

      520000,

      28,

      35,

      "01JAN2000"d

  from sashelp.class(obs=1)

  union all

  select 

      "Hydro",

      "South America",

      7000,

      900000,

      45,

      82,

      "05MAY2012"d

  from sashelp.class(obs=1)

  union all

  select 

      "Hydro",

      "Asia",

      6500,

      880000,

      33,

      80,

      "20FEB2013"d

  from sashelp.class(obs=1)

  union all

  select 

      "Geothermal",

      "North America",

      2400,

      1200000,

      15,

      84,

      "11NOV2017"d

  from sashelp.class(obs=1)

  union all

  select 

      "Biomass",

      "Africa",

      1800,

      700000,

      20,

      76,

      "01JUL2018"d

  from sashelp.class(obs=1)

  ;

quit;

proc print data=work.energy_sources;

run;

OUTPUT:

ObsSourceRegionCapacity_MWCost_per_MWAdoption_RateSustainability_IndexDeployment_Date
1Solar PVAsia5000800000359001JAN2015
2Solar PVEurope4200820000409215MAR2014
3Wind OnshoreNorth America6100750000388810JUN2013
4Wind OffshoreEurope38001100000258620SEP2016
5NuclearEurope90006500000227001JAN2010
6NuclearAsia110006000000186815AUG2011
7CoalAsia15000500000553001JAN2005
8CoalNorth America8000520000283501JAN2000
9HydroSouth America7000900000458205MAY2012
10HydroAsia6500880000338020FEB2013
11GeothermalNorth America24001200000158411NOV2017
12BiomassAfrica1800700000207601JUL2018

STEP 2: DEFINE FORMATS FOR SUSTAINABILITY RANK CATEGORIES

proc format;

  value $srcfmt

    "Solar PV"      = "Solar Photovoltaic"

    "Wind Onshore"  = "Onshore Wind"

    "Wind Offshore" = "Offshore Wind"

    "Hydro"         = "Hydropower"

    "Geothermal"    = "Geothermal Power"

    "Biomass"       = "Biomass Energy"

    other           = "Other/Conventional";

LOG:

NOTE: Format $SRCFMT has been output.

  value sustain_rank_fmt

    1 = "High Sustainability"

    2 = "Medium Sustainability"

    3 = "Low Sustainability";

run;

LOG:

NOTE: Format SUSTAIN_RANK_FMT has been output.

STEP 3: MACRO TO ADD SUSTAINABILITY RANK VARIABLES

%macro add_sustainability_rank(in=work.energy_sources, out=work.energy_ranked);

  data &out.;

    set &in.;

    length Sustain_Category $15;

    length Sustain_Rank 8;

    if Sustainability_Index >= 85 then do;

      Sustain_Category = "High";

      Sustain_Rank     = 1;

    end;

    else if Sustainability_Index >= 70 then do;

      Sustain_Category = "Medium";

      Sustain_Rank     = 2;

    end;

    else do;

      Sustain_Category = "Low";

      Sustain_Rank     = 3;

    end;

    format Sustain_Rank sustain_rank_fmt.;

  run;

  Proc Print data=&out.;

  run;

%mend add_sustainability_rank;


%add_sustainability_rank();

OUTPUT:

ObsSourceRegionCapacity_MWCost_per_MWAdoption_RateSustainability_IndexDeployment_DateSustain_CategorySustain_Rank
1Solar PVAsia5000800000359001JAN2015HighHigh Sustainability
2Solar PVEurope4200820000409215MAR2014HighHigh Sustainability
3Wind OnshoreNorth America6100750000388810JUN2013HighHigh Sustainability
4Wind OffshoreEurope38001100000258620SEP2016HighHigh Sustainability
5NuclearEurope90006500000227001JAN2010MediumMedium Sustainability
6NuclearAsia110006000000186815AUG2011LowLow Sustainability
7CoalAsia15000500000553001JAN2005LowLow Sustainability
8CoalNorth America8000520000283501JAN2000LowLow Sustainability
9HydroSouth America7000900000458205MAY2012MediumMedium Sustainability
10HydroAsia6500880000338020FEB2013MediumMedium Sustainability
11GeothermalNorth America24001200000158411NOV2017MediumMedium Sustainability
12BiomassAfrica1800700000207601JUL2018MediumMedium Sustainability


STEP 4: USE INTNX AND INTCK FOR DATE-BASED ANALYSIS

data work.energy_dates;

  set work.energy_ranked;

    /* Assume current reference date is 01JAN2025 for analysis.

       Compute years since deployment using INTCK. */

  format Deployment_Date Review_Date date9.;

  Years_Since_Deployment = intck('year', Deployment_Date, "01JAN2025"d);

  /* Compute a 5-year review date using INTNX */

  Review_Date = intnx('year', Deployment_Date, 5, 'same');

run;

proc print data=work.energy_dates noobs;

  title "World Energy Sources Dataset with Sustainability Rank and Date Variables";

  format Source $srcfmt.;

run;

OUTPUT:

World Energy Sources Dataset with Sustainability Rank and Date Variables

SourceRegionCapacity_MWCost_per_MWAdoption_RateSustainability_IndexDeployment_DateSustain_CategorySustain_RankReview_DateYears_Since_Deployment
Solar PhotovoltaicAsia5000800000359001JAN2015HighHigh Sustainability01JAN202010
Solar PhotovoltaicEurope4200820000409215MAR2014HighHigh Sustainability15MAR201911
Onshore WindNorth America6100750000388810JUN2013HighHigh Sustainability10JUN201812
Offshore WindEurope38001100000258620SEP2016HighHigh Sustainability20SEP20219
Other/ConventionalEurope90006500000227001JAN2010MediumMedium Sustainability01JAN201515
Other/ConventionalAsia110006000000186815AUG2011LowLow Sustainability15AUG201614
Other/ConventionalAsia15000500000553001JAN2005LowLow Sustainability01JAN201020
Other/ConventionalNorth America8000520000283501JAN2000LowLow Sustainability01JAN200525
HydropowerSouth America7000900000458205MAY2012MediumMedium Sustainability05MAY201713
HydropowerAsia6500880000338020FEB2013MediumMedium Sustainability20FEB201812
Geothermal PowerNorth America24001200000158411NOV2017MediumMedium Sustainability11NOV20228
Biomass EnergyAfrica1800700000207601JUL2018MediumMedium Sustainability01JUL20237

STEP 5: BASIC DATA CHECKS (PROC FREQ)

/* Frequency of sustainability category by region */

proc freq data=work.energy_dates;

  tables Region*Sustain_Category / nopercent norow nocol;

  title "Distribution of Sustainability Category by Region";

run;

OUTPUT:

Distribution of Sustainability Category by Region

The FREQ Procedure

Frequency
Table of Region by Sustain_Category
RegionSustain_Category
HighLowMediumTotal
Africa
0
0
1
1
Asia
1
2
1
4
Europe
2
0
1
3
North America
1
1
1
3
South America
0
0
1
1
Total
4
3
5
12
STEP 6: DESCRIPTIVE STATISTICS USING PROC MEANS

proc means data=work.energy_dates n mean std min max maxdec=1;

  class Region;

  var Capacity_MW Cost_per_MW Adoption_Rate Sustainability_Index Years_Since_Deployment;

  title "Descriptive Statistics by Region for World Energy Sources";

run;

OUTPUT:

Descriptive Statistics by Region for World Energy Sources

The MEANS Procedure

RegionN ObsVariableNMeanStd DevMinimumMaximum
Africa1
Capacity_MW
Cost_per_MW
Adoption_Rate
Sustainability_Index
Years_Since_Deployment
1
1
1
1
1
1800.0
700000.0
20.0
76.0
7.0
.
.
.
.
.
1800.0
700000.0
20.0
76.0
7.0
1800.0
700000.0
20.0
76.0
7.0
Asia4
Capacity_MW
Cost_per_MW
Adoption_Rate
Sustainability_Index
Years_Since_Deployment
4
4
4
4
4
9375.0
2045000.0
35.3
67.0
14.0
4534.6
2641735.5
15.2
26.3
4.3
5000.0
500000.0
18.0
30.0
10.0
15000.0
6000000.0
55.0
90.0
20.0
Europe3
Capacity_MW
Cost_per_MW
Adoption_Rate
Sustainability_Index
Years_Since_Deployment
3
3
3
3
3
5666.7
2806666.7
29.0
82.7
11.7
2893.7
3201582.9
9.6
11.4
3.1
3800.0
820000.0
22.0
70.0
9.0
9000.0
6500000.0
40.0
92.0
15.0
North America3
Capacity_MW
Cost_per_MW
Adoption_Rate
Sustainability_Index
Years_Since_Deployment
3
3
3
3
3
5500.0
823333.3
27.0
69.0
15.0
2847.8
345880.5
11.5
29.5
8.9
2400.0
520000.0
15.0
35.0
8.0
8000.0
1200000.0
38.0
88.0
25.0
South America1
Capacity_MW
Cost_per_MW
Adoption_Rate
Sustainability_Index
Years_Since_Deployment
1
1
1
1
1
7000.0
900000.0
45.0
82.0
13.0
.
.
.
.
.
7000.0
900000.0
45.0
82.0
13.0
7000.0
900000.0
45.0
82.0
13.0

STEP 7: CORRELATION ANALYSIS USING PROC CORR

proc corr data=work.energy_dates pearson nosimple;

  var Capacity_MW Cost_per_MW Adoption_Rate Sustainability_Index Years_Since_Deployment;

  title "Correlation Between Capacity, Cost, Adoption, Sustainability and Age of Assets";

run;

OUTPUT:

Correlation Between Capacity, Cost, Adoption, Sustainability and Age of Assets

The CORR Procedure

5 Variables:Capacity_MW Cost_per_MW Adoption_Rate Sustainability_Index Years_Since_Deployment
Pearson Correlation Coefficients, N = 12
Prob > |r| under H0: Rho=0
 Capacity_MWCost_per_MWAdoption_RateSustainability_IndexYears_Since_Deployment
Capacity_MW
1.00000
 
0.35384
0.2592
0.46135
0.1311
-0.73020
0.0070
0.73611
0.0063
Cost_per_MW
0.35384
0.2592
1.00000
 
-0.47161
0.1217
-0.03142
0.9228
0.07050
0.8276
Adoption_Rate
0.46135
0.1311
-0.47161
0.1217
1.00000
 
-0.23189
0.4683
0.34126
0.2777
Sustainability_Index
-0.73020
0.0070
-0.03142
0.9228
-0.23189
0.4683
1.00000
 
-0.86655
0.0003
Years_Since_Deployment
0.73611
0.0063
0.07050
0.8276
0.34126
0.2777
-0.86655
0.0003
1.00000
 

STEP 8: VISUALIZATION USING PROC SGPLOT

/* Scatter plot: Cost vs Sustainability */

proc sgplot data=work.energy_dates;

  scatter x=Cost_per_MW y=Sustainability_Index / group=Source;

  xaxis label = "Cost per MW (USD)";

  yaxis label = "Sustainability Index (0–100)";

  title "Relationship Between Cost per MW and Sustainability Index by Energy Source";

run;

OUTPUT:

The SGPlot Procedure


/* Bar chart: Total Capacity by Region and Source */

proc sgplot data=work.energy_dates;

  vbar Region / response=Capacity_MW group=Source stat=sum;

  yaxis label = "Total Installed Capacity (MW)";

  title "Total Capacity by Region and Energy Source";

run;

OUTPUT:

The SGPlot Procedure


STEP 9: EXTRA – USING PROC SQL FOR SUMMARY AND RANKING

/* Average sustainability index by source globally */

proc sql;

  create table work.avg_sustain_by_source as

  select 

    Source,

    mean(Sustainability_Index) as Avg_Sustainability format=6.2,

    mean(Adoption_Rate)        as Avg_Adoption      format=6.2

  from work.energy_dates

  group by Source

  order by Avg_Sustainability desc;

quit;

proc print data=work.avg_sustain_by_source;

run;

OUTPUT:

ObsSourceAvg_SustainabilityAvg_Adoption
1Solar PV91.0037.50
2Wind Onshore88.0038.00
3Wind Offshore86.0025.00
4Geothermal84.0015.00
5Hydro81.0039.00
6Biomass76.0020.00
7Nuclear69.0020.00
8Coal32.5041.50


/* Rank sources by average sustainability using PROC RANK */

proc rank data=work.avg_sustain_by_source out=work.source_ranks descending ties=low;

  var Avg_Sustainability;

  ranks Global_Sustain_Rank;

run;

proc print data=work.source_ranks noobs;

  title "Global Sustainability Ranking of Energy Sources (Average Index)";

run;

OUTPUT:

Global Sustainability Ranking of Energy Sources (Average Index)

SourceAvg_SustainabilityAvg_AdoptionGlobal_Sustain_Rank
Solar PV91.0037.501
Wind Onshore88.0038.002
Wind Offshore86.0025.003
Geothermal84.0015.004
Hydro81.0039.005
Biomass76.0020.006
Nuclear69.0020.007
Coal32.5041.508


To Visit My Previous Electronic Data Analysis:Click Here
To Visit My Previous IPS Toppers Dataset:Click Here
To Visit My Previous Global Money Loan Trends Dataset:Click Here
To Visit My Previous Statewise Population Analysis:Click Here



Follow Us On : 


 


--- FOLLOW OUR BLOG FOR MORE INFORMATION.

--->PLEASE DO COMMENTS AND SHARE OUR BLOG.




No comments:

Post a Comment