Sunday, 21 December 2025

346.INDIAN STATES SOCIO-ECONOMIC DATA ANALYSIS USING DATA STEP | PROC SQL | PROC MEANS | PROC CORR | PROC UNIVARIATE | MACROS | DATE FUNCTIONS

INDIAN STATES SOCIO-ECONOMIC DATA ANALYSIS USING DATA STEP | PROC SQL | PROC MEANS |  PROC CORR | PROC UNIVARIATE | MACROS | DATE FUNCTIONS

 options nocenter;

1.INDIAN STATES DATASET CREATION WITH DATE FORMATS

/*
 * Build INDIAN_STATES: one observation per state, read with list input
 * from the in-stream records below.
 *
 *   State              character, up to 20 bytes (underscores replace
 *                      spaces so list input treats the name as one token)
 *   Literacy_Rate ..   five numeric measures, in the order they appear
 *   Employment_Rate    on each record
 *   Data_Date          SAS date, read with the DATE9. informat and
 *                      displayed with the DATE9. format
 *
 * LENGTH and FORMAT come before INPUT, so State and Data_Date are the
 * first two variables in the dataset (this is the column order shown by
 * PROC PRINT).
 *
 * Each state occupies exactly one record with no blank records between
 * them, so INPUT never has to move to a new line to find its values.
 */
data indian_states;
    length State $20;
    format Data_Date date9.;
    input State $ Literacy_Rate GDP Population Industrial_Growth Employment_Rate
          Data_Date :date9.;
    datalines;
Andhra_Pradesh 67.4 13.5 53 6.2 59.1 01JAN2023
Telangana 72.8 14.2 39 7.5 61.4 01JAN2023
Tamil_Nadu 80.1 23.1 76 8.1 64.8 01JAN2023
Karnataka 75.6 21.9 68 7.9 63.2 01JAN2023
Kerala 94.0 9.8 35 5.1 60.9 01JAN2023
Maharashtra 82.3 38.5 125 6.9 62.7 01JAN2023
Gujarat 78.0 25.6 70 8.4 65.3 01JAN2023
Rajasthan 66.1 12.7 81 5.8 58.4 01JAN2023
Uttar_Pradesh 69.7 17.1 235 4.9 56.2 01JAN2023
Madhya_Pradesh 70.6 11.4 86 5.3 57.1 01JAN2023
West_Bengal 76.3 14.9 99 6.1 60.2 01JAN2023
Punjab 75.8 7.6 30 5.7 61.0 01JAN2023
;
run;

/* List every observation of the dataset just created. */
proc print data=indian_states;
run;

OUTPUT:

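| Obs | State | Data_Date | Literacy_Rate | GDP | Population | Industrial_Growth | Employment_Rate |
|---|---|---|---|---|---|---|---|
| 1 | Andhra_Pradesh | 01JAN2023 | 67.4 | 13.5 | 53 | 6.2 | 59.1 |
| 2 | Telangana | 01JAN2023 | 72.8 | 14.2 | 39 | 7.5 | 61.4 |
| 3 | Tamil_Nadu | 01JAN2023 | 80.1 | 23.1 | 76 | 8.1 | 64.8 |
| 4 | Karnataka | 01JAN2023 | 75.6 | 21.9 | 68 | 7.9 | 63.2 |
| 5 | Kerala | 01JAN2023 | 94.0 | 9.8 | 35 | 5.1 | 60.9 |
| 6 | Maharashtra | 01JAN2023 | 82.3 | 38.5 | 125 | 6.9 | 62.7 |
| 7 | Gujarat | 01JAN2023 | 78.0 | 25.6 | 70 | 8.4 | 65.3 |
| 8 | Rajasthan | 01JAN2023 | 66.1 | 12.7 | 81 | 5.8 | 58.4 |
| 9 | Uttar_Pradesh | 01JAN2023 | 69.7 | 17.1 | 235 | 4.9 | 56.2 |
| 10 | Madhya_Pradesh | 01JAN2023 | 70.6 | 11.4 | 86 | 5.3 | 57.1 |
| 11 | West_Bengal | 01JAN2023 | 76.3 | 14.9 | 99 | 6.1 | 60.2 |
| 12 | Punjab | 01JAN2023 | 75.8 | 7.6 | 30 | 5.7 | 61.0 |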


2.DATE FUNCTIONS USING INTNX AND INTCK

/*
 * Derive two date-based columns from INDIAN_STATES:
 *   Next_Year    - Data_Date moved forward one year, keeping the same
 *                  month and day ("same" alignment), shown as DATE9.
 *   Months_Since - number of month boundaries between Data_Date and
 *                  01JAN2024
 */
data indian_states_dates;
    set indian_states;

    format Next_Year date9.;

    Next_Year    = intnx("year", Data_Date, 1, "same");
    Months_Since = intck("month", Data_Date, "01JAN2024"d);
run;

/* Show the source columns together with the two derived ones. */
proc print data=indian_states_dates;
run;

OUTPUT:

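| Obs | State | Data_Date | Literacy_Rate | GDP | Population | Industrial_Growth | Employment_Rate | Next_Year | Months_Since |
|---|---|---|---|---|---|---|---|---|---|
| 1 | Andhra_Pradesh | 01JAN2023 | 67.4 | 13.5 | 53 | 6.2 | 59.1 | 01JAN2024 | 12 |
| 2 | Telangana | 01JAN2023 | 72.8 | 14.2 | 39 | 7.5 | 61.4 | 01JAN2024 | 12 |
| 3 | Tamil_Nadu | 01JAN2023 | 80.1 | 23.1 | 76 | 8.1 | 64.8 | 01JAN2024 | 12 |
| 4 | Karnataka | 01JAN2023 | 75.6 | 21.9 | 68 | 7.9 | 63.2 | 01JAN2024 | 12 |
| 5 | Kerala | 01JAN2023 | 94.0 | 9.8 | 35 | 5.1 | 60.9 | 01JAN2024 | 12 |
| 6 | Maharashtra | 01JAN2023 | 82.3 | 38.5 | 125 | 6.9 | 62.7 | 01JAN2024 | 12 |
| 7 | Gujarat | 01JAN2023 | 78.0 | 25.6 | 70 | 8.4 | 65.3 | 01JAN2024 | 12 |
| 8 | Rajasthan | 01JAN2023 | 66.1 | 12.7 | 81 | 5.8 | 58.4 | 01JAN2024 | 12 |
| 9 | Uttar_Pradesh | 01JAN2023 | 69.7 | 17.1 | 235 | 4.9 | 56.2 | 01JAN2024 | 12 |
| 10 | Madhya_Pradesh | 01JAN2023 | 70.6 | 11.4 | 86 | 5.3 | 57.1 | 01JAN2024 | 12 |
| 11 | West_Bengal | 01JAN2023 | 76.3 | 14.9 | 99 | 6.1 | 60.2 | 01JAN2024 | 12 |
| 12 | Punjab | 01JAN2023 | 75.8 | 7.6 | 30 | 5.7 | 61.0 | 01JAN2024 | 12 |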


3.PROC SQL FOR DERIVED METRICS

/*
 * Create STATE_METRICS from INDIAN_STATES: the state name, the five
 * numeric measures, and a derived ratio GDP_Per_Capita = GDP / Population
 * displayed with the 8.2 format. Data_Date is not carried over.
 */
proc sql;
    create table state_metrics as
    select State,
           Literacy_Rate,
           GDP,
           Population,
           Industrial_Growth,
           Employment_Rate,
           GDP / Population as GDP_Per_Capita format=8.2
    from indian_states;
quit;

/* List the derived table. */
proc print data=state_metrics;
run;

OUTPUT:

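| Obs | State | Literacy_Rate | GDP | Population | Industrial_Growth | Employment_Rate | GDP_Per_Capita |
|---|---|---|---|---|---|---|---|
| 1 | Andhra_Pradesh | 67.4 | 13.5 | 53 | 6.2 | 59.1 | 0.25 |
| 2 | Telangana | 72.8 | 14.2 | 39 | 7.5 | 61.4 | 0.36 |
| 3 | Tamil_Nadu | 80.1 | 23.1 | 76 | 8.1 | 64.8 | 0.30 |
| 4 | Karnataka | 75.6 | 21.9 | 68 | 7.9 | 63.2 | 0.32 |
| 5 | Kerala | 94.0 | 9.8 | 35 | 5.1 | 60.9 | 0.28 |
| 6 | Maharashtra | 82.3 | 38.5 | 125 | 6.9 | 62.7 | 0.31 |
| 7 | Gujarat | 78.0 | 25.6 | 70 | 8.4 | 65.3 | 0.37 |
| 8 | Rajasthan | 66.1 | 12.7 | 81 | 5.8 | 58.4 | 0.16 |
| 9 | Uttar_Pradesh | 69.7 | 17.1 | 235 | 4.9 | 56.2 | 0.07 |
| 10 | Madhya_Pradesh | 70.6 | 11.4 | 86 | 5.3 | 57.1 | 0.13 |
| 11 | West_Bengal | 76.3 | 14.9 | 99 | 6.1 | 60.2 | 0.15 |
| 12 | Punjab | 75.8 | 7.6 | 30 | 5.7 | 61.0 | 0.25 |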


4.PROC MEANS (DESCRIPTIVE STATISTICS)

/*
 * Descriptive statistics for the five numeric measures in INDIAN_STATES.
 * Only the mean, minimum, maximum and standard deviation are requested,
 * and they are reported in that order.
 */
proc means data=indian_states
           mean min max std;
    var Literacy_Rate
        GDP
        Population
        Industrial_Growth
        Employment_Rate;
run;

OUTPUT:

The MEANS Procedure

| Variable | Mean | Minimum | Maximum | Std Dev |
|---|---|---|---|---|
| Literacy_Rate | 75.7250000 | 66.1000000 | 94.0000000 | 7.5975026 |
| GDP | 17.5250000 | 7.6000000 | 38.5000000 | 8.5741816 |
| Population | 83.0833333 | 30.0000000 | 235.0000000 | 55.2094497 |
| Industrial_Growth | 6.4916667 | 4.9000000 | 8.4000000 | 1.2295294 |
| Employment_Rate | 60.8583333 | 56.2000000 | 65.3000000 | 2.8579899 |

5.PROC CORR (RELATIONSHIP ANALYSIS)

/*
 * Pairwise Pearson correlations among the four listed measures of
 * INDIAN_STATES. With only a VAR statement, every variable is correlated
 * with every other, producing a square 4 x 4 matrix.
 */
proc corr data=indian_states;
    var Literacy_Rate
        GDP
        Industrial_Growth
        Employment_Rate;
run;

OUTPUT:

The CORR Procedure

4 Variables:Literacy_Rate GDP Industrial_Growth Employment_Rate
Simple Statistics
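| Variable | N | Mean | Std Dev | Sum | Minimum | Maximum |
|---|---|---|---|---|---|---|
| Literacy_Rate | 12 | 75.72500 | 7.59750 | 908.70000 | 66.10000 | 94.00000 |
| GDP | 12 | 17.52500 | 8.57418 | 210.30000 | 7.60000 | 38.50000 |
| Industrial_Growth | 12 | 6.49167 | 1.22953 | 77.90000 | 4.90000 | 8.40000 |
| Employment_Rate | 12 | 60.85833 | 2.85799 | 730.30000 | 56.20000 | 65.30000 |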
Pearson Correlation Coefficients, N = 12
Prob > |r| under H0: Rho=0
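(Each cell shows the correlation coefficient r, with its p-value in parentheses.)

| | Literacy_Rate | GDP | Industrial_Growth | Employment_Rate |
|---|---|---|---|---|
| Literacy_Rate | 1.00000 | 0.22374 (p=0.4845) | 0.09734 (p=0.7634) | 0.51749 (p=0.0849) |
| GDP | 0.22374 (p=0.4845) | 1.00000 | 0.58529 (p=0.0456) | 0.55222 (p=0.0626) |
| Industrial_Growth | 0.09734 (p=0.7634) | 0.58529 (p=0.0456) | 1.00000 | 0.86941 (p=0.0002) |
| Employment_Rate | 0.51749 (p=0.0849) | 0.55222 (p=0.0626) | 0.86941 (p=0.0002) | 1.00000 |
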

6.PROC UNIVARIATE (DISTRIBUTION ANALYSIS)

/*
 * Distribution analysis of GDP and Population from INDIAN_STATES:
 * the standard PROC UNIVARIATE tables for each variable, plus one
 * histogram per variable.
 */
proc univariate data=indian_states;
    var       GDP Population;
    histogram GDP Population;
run;

OUTPUT:

The UNIVARIATE Procedure

Variable: GDP

Moments
N12Sum Weights12
Mean17.525Sum Observations210.3
Std Deviation8.57418165Variance73.5165909
Skewness1.40366643Kurtosis2.22885838
Uncorrected SS4494.19Corrected SS808.6825
Coeff Variation48.9254302Std Error Mean2.47515304
Basic Statistical Measures
LocationVariability
Mean17.52500Std Deviation8.57418
Median14.55000Variance73.51659
Mode.Range30.90000
  Interquartile Range10.45000
Tests for Location: Mu0=0
TestStatisticp Value
Student's tt7.08037Pr > |t|<.0001
SignM6Pr >= |M|0.0005
Signed RankS39Pr >= |S|0.0005
Quantiles (Definition 5)
LevelQuantile
100% Max38.50
99%38.50
95%38.50
90%25.60
75% Q322.50
50% Median14.55
25% Q112.05
10%9.80
5%7.60
1%7.60
0% Min7.60
Extreme Observations
LowestHighest
ValueObsValueObs
7.61217.19
9.8521.94
11.41023.13
12.7825.67
13.5138.56

The UNIVARIATE Procedure

Histogram for GDP

The UNIVARIATE Procedure

Variable: Population

Moments
N12Sum Weights12
Mean83.0833333Sum Observations997
Std Deviation55.2094497Variance3048.08333
Skewness2.07706004Kurtosis5.35833297
Uncorrected SS116363Corrected SS33528.9167
Coeff Variation66.4506917Std Error Mean15.9375953
Basic Statistical Measures
LocationVariability
Mean83.08333Std Deviation55.20945
Median73.00000Variance3048
Mode.Range205.00000
  Interquartile Range46.50000
Tests for Location: Mu0=0
TestStatisticp Value
Student's tt5.213041Pr > |t|0.0003
SignM6Pr >= |M|0.0005
Signed RankS39Pr >= |S|0.0005
Quantiles (Definition 5)
LevelQuantile
100% Max235.0
99%235.0
95%235.0
90%125.0
75% Q392.5
50% Median73.0
25% Q146.0
10%35.0
5%30.0
1%30.0
0% Min30.0
Extreme Observations
LowestHighest
ValueObsValueObs
3012818
3558610
3929911
5311256
6842359

The UNIVARIATE Procedure

Histogram for Population

7.MACRO FOR STATE-LEVEL CATEGORIZATION

/*
 * %categorize_state
 *
 * Takes no parameters. Copies INDIAN_STATES into CATEGORIZE_STATES,
 * adding the character variable Development_Level ($15), then prints
 * the result. The first rule that matches wins:
 *   "High"   - GDP >= 20 and Literacy_Rate >= 75
 *   "Medium" - GDP >= 10
 *   "Low"    - everything else
 */
%macro categorize_state;

    data categorize_states;
        set indian_states;

        length Development_Level $15;

        /* WHEN clauses are tested top to bottom; the first true one is used. */
        select;
            when (GDP >= 20 and Literacy_Rate >= 75) Development_Level = "High";
            when (GDP >= 10)                         Development_Level = "Medium";
            otherwise                                Development_Level = "Low";
        end;
    run;

    proc print data=categorize_states;
    run;

%mend categorize_state;


%categorize_state;

OUTPUT:

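| Obs | State | Data_Date | Literacy_Rate | GDP | Population | Industrial_Growth | Employment_Rate | Development_Level |
|---|---|---|---|---|---|---|---|---|
| 1 | Andhra_Pradesh | 01JAN2023 | 67.4 | 13.5 | 53 | 6.2 | 59.1 | Medium |
| 2 | Telangana | 01JAN2023 | 72.8 | 14.2 | 39 | 7.5 | 61.4 | Medium |
| 3 | Tamil_Nadu | 01JAN2023 | 80.1 | 23.1 | 76 | 8.1 | 64.8 | High |
| 4 | Karnataka | 01JAN2023 | 75.6 | 21.9 | 68 | 7.9 | 63.2 | High |
| 5 | Kerala | 01JAN2023 | 94.0 | 9.8 | 35 | 5.1 | 60.9 | Low |
| 6 | Maharashtra | 01JAN2023 | 82.3 | 38.5 | 125 | 6.9 | 62.7 | High |
| 7 | Gujarat | 01JAN2023 | 78.0 | 25.6 | 70 | 8.4 | 65.3 | High |
| 8 | Rajasthan | 01JAN2023 | 66.1 | 12.7 | 81 | 5.8 | 58.4 | Medium |
| 9 | Uttar_Pradesh | 01JAN2023 | 69.7 | 17.1 | 235 | 4.9 | 56.2 | Medium |
| 10 | Madhya_Pradesh | 01JAN2023 | 70.6 | 11.4 | 86 | 5.3 | 57.1 | Medium |
| 11 | West_Bengal | 01JAN2023 | 76.3 | 14.9 | 99 | 6.1 | 60.2 | Medium |
| 12 | Punjab | 01JAN2023 | 75.8 | 7.6 | 30 | 5.7 | 61.0 | Low |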


8.ADDITIONAL SAS PROCEDURE

/*
 * Print the first five observations of CATEGORIZE_STATES under a
 * report title.
 *
 * TITLE is a global statement: once set it stays in effect for every
 * later step until it is changed or cleared. It is therefore set just
 * before this step and reset with a null TITLE statement right after,
 * so the heading does not appear on unrelated output further down.
 */
title "Indian States Development Classification";

proc print data=categorize_states(obs=5);
run;

title;

OUTPUT:

Indian States Development Classification

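| Obs | State | Data_Date | Literacy_Rate | GDP | Population | Industrial_Growth | Employment_Rate | Development_Level |
|---|---|---|---|---|---|---|---|---|
| 1 | Andhra_Pradesh | 01JAN2023 | 67.4 | 13.5 | 53 | 6.2 | 59.1 | Medium |
| 2 | Telangana | 01JAN2023 | 72.8 | 14.2 | 39 | 7.5 | 61.4 | Medium |
| 3 | Tamil_Nadu | 01JAN2023 | 80.1 | 23.1 | 76 | 8.1 | 64.8 | High |
| 4 | Karnataka | 01JAN2023 | 75.6 | 21.9 | 68 | 7.9 | 63.2 | High |
| 5 | Kerala | 01JAN2023 | 94.0 | 9.8 | 35 | 5.1 | 60.9 | Low |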

9.PROC CORR

/*
 * Correlate GDP against each of the four VAR variables of INDIAN_STATES.
 *
 * VAR names the column variables of the correlation table and WITH names
 * the row variables, so this step produces a single-row table
 * (GDP versus Literacy_Rate, GDP, Population and Employment_Rate).
 */
proc corr data=indian_states;
    var Literacy_Rate GDP Population Employment_Rate;
    with GDP;
run;


/* Note: the practice version of this step used
   "table Literacy_Rate*GDP;", which is invalid here because TABLE
   (TABLES) is a PROC FREQ statement and is not accepted by PROC CORR.
   It has been corrected to the WITH statement shown above. */


OUTPUT:

The CORR Procedure

1 With Variables:GDP
4 Variables:Literacy_Rate GDP Population Employment_Rate
Simple Statistics
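| Variable | N | Mean | Std Dev | Sum | Minimum | Maximum |
|---|---|---|---|---|---|---|
| GDP | 12 | 17.52500 | 8.57418 | 210.30000 | 7.60000 | 38.50000 |
| Literacy_Rate | 12 | 75.72500 | 7.59750 | 908.70000 | 66.10000 | 94.00000 |
| Population | 12 | 83.08333 | 55.20945 | 997.00000 | 30.00000 | 235.00000 |
| Employment_Rate | 12 | 60.85833 | 2.85799 | 730.30000 | 56.20000 | 65.30000 |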
Pearson Correlation Coefficients, N = 12
Prob > |r| under H0: Rho=0
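(Each cell shows the correlation coefficient r, with its p-value in parentheses.)

| | Literacy_Rate | GDP | Population | Employment_Rate |
|---|---|---|---|---|
| GDP | 0.22374 (p=0.4845) | 1.00000 | 0.33029 (p=0.2944) | 0.55222 (p=0.0626) |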





To Visit My Previous Intermediate Students Performance Dataset:Click Here
To Visit My Previous Namkeen Data Creation:Click Here
To Visit My Previous Hyd_Mall_Family Dataset:Click Here
To Visit My Previous Home Energy Consumption Dataset:Click Here



Follow Us On : 


 


--- FOLLOW OUR BLOG FOR MORE INFORMATION.

--->PLEASE DO COMMENTS AND SHARE OUR BLOG.

No comments:

Post a Comment