INDIAN STATES SOCIO-ECONOMIC DATA ANALYSIS USING DATA STEP | PROC SQL | PROC MEANS | PROC CORR | PROC UNIVARIATE | MACROS | DATE FUNCTIONS
/* Global option: left-align procedure output instead of centering it. */
options nocenter;
1.INDIAN STATES DATASET CREATION WITH DATE FORMATS
/* Build WORK.INDIAN_STATES from the inline records below.
   Each record holds, in order: state name, literacy rate, GDP, population,
   industrial growth, employment rate, and an as-of date in DATE9. form.
   State and Data_Date are declared first so they lead the variable order. */
data indian_states;
    length State $20;
    format Data_Date date9.;

    /* List input; the colon modifier reads the date with the DATE9. informat. */
    input
        State $
        Literacy_Rate
        GDP
        Population
        Industrial_Growth
        Employment_Rate
        Data_Date :date9.
    ;
    datalines;
Andhra_Pradesh 67.4 13.5 53 6.2 59.1 01JAN2023
Telangana 72.8 14.2 39 7.5 61.4 01JAN2023
Tamil_Nadu 80.1 23.1 76 8.1 64.8 01JAN2023
Karnataka 75.6 21.9 68 7.9 63.2 01JAN2023
Kerala 94.0 9.8 35 5.1 60.9 01JAN2023
Maharashtra 82.3 38.5 125 6.9 62.7 01JAN2023
Gujarat 78.0 25.6 70 8.4 65.3 01JAN2023
Rajasthan 66.1 12.7 81 5.8 58.4 01JAN2023
Uttar_Pradesh 69.7 17.1 235 4.9 56.2 01JAN2023
Madhya_Pradesh 70.6 11.4 86 5.3 57.1 01JAN2023
West_Bengal 76.3 14.9 99 6.1 60.2 01JAN2023
Punjab 75.8 7.6 30 5.7 61.0 01JAN2023
;
run;

/* List every row of the freshly created dataset. */
proc print data=indian_states;
run;
OUTPUT:
| Obs | State | Data_Date | Literacy_Rate | GDP | Population | Industrial_Growth | Employment_Rate |
|---|---|---|---|---|---|---|---|
| 1 | Andhra_Pradesh | 01JAN2023 | 67.4 | 13.5 | 53 | 6.2 | 59.1 |
| 2 | Telangana | 01JAN2023 | 72.8 | 14.2 | 39 | 7.5 | 61.4 |
| 3 | Tamil_Nadu | 01JAN2023 | 80.1 | 23.1 | 76 | 8.1 | 64.8 |
| 4 | Karnataka | 01JAN2023 | 75.6 | 21.9 | 68 | 7.9 | 63.2 |
| 5 | Kerala | 01JAN2023 | 94.0 | 9.8 | 35 | 5.1 | 60.9 |
| 6 | Maharashtra | 01JAN2023 | 82.3 | 38.5 | 125 | 6.9 | 62.7 |
| 7 | Gujarat | 01JAN2023 | 78.0 | 25.6 | 70 | 8.4 | 65.3 |
| 8 | Rajasthan | 01JAN2023 | 66.1 | 12.7 | 81 | 5.8 | 58.4 |
| 9 | Uttar_Pradesh | 01JAN2023 | 69.7 | 17.1 | 235 | 4.9 | 56.2 |
| 10 | Madhya_Pradesh | 01JAN2023 | 70.6 | 11.4 | 86 | 5.3 | 57.1 |
| 11 | West_Bengal | 01JAN2023 | 76.3 | 14.9 | 99 | 6.1 | 60.2 |
| 12 | Punjab | 01JAN2023 | 75.8 | 7.6 | 30 | 5.7 | 61.0 |
2.DATE FUNCTIONS USING INTNX AND INTCK
/* Extend INDIAN_STATES with two date-derived columns:
   Next_Year    - Data_Date advanced by one year, keeping the same day of year
   Months_Since - number of month boundaries between Data_Date and 01JAN2024 */
data indian_states_dates;
    set indian_states;

    /* Declared before the assignments; variable order is unchanged because
       Next_Year is still created ahead of Months_Since. */
    format Next_Year date9.;

    Next_Year    = intnx('year', Data_Date, 1, 'same');
    Months_Since = intck('month', Data_Date, '01JAN2024'd);
run;

/* Show the source columns together with the two derived ones. */
proc print data=indian_states_dates;
run;
OUTPUT:
| Obs | State | Data_Date | Literacy_Rate | GDP | Population | Industrial_Growth | Employment_Rate | Next_Year | Months_Since |
|---|---|---|---|---|---|---|---|---|---|
| 1 | Andhra_Pradesh | 01JAN2023 | 67.4 | 13.5 | 53 | 6.2 | 59.1 | 01JAN2024 | 12 |
| 2 | Telangana | 01JAN2023 | 72.8 | 14.2 | 39 | 7.5 | 61.4 | 01JAN2024 | 12 |
| 3 | Tamil_Nadu | 01JAN2023 | 80.1 | 23.1 | 76 | 8.1 | 64.8 | 01JAN2024 | 12 |
| 4 | Karnataka | 01JAN2023 | 75.6 | 21.9 | 68 | 7.9 | 63.2 | 01JAN2024 | 12 |
| 5 | Kerala | 01JAN2023 | 94.0 | 9.8 | 35 | 5.1 | 60.9 | 01JAN2024 | 12 |
| 6 | Maharashtra | 01JAN2023 | 82.3 | 38.5 | 125 | 6.9 | 62.7 | 01JAN2024 | 12 |
| 7 | Gujarat | 01JAN2023 | 78.0 | 25.6 | 70 | 8.4 | 65.3 | 01JAN2024 | 12 |
| 8 | Rajasthan | 01JAN2023 | 66.1 | 12.7 | 81 | 5.8 | 58.4 | 01JAN2024 | 12 |
| 9 | Uttar_Pradesh | 01JAN2023 | 69.7 | 17.1 | 235 | 4.9 | 56.2 | 01JAN2024 | 12 |
| 10 | Madhya_Pradesh | 01JAN2023 | 70.6 | 11.4 | 86 | 5.3 | 57.1 | 01JAN2024 | 12 |
| 11 | West_Bengal | 01JAN2023 | 76.3 | 14.9 | 99 | 6.1 | 60.2 | 01JAN2024 | 12 |
| 12 | Punjab | 01JAN2023 | 75.8 | 7.6 | 30 | 5.7 | 61.0 | 01JAN2024 | 12 |
3.PROC SQL FOR DERIVED METRICS
/* Create STATE_METRICS: the numeric indicators for each state plus
   GDP_Per_Capita (GDP divided by Population), displayed with format 8.2.
   Data_Date is intentionally not carried over, matching the select list. */
proc sql;
    create table state_metrics as
        select
            s.State,
            s.Literacy_Rate,
            s.GDP,
            s.Population,
            s.Industrial_Growth,
            s.Employment_Rate,
            s.GDP / s.Population as GDP_Per_Capita format=8.2
        from indian_states as s;
quit;

/* List the derived table. */
proc print data=state_metrics;
run;
OUTPUT:
| Obs | State | Literacy_Rate | GDP | Population | Industrial_Growth | Employment_Rate | GDP_Per_Capita |
|---|---|---|---|---|---|---|---|
| 1 | Andhra_Pradesh | 67.4 | 13.5 | 53 | 6.2 | 59.1 | 0.25 |
| 2 | Telangana | 72.8 | 14.2 | 39 | 7.5 | 61.4 | 0.36 |
| 3 | Tamil_Nadu | 80.1 | 23.1 | 76 | 8.1 | 64.8 | 0.30 |
| 4 | Karnataka | 75.6 | 21.9 | 68 | 7.9 | 63.2 | 0.32 |
| 5 | Kerala | 94.0 | 9.8 | 35 | 5.1 | 60.9 | 0.28 |
| 6 | Maharashtra | 82.3 | 38.5 | 125 | 6.9 | 62.7 | 0.31 |
| 7 | Gujarat | 78.0 | 25.6 | 70 | 8.4 | 65.3 | 0.37 |
| 8 | Rajasthan | 66.1 | 12.7 | 81 | 5.8 | 58.4 | 0.16 |
| 9 | Uttar_Pradesh | 69.7 | 17.1 | 235 | 4.9 | 56.2 | 0.07 |
| 10 | Madhya_Pradesh | 70.6 | 11.4 | 86 | 5.3 | 57.1 | 0.13 |
| 11 | West_Bengal | 76.3 | 14.9 | 99 | 6.1 | 60.2 | 0.15 |
| 12 | Punjab | 75.8 | 7.6 | 30 | 5.7 | 61.0 | 0.25 |
4.PROC MEANS (DESCRIPTIVE STATISTICS)
/* Descriptive statistics (mean, minimum, maximum, standard deviation)
   for the five numeric indicators. Statistic keywords are listed in the
   order the output columns should appear. */
proc means data=indian_states
           mean min max stddev;
    var Literacy_Rate
        GDP
        Population
        Industrial_Growth
        Employment_Rate;
run;
OUTPUT:
The MEANS Procedure
| Variable | Mean | Minimum | Maximum | Std Dev |
|---|---|---|---|---|
| Literacy_Rate | 75.7250000 | 66.1000000 | 94.0000000 | 7.5975026 |
| GDP | 17.5250000 | 7.6000000 | 38.5000000 | 8.5741816 |
| Population | 83.0833333 | 30.0000000 | 235.0000000 | 55.2094497 |
| Industrial_Growth | 6.4916667 | 4.9000000 | 8.4000000 | 1.2295294 |
| Employment_Rate | 60.8583333 | 56.2000000 | 65.3000000 | 2.8579899 |
5.PROC CORR (RELATIONSHIP ANALYSIS)
/* Pairwise Pearson correlations among four indicators.
   PEARSON is the procedure default; it is spelled out here for clarity. */
proc corr data=indian_states pearson;
    var Literacy_Rate
        GDP
        Industrial_Growth
        Employment_Rate;
run;
OUTPUT:
The CORR Procedure
| 4 Variables: | Literacy_Rate GDP Industrial_Growth Employment_Rate |
|---|
| Simple Statistics | ||||||
|---|---|---|---|---|---|---|
| Variable | N | Mean | Std Dev | Sum | Minimum | Maximum |
| Literacy_Rate | 12 | 75.72500 | 7.59750 | 908.70000 | 66.10000 | 94.00000 |
| GDP | 12 | 17.52500 | 8.57418 | 210.30000 | 7.60000 | 38.50000 |
| Industrial_Growth | 12 | 6.49167 | 1.22953 | 77.90000 | 4.90000 | 8.40000 |
| Employment_Rate | 12 | 60.85833 | 2.85799 | 730.30000 | 56.20000 | 65.30000 |
| Pearson Correlation Coefficients, N = 12 Prob > |r| under H0: Rho=0 | ||||
|---|---|---|---|---|
| | Literacy_Rate | GDP | Industrial_Growth | Employment_Rate |
| Literacy_Rate | 1.00000 | 0.22374 0.4845 | 0.09734 0.7634 | 0.51749 0.0849 |
| GDP | 0.22374 0.4845 | 1.00000 | 0.58529 0.0456 | 0.55222 0.0626 |
| Industrial_Growth | 0.09734 0.7634 | 0.58529 0.0456 | 1.00000 | 0.86941 0.0002 |
| Employment_Rate | 0.51749 0.0849 | 0.55222 0.0626 | 0.86941 0.0002 | 1.00000 |
6.PROC UNIVARIATE (DISTRIBUTION ANALYSIS)
/* Distribution analysis of GDP and Population: moments, location tests,
   quantiles, and extreme observations, plus one histogram per variable. */
proc univariate data=indian_states;
    var       GDP Population;
    histogram GDP Population;
run;
OUTPUT:
The UNIVARIATE Procedure
Variable: GDP
| Moments | |||
|---|---|---|---|
| N | 12 | Sum Weights | 12 |
| Mean | 17.525 | Sum Observations | 210.3 |
| Std Deviation | 8.57418165 | Variance | 73.5165909 |
| Skewness | 1.40366643 | Kurtosis | 2.22885838 |
| Uncorrected SS | 4494.19 | Corrected SS | 808.6825 |
| Coeff Variation | 48.9254302 | Std Error Mean | 2.47515304 |
| Basic Statistical Measures | |||
|---|---|---|---|
| Location | Variability | ||
| Mean | 17.52500 | Std Deviation | 8.57418 |
| Median | 14.55000 | Variance | 73.51659 |
| Mode | . | Range | 30.90000 |
| Interquartile Range | 10.45000 | ||
| Tests for Location: Mu0=0 | ||||
|---|---|---|---|---|
| Test | Statistic | p Value | ||
| Student's t | t | 7.08037 | Pr > |t| | <.0001 |
| Sign | M | 6 | Pr >= |M| | 0.0005 |
| Signed Rank | S | 39 | Pr >= |S| | 0.0005 |
| Quantiles (Definition 5) | |
|---|---|
| Level | Quantile |
| 100% Max | 38.50 |
| 99% | 38.50 |
| 95% | 38.50 |
| 90% | 25.60 |
| 75% Q3 | 22.50 |
| 50% Median | 14.55 |
| 25% Q1 | 12.05 |
| 10% | 9.80 |
| 5% | 7.60 |
| 1% | 7.60 |
| 0% Min | 7.60 |
| Extreme Observations | |||
|---|---|---|---|
| Lowest | Highest | ||
| Value | Obs | Value | Obs |
| 7.6 | 12 | 17.1 | 9 |
| 9.8 | 5 | 21.9 | 4 |
| 11.4 | 10 | 23.1 | 3 |
| 12.7 | 8 | 25.6 | 7 |
| 13.5 | 1 | 38.5 | 6 |
The UNIVARIATE Procedure
The UNIVARIATE Procedure
Variable: Population
| Moments | |||
|---|---|---|---|
| N | 12 | Sum Weights | 12 |
| Mean | 83.0833333 | Sum Observations | 997 |
| Std Deviation | 55.2094497 | Variance | 3048.08333 |
| Skewness | 2.07706004 | Kurtosis | 5.35833297 |
| Uncorrected SS | 116363 | Corrected SS | 33528.9167 |
| Coeff Variation | 66.4506917 | Std Error Mean | 15.9375953 |
| Basic Statistical Measures | |||
|---|---|---|---|
| Location | Variability | ||
| Mean | 83.08333 | Std Deviation | 55.20945 |
| Median | 73.00000 | Variance | 3048 |
| Mode | . | Range | 205.00000 |
| Interquartile Range | 46.50000 | ||
| Tests for Location: Mu0=0 | ||||
|---|---|---|---|---|
| Test | Statistic | p Value | ||
| Student's t | t | 5.213041 | Pr > |t| | 0.0003 |
| Sign | M | 6 | Pr >= |M| | 0.0005 |
| Signed Rank | S | 39 | Pr >= |S| | 0.0005 |
| Quantiles (Definition 5) | |
|---|---|
| Level | Quantile |
| 100% Max | 235.0 |
| 99% | 235.0 |
| 95% | 235.0 |
| 90% | 125.0 |
| 75% Q3 | 92.5 |
| 50% Median | 73.0 |
| 25% Q1 | 46.0 |
| 10% | 35.0 |
| 5% | 30.0 |
| 1% | 30.0 |
| 0% Min | 30.0 |
| Extreme Observations | |||
|---|---|---|---|
| Lowest | Highest | ||
| Value | Obs | Value | Obs |
| 30 | 12 | 81 | 8 |
| 35 | 5 | 86 | 10 |
| 39 | 2 | 99 | 11 |
| 53 | 1 | 125 | 6 |
| 68 | 4 | 235 | 9 |
The UNIVARIATE Procedure
7.MACRO FOR STATE-LEVEL CATEGORIZATION
/* Macro: categorize_state
   Builds CATEGORIZE_STATES from INDIAN_STATES, adding Development_Level:
     "High"   - GDP >= 20 and Literacy_Rate >= 75
     "Medium" - otherwise, GDP >= 10
     "Low"    - everything else
   The rules are evaluated top-down, so the first match wins.
   The resulting dataset is then printed. Takes no parameters. */
%macro categorize_state;
    data categorize_states;
        set indian_states;
        length Development_Level $15;

        select;
            when (GDP >= 20 and Literacy_Rate >= 75) Development_Level = "High";
            when (GDP >= 10)                         Development_Level = "Medium";
            otherwise                                Development_Level = "Low";
        end;
    run;

    proc print data=categorize_states;
    run;
%mend categorize_state;

%categorize_state;
OUTPUT:
| Obs | State | Data_Date | Literacy_Rate | GDP | Population | Industrial_Growth | Employment_Rate | Development_Level |
|---|---|---|---|---|---|---|---|---|
| 1 | Andhra_Pradesh | 01JAN2023 | 67.4 | 13.5 | 53 | 6.2 | 59.1 | Medium |
| 2 | Telangana | 01JAN2023 | 72.8 | 14.2 | 39 | 7.5 | 61.4 | Medium |
| 3 | Tamil_Nadu | 01JAN2023 | 80.1 | 23.1 | 76 | 8.1 | 64.8 | High |
| 4 | Karnataka | 01JAN2023 | 75.6 | 21.9 | 68 | 7.9 | 63.2 | High |
| 5 | Kerala | 01JAN2023 | 94.0 | 9.8 | 35 | 5.1 | 60.9 | Low |
| 6 | Maharashtra | 01JAN2023 | 82.3 | 38.5 | 125 | 6.9 | 62.7 | High |
| 7 | Gujarat | 01JAN2023 | 78.0 | 25.6 | 70 | 8.4 | 65.3 | High |
| 8 | Rajasthan | 01JAN2023 | 66.1 | 12.7 | 81 | 5.8 | 58.4 | Medium |
| 9 | Uttar_Pradesh | 01JAN2023 | 69.7 | 17.1 | 235 | 4.9 | 56.2 | Medium |
| 10 | Madhya_Pradesh | 01JAN2023 | 70.6 | 11.4 | 86 | 5.3 | 57.1 | Medium |
| 11 | West_Bengal | 01JAN2023 | 76.3 | 14.9 | 99 | 6.1 | 60.2 | Medium |
| 12 | Punjab | 01JAN2023 | 75.8 | 7.6 | 30 | 5.7 | 61.0 | Low |
8.ADDITIONAL SAS PROCEDURE
/* Print the first five classified states under a descriptive title.
   TITLE is a global statement: once set, it stays in effect for every
   later procedure until it is changed or cleared. It is therefore set
   immediately before the step and cleared right after, so that output
   produced later in the session is not labelled with this title. */
title "Indian States Development Classification";
proc print data=categorize_states(obs=5);
run;
title;
OUTPUT:
| Obs | State | Data_Date | Literacy_Rate | GDP | Population | Industrial_Growth | Employment_Rate | Development_Level |
|---|---|---|---|---|---|---|---|---|
| 1 | Andhra_Pradesh | 01JAN2023 | 67.4 | 13.5 | 53 | 6.2 | 59.1 | Medium |
| 2 | Telangana | 01JAN2023 | 72.8 | 14.2 | 39 | 7.5 | 61.4 | Medium |
| 3 | Tamil_Nadu | 01JAN2023 | 80.1 | 23.1 | 76 | 8.1 | 64.8 | High |
| 4 | Karnataka | 01JAN2023 | 75.6 | 21.9 | 68 | 7.9 | 63.2 | High |
| 5 | Kerala | 01JAN2023 | 94.0 | 9.8 | 35 | 5.1 | 60.9 | Low |
9.PROC CORR
/* Correlate GDP against Literacy_Rate, GDP, Population and Employment_Rate.
   PROC CORR has no TABLE statement (TABLES belongs to PROC FREQ), so a
   "table Literacy_Rate*GDP;" line is rejected as a syntax error.
   To restrict the correlation matrix to specific row variables, use WITH:
   the VAR variables form the columns and the WITH variables form the rows. */
proc corr data=indian_states;
    var Literacy_Rate GDP Population Employment_Rate;
    with GDP;
run;
/* Note: `table Literacy_Rate*GDP;` is not a valid PROC CORR statement (TABLES belongs to PROC FREQ). The valid form is `with GDP;`, which produces the output below. */
The CORR Procedure
| 1 With Variables: | GDP |
|---|---|
| 4 Variables: | Literacy_Rate GDP Population Employment_Rate |
| Simple Statistics | ||||||
|---|---|---|---|---|---|---|
| Variable | N | Mean | Std Dev | Sum | Minimum | Maximum |
| GDP | 12 | 17.52500 | 8.57418 | 210.30000 | 7.60000 | 38.50000 |
| Literacy_Rate | 12 | 75.72500 | 7.59750 | 908.70000 | 66.10000 | 94.00000 |
| Population | 12 | 83.08333 | 55.20945 | 997.00000 | 30.00000 | 235.00000 |
| Employment_Rate | 12 | 60.85833 | 2.85799 | 730.30000 | 56.20000 | 65.30000 |
| Pearson Correlation Coefficients, N = 12 Prob > |r| under H0: Rho=0 | ||||
|---|---|---|---|---|
| | Literacy_Rate | GDP | Population | Employment_Rate |
| GDP | 0.22374 0.4845 | 1.00000 | 0.33029 0.2944 | 0.55222 0.0626 |
No comments:
Post a Comment