INDIAN RIVERS DATA ANALYSIS USING DATA STEP | PROC SQL | PROC MEANS | PROC SORT | PROC UNIVARIATE | PROC SGPLOT AND MACRO AUTOMATION
1. CREATING THE DATASET USING THE DATA STEP
options nocenter nodate nonumber;
data work.indian_rivers;
length River_Name $20 Origin_State $20 End_Location $25;
infile datalines dsd;
input River_Name $ Length_KM Origin_State $ End_Location $ No_of_States
Annual_Flow_BCM;
datalines;
Ganga,2525,Uttarakhand,Bay_of_Bengal,11,525
Godavari,1465,Maharashtra,Bay_of_Bengal,7,110
Krishna,1400,Maharashtra,Bay_of_Bengal,5,78
Yamuna,1376,Uttarakhand,Ganga_Confluence,6,93
Brahmaputra,2900,Arunachal_Pradesh,Bay_of_Bengal,4,537
Narmada,1312,Madhya_Pradesh,Arabian_Sea,3,96
Tapti,724,Madhya_Pradesh,Arabian_Sea,3,37
Mahanadi,858,Chhattisgarh,Bay_of_Bengal,5,66
Cauvery,805,Karnataka,Bay_of_Bengal,3,21
Sabarmati,371,Rajasthan,Arabian_Sea,2,12
Beas,470,Himachal_Pradesh,Sutlej_River,2,15
Sutlej,1450,Tibet,Indus_River,4,77
;
run;
proc print data=work.indian_rivers;
title "INDIAN RIVERS DATASET";
run;
OUTPUT:
| Obs | River_Name | Origin_State | End_Location | Length_KM | No_of_States | Annual_Flow_BCM |
|---|---|---|---|---|---|---|
| 1 | Ganga | Uttarakhand | Bay_of_Bengal | 2525 | 11 | 525 |
| 2 | Godavari | Maharashtra | Bay_of_Bengal | 1465 | 7 | 110 |
| 3 | Krishna | Maharashtra | Bay_of_Bengal | 1400 | 5 | 78 |
| 4 | Yamuna | Uttarakhand | Ganga_Confluence | 1376 | 6 | 93 |
| 5 | Brahmaputra | Arunachal_Pradesh | Bay_of_Bengal | 2900 | 4 | 537 |
| 6 | Narmada | Madhya_Pradesh | Arabian_Sea | 1312 | 3 | 96 |
| 7 | Tapti | Madhya_Pradesh | Arabian_Sea | 724 | 3 | 37 |
| 8 | Mahanadi | Chhattisgarh | Bay_of_Bengal | 858 | 5 | 66 |
| 9 | Cauvery | Karnataka | Bay_of_Bengal | 805 | 3 | 21 |
| 10 | Sabarmati | Rajasthan | Arabian_Sea | 371 | 2 | 12 |
| 11 | Beas | Himachal_Pradesh | Sutlej_River | 470 | 2 | 15 |
| 12 | Sutlej | Tibet | Indus_River | 1450 | 4 | 77 |
2. DESCRIPTIVE STATISTICS USING PROC MEANS
proc means data=work.indian_rivers mean min max maxdec=2;
var Length_KM No_of_States Annual_Flow_BCM;
title "SUMMARY STATISTICS FOR NUMERIC VARIABLES";
run;
OUTPUT:
The MEANS Procedure
| Variable | Mean | Minimum | Maximum |
|---|---|---|---|
Length_KM No_of_States Annual_Flow_BCM | 1304.67 4.58 138.92 | 371.00 2.00 12.00 | 2900.00 11.00 537.00 |
3. DATA SORTING USING PROC SORT
proc sort data=work.indian_rivers out=work.rivers_sorted;
by descending Annual_Flow_BCM Length_KM;
run;
proc print data=work.rivers_sorted;
title "RIVERS SORTED BY DESCENDING FLOW AND ASCENDING LENGTH";
run;
OUTPUT:
| Obs | River_Name | Origin_State | End_Location | Length_KM | No_of_States | Annual_Flow_BCM |
|---|---|---|---|---|---|---|
| 1 | Brahmaputra | Arunachal_Pradesh | Bay_of_Bengal | 2900 | 4 | 537 |
| 2 | Ganga | Uttarakhand | Bay_of_Bengal | 2525 | 11 | 525 |
| 3 | Godavari | Maharashtra | Bay_of_Bengal | 1465 | 7 | 110 |
| 4 | Narmada | Madhya_Pradesh | Arabian_Sea | 1312 | 3 | 96 |
| 5 | Yamuna | Uttarakhand | Ganga_Confluence | 1376 | 6 | 93 |
| 6 | Krishna | Maharashtra | Bay_of_Bengal | 1400 | 5 | 78 |
| 7 | Sutlej | Tibet | Indus_River | 1450 | 4 | 77 |
| 8 | Mahanadi | Chhattisgarh | Bay_of_Bengal | 858 | 5 | 66 |
| 9 | Tapti | Madhya_Pradesh | Arabian_Sea | 724 | 3 | 37 |
| 10 | Cauvery | Karnataka | Bay_of_Bengal | 805 | 3 | 21 |
| 11 | Beas | Himachal_Pradesh | Sutlej_River | 470 | 2 | 15 |
| 12 | Sabarmati | Rajasthan | Arabian_Sea | 371 | 2 | 12 |
4. VISUALIZING RIVER DATA USING PROC SGPLOT
proc sgplot data=work.indian_rivers;
title "LENGTH vs ANNUAL FLOW OF MAJOR INDIAN RIVERS";
scatter x=Length_KM y=Annual_Flow_BCM
/ datalabel=River_Name markerattrs=(symbol=circlefilled);
xaxis label="River Length (KM)";
yaxis label="Annual Flow (Billion Cubic Meters)";
run;
OUTPUT:
5. USING PROC SQL FOR DATA MANIPULATION
proc sql;
title "RIVERS LONGER THAN 1000 KM";
select River_Name, Length_KM, Origin_State, Annual_Flow_BCM
from work.indian_rivers
where Length_KM > 1000
order by Annual_Flow_BCM desc;
quit;
OUTPUT:
| River_Name | Length_KM | Origin_State | Annual_Flow_BCM |
|---|---|---|---|
| Brahmaputra | 2900 | Arunachal_Pradesh | 537 |
| Ganga | 2525 | Uttarakhand | 525 |
| Godavari | 1465 | Maharashtra | 110 |
| Narmada | 1312 | Madhya_Pradesh | 96 |
| Yamuna | 1376 | Uttarakhand | 93 |
| Krishna | 1400 | Maharashtra | 78 |
| Sutlej | 1450 | Tibet | 77 |
6. MACRO AUTOMATION FOR REUSABLE ANALYSIS
/*Macro Example: Compute average flow for selected rivers*/
%macro river_flow(rivername);
proc sql noprint;
select mean(Annual_Flow_BCM) into :avgflow
from work.indian_rivers
where River_Name="&rivername";
quit;
%put Average Flow for &rivername = &avgflow Billion Cubic Meters;
%mend;
%river_flow(Ganga);
LOG:
%river_flow(Godavari);
LOG:
%river_flow(Narmada);
LOG:
7. DISTRIBUTION ANALYSIS USING PROC UNIVARIATE
proc univariate data=work.indian_rivers normal;
var Annual_Flow_BCM;
title "DISTRIBUTION ANALYSIS OF RIVER FLOW (BCM)";
run;
OUTPUT:
The UNIVARIATE Procedure
Variable: Annual_Flow_BCM
| Moments | |||
|---|---|---|---|
| N | 12 | Sum Weights | 12 |
| Mean | 138.916667 | Sum Observations | 1667 |
| Std Deviation | 186.092088 | Variance | 34630.2652 |
| Skewness | 1.9118054 | Kurtosis | 2.29904603 |
| Uncorrected SS | 612507 | Corrected SS | 380932.917 |
| Coeff Variation | 133.959511 | Std Error Mean | 53.7201585 |
| Basic Statistical Measures | |||
|---|---|---|---|
| Location | Variability | ||
| Mean | 138.9167 | Std Deviation | 186.09209 |
| Median | 77.5000 | Variance | 34630 |
| Mode | . | Range | 525.00000 |
| Interquartile Range | 74.00000 | ||
| Tests for Location: Mu0=0 | ||||
|---|---|---|---|---|
| Test | Statistic | p Value | ||
| Student's t | t | 2.585932 | Pr > |t| | 0.0253 |
| Sign | M | 6 | Pr >= |M| | 0.0005 |
| Signed Rank | S | 39 | Pr >= |S| | 0.0005 |
| Tests for Normality | ||||
|---|---|---|---|---|
| Test | Statistic | p Value | ||
| Shapiro-Wilk | W | 0.629754 | Pr < W | 0.0002 |
| Kolmogorov-Smirnov | D | 0.395076 | Pr > D | <0.0100 |
| Cramer-von Mises | W-Sq | 0.382163 | Pr > W-Sq | <0.0050 |
| Anderson-Darling | A-Sq | 2.042281 | Pr > A-Sq | <0.0050 |
| Quantiles (Definition 5) | |
|---|---|
| Level | Quantile |
| 100% Max | 537.0 |
| 99% | 537.0 |
| 95% | 537.0 |
| 90% | 525.0 |
| 75% Q3 | 103.0 |
| 50% Median | 77.5 |
| 25% Q1 | 29.0 |
| 10% | 15.0 |
| 5% | 12.0 |
| 1% | 12.0 |
| 0% Min | 12.0 |
| Extreme Observations | |||
|---|---|---|---|
| Lowest | Highest | ||
| Value | Obs | Value | Obs |
| 12 | 10 | 93 | 4 |
| 15 | 11 | 96 | 6 |
| 21 | 9 | 110 | 2 |
| 37 | 7 | 525 | 1 |
| 66 | 8 | 537 | 5 |
8. DERIVING NEW VARIABLES (Flow Efficiency)
data work.river_efficiency;
set work.indian_rivers;
Flow_Efficiency = Annual_Flow_BCM / Length_KM;
run;
proc print data=work.river_efficiency;
title "DERIVED VARIABLE: FLOW EFFICIENCY (BCM PER KM)";
run;
OUTPUT:
| Obs | River_Name | Origin_State | End_Location | Length_KM | No_of_States | Annual_Flow_BCM | Flow_Efficiency |
|---|---|---|---|---|---|---|---|
| 1 | Ganga | Uttarakhand | Bay_of_Bengal | 2525 | 11 | 525 | 0.20792 |
| 2 | Godavari | Maharashtra | Bay_of_Bengal | 1465 | 7 | 110 | 0.07509 |
| 3 | Krishna | Maharashtra | Bay_of_Bengal | 1400 | 5 | 78 | 0.05571 |
| 4 | Yamuna | Uttarakhand | Ganga_Confluence | 1376 | 6 | 93 | 0.06759 |
| 5 | Brahmaputra | Arunachal_Pradesh | Bay_of_Bengal | 2900 | 4 | 537 | 0.18517 |
| 6 | Narmada | Madhya_Pradesh | Arabian_Sea | 1312 | 3 | 96 | 0.07317 |
| 7 | Tapti | Madhya_Pradesh | Arabian_Sea | 724 | 3 | 37 | 0.05110 |
| 8 | Mahanadi | Chhattisgarh | Bay_of_Bengal | 858 | 5 | 66 | 0.07692 |
| 9 | Cauvery | Karnataka | Bay_of_Bengal | 805 | 3 | 21 | 0.02609 |
| 10 | Sabarmati | Rajasthan | Arabian_Sea | 371 | 2 | 12 | 0.03235 |
| 11 | Beas | Himachal_Pradesh | Sutlej_River | 470 | 2 | 15 | 0.03191 |
| 12 | Sutlej | Tibet | Indus_River | 1450 | 4 | 77 | 0.05310 |
9. ADVANCED SQL GROUPING (BY TERMINATION LOCATION)
proc sql;
title "TOTAL FLOW CONTRIBUTION BY END LOCATION";
select End_Location,
count(*) as River_Count,
sum(Annual_Flow_BCM) as Total_Flow_BCM format=8.2
from work.indian_rivers
group by End_Location
order by Total_Flow_BCM desc;
quit;
OUTPUT:
| Obs | River_Name | Origin_State | End_Location | Length_KM | No_of_States | Annual_Flow_BCM | Flow_Efficiency |
|---|---|---|---|---|---|---|---|
| 1 | Ganga | Uttarakhand | Bay_of_Bengal | 2525 | 11 | 525 | 0.20792 |
| 2 | Godavari | Maharashtra | Bay_of_Bengal | 1465 | 7 | 110 | 0.07509 |
| 3 | Krishna | Maharashtra | Bay_of_Bengal | 1400 | 5 | 78 | 0.05571 |
| 4 | Yamuna | Uttarakhand | Ganga_Confluence | 1376 | 6 | 93 | 0.06759 |
| 5 | Brahmaputra | Arunachal_Pradesh | Bay_of_Bengal | 2900 | 4 | 537 | 0.18517 |
| 6 | Narmada | Madhya_Pradesh | Arabian_Sea | 1312 | 3 | 96 | 0.07317 |
| 7 | Tapti | Madhya_Pradesh | Arabian_Sea | 724 | 3 | 37 | 0.05110 |
| 8 | Mahanadi | Chhattisgarh | Bay_of_Bengal | 858 | 5 | 66 | 0.07692 |
| 9 | Cauvery | Karnataka | Bay_of_Bengal | 805 | 3 | 21 | 0.02609 |
| 10 | Sabarmati | Rajasthan | Arabian_Sea | 371 | 2 | 12 | 0.03235 |
| 11 | Beas | Himachal_Pradesh | Sutlej_River | 470 | 2 | 15 | 0.03191 |
| 12 | Sutlej | Tibet | Indus_River | 1450 | 4 | 77 | 0.05310 |
10. COMPARATIVE VISUALIZATION BY END LOCATION
proc sgplot data=work.indian_rivers;
vbar End_Location / response=Annual_Flow_BCM stat=sum datalabel;
title "TOTAL ANNUAL FLOW CONTRIBUTION BY TERMINATION BASIN";
yaxis label="Total Flow (BCM)";
xaxis label="End Location / Basin";
run;
OUTPUT:
11. MACRO FOR AUTOMATED GRAPH GENERATION
%macro plot_numeric(var);
proc sgplot data=work.indian_rivers;
vbar River_Name / response=&var datalabel;
title "BAR CHART OF &var FOR INDIAN RIVERS";
yaxis label="&var";
run;
%mend;
%plot_numeric(Length_KM);
OUTPUT:
%plot_numeric(Annual_Flow_BCM);
No comments:
Post a Comment