FAMOUS NATURAL LANDSCAPES DATA ANALYSIS USING SAS DATA STEP | PROC SQL | PROC FREQ | PROC MEANS | PROC UNIVARIATE | MACROS | DATE FUNCTIONS (MDY | INTCK | INTNX) | MERGE | APPEND | TRANSPOSE
options nocenter;
1.LANDSCAPES BASE DATASET CREATION
data landscapes_base;
length Landscape_Name $30 Location $25 Risk_Level $10;
format Established_Date date9.;
input Landscape_Name $ Location $ Height Area Tourists Risk_Level $ Established_Date :date9.;
datalines;
MountEverest Nepal 8848 141 800 High 29MAY1953
GrandCanyon USA 2400 4926 6000 Medium 26FEB1919
NiagaraFalls USA_Canada 51 17 9000 Medium 01JAN1885
VictoriaFalls Zambia_Zimbabwe 108 1708 1500 High 18NOV1855
GreatBarrierReef Australia 0 344400 2000 Medium 11OCT1981
SaharaDesert Africa 450 9100000 1000 High 01JAN1920
AmazonRainforest Brazil 300 5500000 3500 High 05SEP1930
MountFuji Japan 3776 1227 3000 Medium 22JUN1934
YosemiteValley USA 1219 3029 4000 Low 01OCT1890
IguazuFalls Argentina_Brazil 82 2610 1400 Medium 02DEC1934
TableMountain SouthAfrica 1086 221 4500 Low 01JAN1923
DeadSea Israel_Jordan -430 605 1200 Medium 01JAN1948
;
run;
proc print data=landscapes_base;
run;
OUTPUT:
| Obs | Landscape_Name | Location | Risk_Level | Established_Date | Height | Area | Tourists |
|---|---|---|---|---|---|---|---|
| 1 | MountEverest | Nepal | High | 29MAY1953 | 8848 | 141 | 800 |
| 2 | GrandCanyon | USA | Medium | 26FEB1919 | 2400 | 4926 | 6000 |
| 3 | NiagaraFalls | USA_Canada | Medium | 01JAN1885 | 51 | 17 | 9000 |
| 4 | VictoriaFalls | Zambia_Zimbabwe | High | 18NOV1855 | 108 | 1708 | 1500 |
| 5 | GreatBarrierReef | Australia | Medium | 11OCT1981 | 0 | 344400 | 2000 |
| 6 | SaharaDesert | Africa | High | 01JAN1920 | 450 | 9100000 | 1000 |
| 7 | AmazonRainforest | Brazil | High | 05SEP1930 | 300 | 5500000 | 3500 |
| 8 | MountFuji | Japan | Medium | 22JUN1934 | 3776 | 1227 | 3000 |
| 9 | YosemiteValley | USA | Low | 01OCT1890 | 1219 | 3029 | 4000 |
| 10 | IguazuFalls | Argentina_Brazil | Medium | 02DEC1934 | 82 | 2610 | 1400 |
| 11 | TableMountain | SouthAfrica | Low | 01JAN1923 | 1086 | 221 | 4500 |
| 12 | DeadSea | Israel_Jordan | Medium | 01JAN1948 | -430 | 605 | 1200 |
2.USING DATE FUNCTIONS (MDY, INTCK, INTNX)
2.1 Creating Derived Date Variables
data landscapes_dates;
set landscapes_base;
Review_Date = mdy(1,1,2025);
Years_Since_Established = intck('year', Established_Date, Review_Date);
Next_Review = intnx('year', Established_Date, 100, 'same');
format Review_Date Next_Review date9.;
run;
proc print data=landscapes_dates;
run;
OUTPUT:
| Obs | Landscape_Name | Location | Risk_Level | Established_Date | Height | Area | Tourists | Review_Date | Years_Since_Established | Next_Review |
|---|---|---|---|---|---|---|---|---|---|---|
| 1 | MountEverest | Nepal | High | 29MAY1953 | 8848 | 141 | 800 | 01JAN2025 | 72 | 29MAY2053 |
| 2 | GrandCanyon | USA | Medium | 26FEB1919 | 2400 | 4926 | 6000 | 01JAN2025 | 106 | 26FEB2019 |
| 3 | NiagaraFalls | USA_Canada | Medium | 01JAN1885 | 51 | 17 | 9000 | 01JAN2025 | 140 | 01JAN1985 |
| 4 | VictoriaFalls | Zambia_Zimbabwe | High | 18NOV1855 | 108 | 1708 | 1500 | 01JAN2025 | 170 | 18NOV1955 |
| 5 | GreatBarrierReef | Australia | Medium | 11OCT1981 | 0 | 344400 | 2000 | 01JAN2025 | 44 | 11OCT2081 |
| 6 | SaharaDesert | Africa | High | 01JAN1920 | 450 | 9100000 | 1000 | 01JAN2025 | 105 | 01JAN2020 |
| 7 | AmazonRainforest | Brazil | High | 05SEP1930 | 300 | 5500000 | 3500 | 01JAN2025 | 95 | 05SEP2030 |
| 8 | MountFuji | Japan | Medium | 22JUN1934 | 3776 | 1227 | 3000 | 01JAN2025 | 91 | 22JUN2034 |
| 9 | YosemiteValley | USA | Low | 01OCT1890 | 1219 | 3029 | 4000 | 01JAN2025 | 135 | 01OCT1990 |
| 10 | IguazuFalls | Argentina_Brazil | Medium | 02DEC1934 | 82 | 2610 | 1400 | 01JAN2025 | 91 | 02DEC2034 |
| 11 | TableMountain | SouthAfrica | Low | 01JAN1923 | 1086 | 221 | 4500 | 01JAN2025 | 102 | 01JAN2023 |
| 12 | DeadSea | Israel_Jordan | Medium | 01JAN1948 | -430 | 605 | 1200 | 01JAN2025 | 77 | 01JAN2048 |
3.PROC SQL – DATA SELECTION AND DERIVATIONS
proc sql;
create table landscapes_sql as
select Landscape_Name,Location,Height,Area,Tourists,Risk_Level,Years_Since_Established,
case
when Tourists > 5000 then 'Very High'
when Tourists between 2000 and 5000 then 'High'
else 'Moderate'
end as Tourist_Pressure
from landscapes_dates;
quit;
proc print data=landscapes_sql;
run;
OUTPUT:
| Obs | Landscape_Name | Location | Height | Area | Tourists | Risk_Level | Years_Since_Established | Tourist_Pressure |
|---|---|---|---|---|---|---|---|---|
| 1 | MountEverest | Nepal | 8848 | 141 | 800 | High | 72 | Moderate |
| 2 | GrandCanyon | USA | 2400 | 4926 | 6000 | Medium | 106 | Very High |
| 3 | NiagaraFalls | USA_Canada | 51 | 17 | 9000 | Medium | 140 | Very High |
| 4 | VictoriaFalls | Zambia_Zimbabwe | 108 | 1708 | 1500 | High | 170 | Moderate |
| 5 | GreatBarrierReef | Australia | 0 | 344400 | 2000 | Medium | 44 | High |
| 6 | SaharaDesert | Africa | 450 | 9100000 | 1000 | High | 105 | Moderate |
| 7 | AmazonRainforest | Brazil | 300 | 5500000 | 3500 | High | 95 | High |
| 8 | MountFuji | Japan | 3776 | 1227 | 3000 | Medium | 91 | High |
| 9 | YosemiteValley | USA | 1219 | 3029 | 4000 | Low | 135 | High |
| 10 | IguazuFalls | Argentina_Brazil | 82 | 2610 | 1400 | Medium | 91 | Moderate |
| 11 | TableMountain | SouthAfrica | 1086 | 221 | 4500 | Low | 102 | High |
| 12 | DeadSea | Israel_Jordan | -430 | 605 | 1200 | Medium | 77 | Moderate |
4.MACRO FOR SAFETY ZONE CLASSIFICATION
%macro safety_zone;
data landscapes_safety;
set landscapes_sql;
length Safety_Zone $12.;
if Risk_Level='Low' and Tourists < 5000 then Safety_Zone='SAFE';
else if Risk_Level='Medium' then Safety_Zone='CAUTION';
else Safety_Zone='DANGER';
run;
proc print data=landscapes_safety;
run;
%mend;
%safety_zone;
OUTPUT:
| Obs | Landscape_Name | Location | Height | Area | Tourists | Risk_Level | Years_Since_Established | Tourist_Pressure | SAFETY_ZONE |
|---|---|---|---|---|---|---|---|---|---|
| 1 | MountEverest | Nepal | 8848 | 141 | 800 | High | 72 | Moderate | DANGER |
| 2 | GrandCanyon | USA | 2400 | 4926 | 6000 | Medium | 106 | Very High | CAUTION |
| 3 | NiagaraFalls | USA_Canada | 51 | 17 | 9000 | Medium | 140 | Very High | CAUTION |
| 4 | VictoriaFalls | Zambia_Zimbabwe | 108 | 1708 | 1500 | High | 170 | Moderate | DANGER |
| 5 | GreatBarrierReef | Australia | 0 | 344400 | 2000 | Medium | 44 | High | CAUTION |
| 6 | SaharaDesert | Africa | 450 | 9100000 | 1000 | High | 105 | Moderate | DANGER |
| 7 | AmazonRainforest | Brazil | 300 | 5500000 | 3500 | High | 95 | High | DANGER |
| 8 | MountFuji | Japan | 3776 | 1227 | 3000 | Medium | 91 | High | CAUTION |
| 9 | YosemiteValley | USA | 1219 | 3029 | 4000 | Low | 135 | High | SAFE |
| 10 | IguazuFalls | Argentina_Brazil | 82 | 2610 | 1400 | Medium | 91 | Moderate | CAUTION |
| 11 | TableMountain | SouthAfrica | 1086 | 221 | 4500 | Low | 102 | High | SAFE |
| 12 | DeadSea | Israel_Jordan | -430 | 605 | 1200 | Medium | 77 | Moderate | CAUTION |
5.PROC FREQ – RISK & SAFETY DISTRIBUTION
proc freq data=landscapes_safety;
tables Risk_Level Safety_Zone Tourist_Pressure;
run;
OUTPUT:
The FREQ Procedure
| Risk_Level | Frequency | Percent | Cumulative Frequency | Cumulative Percent |
|---|---|---|---|---|
| High | 4 | 33.33 | 4 | 33.33 |
| Low | 2 | 16.67 | 6 | 50.00 |
| Medium | 6 | 50.00 | 12 | 100.00 |
| SAFETY_ZONE | Frequency | Percent | Cumulative Frequency | Cumulative Percent |
|---|---|---|---|---|
| CAUTION | 6 | 50.00 | 6 | 50.00 |
| DANGER | 4 | 33.33 | 10 | 83.33 |
| SAFE | 2 | 16.67 | 12 | 100.00 |
| Tourist_Pressure | Frequency | Percent | Cumulative Frequency | Cumulative Percent |
|---|---|---|---|---|
| High | 5 | 41.67 | 5 | 41.67 |
| Moderate | 5 | 41.67 | 10 | 83.33 |
| Very High | 2 | 16.67 | 12 | 100.00 |
6.PROC MEANS – NUMERICAL SUMMARY
proc means data=landscapes_safety mean min max;
var Height Area Tourists Years_Since_Established;
run;
OUTPUT:
The MEANS Procedure
| Variable | Mean | Minimum | Maximum |
|---|---|---|---|
Height Area Tourists Years_Since_Established | 1490.83 1246573.67 3158.33 102.3333333 | -430.0000000 17.0000000 800.0000000 44.0000000 | 8848.00 9100000.00 9000.00 170.0000000 |
7.PROC UNIVARIATE – DISTRIBUTION ANALYSIS
proc univariate data=landscapes_safety;
var Tourists Area;
histogram Tourists Area;
run;
OUTPUT:
The UNIVARIATE Procedure
Variable: Tourists
| Moments | |||
|---|---|---|---|
| N | 12 | Sum Weights | 12 |
| Mean | 3158.33333 | Sum Observations | 37900 |
| Std Deviation | 2451.14383 | Variance | 6008106.06 |
| Skewness | 1.35641286 | Kurtosis | 1.72775908 |
| Uncorrected SS | 185790000 | Corrected SS | 66089166.7 |
| Coeff Variation | 77.6087755 | Std Error Mean | 707.584274 |
| Basic Statistical Measures | |||
|---|---|---|---|
| Location | Variability | ||
| Mean | 3158.333 | Std Deviation | 2451 |
| Median | 2500.000 | Variance | 6008106 |
| Mode | . | Range | 8200 |
| Interquartile Range | 2950 | ||
| Tests for Location: Mu0=0 | ||||
|---|---|---|---|---|
| Test | Statistic | p Value | ||
| Student's t | t | 4.463544 | Pr > |t| | 0.0010 |
| Sign | M | 6 | Pr >= |M| | 0.0005 |
| Signed Rank | S | 39 | Pr >= |S| | 0.0005 |
| Quantiles (Definition 5) | |
|---|---|
| Level | Quantile |
| 100% Max | 9000 |
| 99% | 9000 |
| 95% | 9000 |
| 90% | 6000 |
| 75% Q3 | 4250 |
| 50% Median | 2500 |
| 25% Q1 | 1300 |
| 10% | 1000 |
| 5% | 800 |
| 1% | 800 |
| 0% Min | 800 |
| Extreme Observations | |||
|---|---|---|---|
| Lowest | Highest | ||
| Value | Obs | Value | Obs |
| 800 | 1 | 3500 | 7 |
| 1000 | 6 | 4000 | 9 |
| 1200 | 12 | 4500 | 11 |
| 1400 | 10 | 6000 | 2 |
| 1500 | 4 | 9000 | 3 |
The UNIVARIATE Procedure
The UNIVARIATE Procedure
Variable: Area
| Moments | |||
|---|---|---|---|
| N | 12 | Sum Weights | 12 |
| Mean | 1246573.67 | Sum Observations | 14958884 |
| Std Deviation | 2931510.71 | Variance | 8.59376E12 |
| Skewness | 2.35228821 | Kurtosis | 4.82890991 |
| Uncorrected SS | 1.13179E14 | Corrected SS | 9.45313E13 |
| Coeff Variation | 235.165461 | Std Error Mean | 846254.249 |
| Basic Statistical Measures | |||
|---|---|---|---|
| Location | Variability | ||
| Mean | 1246574 | Std Deviation | 2931511 |
| Median | 2159 | Variance | 8.59376E12 |
| Mode | . | Range | 9099983 |
| Interquartile Range | 174250 | ||
| Tests for Location: Mu0=0 | ||||
|---|---|---|---|---|
| Test | Statistic | p Value | ||
| Student's t | t | 1.473049 | Pr > |t| | 0.1688 |
| Sign | M | 6 | Pr >= |M| | 0.0005 |
| Signed Rank | S | 39 | Pr >= |S| | 0.0005 |
| Quantiles (Definition 5) | |
|---|---|
| Level | Quantile |
| 100% Max | 9100000 |
| 99% | 9100000 |
| 95% | 9100000 |
| 90% | 5500000 |
| 75% Q3 | 174663 |
| 50% Median | 2159 |
| 25% Q1 | 413 |
| 10% | 141 |
| 5% | 17 |
| 1% | 17 |
| 0% Min | 17 |
| Extreme Observations | |||
|---|---|---|---|
| Lowest | Highest | ||
| Value | Obs | Value | Obs |
| 17 | 3 | 3029 | 9 |
| 141 | 1 | 4926 | 2 |
| 221 | 11 | 344400 | 5 |
| 605 | 12 | 5500000 | 7 |
| 1227 | 8 | 9100000 | 6 |
The UNIVARIATE Procedure
8.PROC APPEND – ADDITIONAL LANDSCAPES
data landscapes_new;
length Landscape_Name $30 Location $25 Risk_Level $10;
input Landscape_Name $ Location $ Height Area Tourists Risk_Level $ Established_Date :date9.;
datalines;
AngelFalls Venezuela 979 30000 900 Medium 16NOV1933
;
run;
proc print data=landscapes_new;
run;
OUTPUT:
| Obs | Landscape_Name | Location | Risk_Level | Height | Area | Tourists | Established_Date |
|---|---|---|---|---|---|---|---|
| 1 | AngelFalls | Venezuela | Medium | 979 | 30000 | 900 | -9542 |
proc append base=landscapes_base
data=landscapes_new force;
run;
proc print data=landscapes_base;
run;
OUTPUT:
| Obs | Landscape_Name | Location | Risk_Level | Established_Date | Height | Area | Tourists |
|---|---|---|---|---|---|---|---|
| 1 | MountEverest | Nepal | High | 29MAY1953 | 8848 | 141 | 800 |
| 2 | GrandCanyon | USA | Medium | 26FEB1919 | 2400 | 4926 | 6000 |
| 3 | NiagaraFalls | USA_Canada | Medium | 01JAN1885 | 51 | 17 | 9000 |
| 4 | VictoriaFalls | Zambia_Zimbabwe | High | 18NOV1855 | 108 | 1708 | 1500 |
| 5 | GreatBarrierReef | Australia | Medium | 11OCT1981 | 0 | 344400 | 2000 |
| 6 | SaharaDesert | Africa | High | 01JAN1920 | 450 | 9100000 | 1000 |
| 7 | AmazonRainforest | Brazil | High | 05SEP1930 | 300 | 5500000 | 3500 |
| 8 | MountFuji | Japan | Medium | 22JUN1934 | 3776 | 1227 | 3000 |
| 9 | YosemiteValley | USA | Low | 01OCT1890 | 1219 | 3029 | 4000 |
| 10 | IguazuFalls | Argentina_Brazil | Medium | 02DEC1934 | 82 | 2610 | 1400 |
| 11 | TableMountain | SouthAfrica | Low | 01JAN1923 | 1086 | 221 | 4500 |
| 12 | DeadSea | Israel_Jordan | Medium | 01JAN1948 | -430 | 605 | 1200 |
| 13 | AngelFalls | Venezuela | Medium | 16NOV1933 | 979 | 30000 | 900 |
9.MERGE – COMBINING SAFETY & DATE INFO
proc sort data=landscapes_base; by Landscape_Name; run;
proc print data=landscapes_base;
run;
OUTPUT:
| Obs | Landscape_Name | Location | Risk_Level | Established_Date | Height | Area | Tourists |
|---|---|---|---|---|---|---|---|
| 1 | AmazonRainforest | Brazil | High | 05SEP1930 | 300 | 5500000 | 3500 |
| 2 | AngelFalls | Venezuela | Medium | 16NOV1933 | 979 | 30000 | 900 |
| 3 | DeadSea | Israel_Jordan | Medium | 01JAN1948 | -430 | 605 | 1200 |
| 4 | GrandCanyon | USA | Medium | 26FEB1919 | 2400 | 4926 | 6000 |
| 5 | GreatBarrierReef | Australia | Medium | 11OCT1981 | 0 | 344400 | 2000 |
| 6 | IguazuFalls | Argentina_Brazil | Medium | 02DEC1934 | 82 | 2610 | 1400 |
| 7 | MountEverest | Nepal | High | 29MAY1953 | 8848 | 141 | 800 |
| 8 | MountFuji | Japan | Medium | 22JUN1934 | 3776 | 1227 | 3000 |
| 9 | NiagaraFalls | USA_Canada | Medium | 01JAN1885 | 51 | 17 | 9000 |
| 10 | SaharaDesert | Africa | High | 01JAN1920 | 450 | 9100000 | 1000 |
| 11 | TableMountain | SouthAfrica | Low | 01JAN1923 | 1086 | 221 | 4500 |
| 12 | VictoriaFalls | Zambia_Zimbabwe | High | 18NOV1855 | 108 | 1708 | 1500 |
| 13 | YosemiteValley | USA | Low | 01OCT1890 | 1219 | 3029 | 4000 |
proc sort data=landscapes_safety; by Landscape_Name; run;
proc print data=landscapes_safety;
run;
OUTPUT:
| Obs | Landscape_Name | Location | Height | Area | Tourists | Risk_Level | Years_Since_Established | Tourist_Pressure | SAFETY_ZONE |
|---|---|---|---|---|---|---|---|---|---|
| 1 | AmazonRainforest | Brazil | 300 | 5500000 | 3500 | High | 95 | High | DANGER |
| 2 | DeadSea | Israel_Jordan | -430 | 605 | 1200 | Medium | 77 | Moderate | CAUTION |
| 3 | GrandCanyon | USA | 2400 | 4926 | 6000 | Medium | 106 | Very High | CAUTION |
| 4 | GreatBarrierReef | Australia | 0 | 344400 | 2000 | Medium | 44 | High | CAUTION |
| 5 | IguazuFalls | Argentina_Brazil | 82 | 2610 | 1400 | Medium | 91 | Moderate | CAUTION |
| 6 | MountEverest | Nepal | 8848 | 141 | 800 | High | 72 | Moderate | DANGER |
| 7 | MountFuji | Japan | 3776 | 1227 | 3000 | Medium | 91 | High | CAUTION |
| 8 | NiagaraFalls | USA_Canada | 51 | 17 | 9000 | Medium | 140 | Very High | CAUTION |
| 9 | SaharaDesert | Africa | 450 | 9100000 | 1000 | High | 105 | Moderate | DANGER |
| 10 | TableMountain | SouthAfrica | 1086 | 221 | 4500 | Low | 102 | High | SAFE |
| 11 | VictoriaFalls | Zambia_Zimbabwe | 108 | 1708 | 1500 | High | 170 | Moderate | DANGER |
| 12 | YosemiteValley | USA | 1219 | 3029 | 4000 | Low | 135 | High | SAFE |
data landscapes_merged;
merge landscapes_base(in=a) landscapes_safety(in=b);
by Landscape_Name;
if a;
run;
proc print data=landscapes_merged;
run;
OUTPUT:
| Obs | Landscape_Name | Location | Risk_Level | Established_Date | Height | Area | Tourists | Years_Since_Established | Tourist_Pressure | SAFETY_ZONE |
|---|---|---|---|---|---|---|---|---|---|---|
| 1 | AmazonRainforest | Brazil | High | 05SEP1930 | 300 | 5500000 | 3500 | 95 | High | DANGER |
| 2 | AngelFalls | Venezuela | Medium | 16NOV1933 | 979 | 30000 | 900 | . | ||
| 3 | DeadSea | Israel_Jordan | Medium | 01JAN1948 | -430 | 605 | 1200 | 77 | Moderate | CAUTION |
| 4 | GrandCanyon | USA | Medium | 26FEB1919 | 2400 | 4926 | 6000 | 106 | Very High | CAUTION |
| 5 | GreatBarrierReef | Australia | Medium | 11OCT1981 | 0 | 344400 | 2000 | 44 | High | CAUTION |
| 6 | IguazuFalls | Argentina_Brazil | Medium | 02DEC1934 | 82 | 2610 | 1400 | 91 | Moderate | CAUTION |
| 7 | MountEverest | Nepal | High | 29MAY1953 | 8848 | 141 | 800 | 72 | Moderate | DANGER |
| 8 | MountFuji | Japan | Medium | 22JUN1934 | 3776 | 1227 | 3000 | 91 | High | CAUTION |
| 9 | NiagaraFalls | USA_Canada | Medium | 01JAN1885 | 51 | 17 | 9000 | 140 | Very High | CAUTION |
| 10 | SaharaDesert | Africa | High | 01JAN1920 | 450 | 9100000 | 1000 | 105 | Moderate | DANGER |
| 11 | TableMountain | SouthAfrica | Low | 01JAN1923 | 1086 | 221 | 4500 | 102 | High | SAFE |
| 12 | VictoriaFalls | Zambia_Zimbabwe | High | 18NOV1855 | 108 | 1708 | 1500 | 170 | Moderate | DANGER |
| 13 | YosemiteValley | USA | Low | 01OCT1890 | 1219 | 3029 | 4000 | 135 | High | SAFE |
10.PROC TRANSPOSE – SAFETY ZONE VIEW
proc transpose data=landscapes_safety
out=safety_transposed;
by Risk_Level NotSorted;
var Tourists;
run;
proc print data=safety_transposed;
run;
OUTPUT:
| Obs | Risk_Level | _NAME_ | COL1 | COL2 | COL3 | COL4 |
|---|---|---|---|---|---|---|
| 1 | High | Tourists | 3500 | . | . | . |
| 2 | Medium | Tourists | 1200 | 6000 | 2000 | 1400 |
| 3 | High | Tourists | 800 | . | . | . |
| 4 | Medium | Tourists | 3000 | 9000 | . | . |
| 5 | High | Tourists | 1000 | . | . | . |
| 6 | Low | Tourists | 4500 | . | . | . |
| 7 | High | Tourists | 1500 | . | . | . |
| 8 | Low | Tourists | 4000 | . | . | . |
No comments:
Post a Comment