Monday, 22 December 2025

347.FOREST TYPES DATA ANALYSIS USING SAS DATA STEP | PROC SQL | PROC MEANS | PROC FORMAT | PROC UNIVARIATE | MACROS | DATE FUNCTIONS (INTNX, INTCK) | MERGE | APPEND

FOREST TYPES DATA ANALYSIS USING SAS DATA STEP | PROC SQL | PROC MEANS | PROC FORMAT | PROC UNIVARIATE | MACROS | DATE FUNCTIONS (INTNX, INTCK) | MERGE | APPEND

/* Left-align procedure output instead of centering it on the page. */
options nocenter;

1.CREATING THE FOREST BASE DATASET

/*
 * Build WORK.FOREST_BASE from twelve in-stream records, one forest per row.
 * LENGTH and FORMAT come first so that the character widths and the DATE9.
 * display format are fixed before INPUT runs; this also determines the
 * stored variable order (names, then Survey_Date, then the numeric measures).
 */
data forest_base;
    length Forest_Name $20
           Country     $15
           Fire_Risk   $10;
    format Survey_Date date9.;

    /* List input: fields are blank-delimited, so values must not contain spaces. */
    input Forest_Name  $
          Area
          Country      $
          Animal_Count
          Rainfall
          Fire_Risk    $
          Survey_Date  :date9.;
    datalines;
Amazon 5500000 Brazil 3000000 2200 High 01JAN2023
Congo 3700000 Congo 1800000 1900 Medium 15FEB2023
Sundarbans 10000 India 250000 1800 High 20MAR2023
BlackForest 6000 Germany 150000 1200 Low 05APR2023
Taiga 8000000 Russia 4000000 600 Medium 10MAY2023
Daintree 1200 Australia 90000 3500 Medium 12JUN2023
Borneo 740000 Malaysia 600000 3000 High 18JUL2023
Kinabalu 750 Malaysia 120000 2800 Medium 25AUG2023
Sinharaja 88 SriLanka 65000 5000 Low 01SEP2023
Yakushima 505 Japan 70000 4300 Low 10OCT2023
Tongass 68000 USA 200000 4000 Medium 15NOV2023
Sherwood 423 UK 35000 900 Low 01DEC2023
;
run;

/* List the freshly created rows for a visual check. */
proc print data=forest_base;
run;

OUTPUT:

ObsForest_NameCountryFire_RiskSurvey_DateAreaAnimal_CountRainfall
1AmazonBrazilHigh01JAN2023550000030000002200
2CongoCongoMedium15FEB2023370000018000001900
3SundarbansIndiaHigh20MAR2023100002500001800
4BlackForestGermanyLow05APR202360001500001200
5TaigaRussiaMedium10MAY202380000004000000600
6DaintreeAustraliaMedium12JUN20231200900003500
7BorneoMalaysiaHigh18JUL20237400006000003000
8KinabaluMalaysiaMedium25AUG20237501200002800
9SinharajaSriLankaLow01SEP202388650005000
10YakushimaJapanLow10OCT2023505700004300
11TongassUSAMedium15NOV2023680002000004000
12SherwoodUKLow01DEC202342335000900


2.USING PROC CONTENTS (STRUCTURAL CHECK)

/* Show the dataset header and the variable list (type, length, format) of forest_base. */
proc contents data=forest_base;

run;

OUTPUT:

The CONTENTS Procedure

Data Set NameWORK.FOREST_BASEObservations12
Member TypeDATAVariables7
EngineV9Indexes0
Created12/23/2025 07:19:56Observation Length80
Last Modified12/23/2025 07:19:56Deleted Observations0
Protection CompressedNO
Data Set Type SortedNO
Label   
Data RepresentationSOLARIS_X86_64, LINUX_X86_64, ALPHA_TRU64, LINUX_IA64  
Encodingutf-8 Unicode (UTF-8)  
Engine/Host Dependent Information
Data Set Page Size131072
Number of Data Set Pages1
First Data Page1
Max Obs per Page1635
Obs in First Data Page12
Number of Data Set Repairs0
Filename/saswork/SAS_workD61200018CFF_odaws01-apse1-2.oda.sas.com/SAS_work11FC00018CFF_odaws01-apse1-2.oda.sas.com/forest_base.sas7bdat
Release Created9.0401M8
Host CreatedLinux
Inode Number134334086
Access Permissionrw-r--r--
Owner Nameu63247146
File Size256KB
File Size (bytes)262144
Alphabetic List of Variables and Attributes
#VariableTypeLenFormat
6Animal_CountNum8 
5AreaNum8 
2CountryChar15 
3Fire_RiskChar10 
1Forest_NameChar20 
7RainfallNum8 
4Survey_DateNum8DATE9.

3.PROC SQL FOR DERIVED DATA

Creating an Enhanced Dataset with Safety Index

/*
 * Copy every column of forest_base into forest_sql and add Safety_Level.
 * The CASE branches are evaluated top to bottom:
 *   - Rainfall below 1000 AND fewer than 100000 animals -> 'HIGH RISK'
 *   - Rainfall from 1000 to 2500 inclusive             -> 'MODERATE RISK'
 *   - everything else                                  -> 'LOW RISK'
 * A forest with Rainfall below 1000 but 100000 or more animals therefore
 * lands in the ELSE branch and is labelled 'LOW RISK'.
 */
proc sql;
    create table forest_sql as
        select fb.*,
               case
                   when fb.Rainfall < 1000 and fb.Animal_Count < 100000
                       then 'HIGH RISK'
                   when fb.Rainfall >= 1000 and fb.Rainfall <= 2500
                       then 'MODERATE RISK'
                   else 'LOW RISK'
               end as Safety_Level
        from forest_base as fb;
quit;

/* Display the enriched table. */
proc print data=forest_sql;
run;

OUTPUT:

ObsForest_NameCountryFire_RiskSurvey_DateAreaAnimal_CountRainfallSafety_Level
1AmazonBrazilHigh01JAN2023550000030000002200MODERATE RISK
2CongoCongoMedium15FEB2023370000018000001900MODERATE RISK
3SundarbansIndiaHigh20MAR2023100002500001800MODERATE RISK
4BlackForestGermanyLow05APR202360001500001200MODERATE RISK
5TaigaRussiaMedium10MAY202380000004000000600LOW RISK
6DaintreeAustraliaMedium12JUN20231200900003500LOW RISK
7BorneoMalaysiaHigh18JUL20237400006000003000LOW RISK
8KinabaluMalaysiaMedium25AUG20237501200002800LOW RISK
9SinharajaSriLankaLow01SEP202388650005000LOW RISK
10YakushimaJapanLow10OCT2023505700004300LOW RISK
11TongassUSAMedium15NOV2023680002000004000LOW RISK
12SherwoodUKLow01DEC202342335000900HIGH RISK


4.DATE FUNCTIONS (INTNX & INTCK)

Adding Next Survey Date

/*
 * Add two date-derived columns to every row of forest_sql:
 *   Next_Survey  - Survey_Date moved forward six months; the 'same'
 *                  alignment keeps the day of the month.
 *   Months_Since - month intervals between Survey_Date and the day the
 *                  program runs, so the value changes from run to run.
 */
data forest_dates;
    set forest_sql;

    format Next_Survey date9.;

    Next_Survey  = intnx('month', Survey_Date, 6, 'same');
    Months_Since = intck('month', Survey_Date, today());
run;

/* Print the result, including the two new columns. */
proc print data=forest_dates;
run;

OUTPUT:

ObsForest_NameCountryFire_RiskSurvey_DateAreaAnimal_CountRainfallSafety_LevelNext_SurveyMonths_Since
1AmazonBrazilHigh01JAN2023550000030000002200MODERATE RISK01JUL202335
2CongoCongoMedium15FEB2023370000018000001900MODERATE RISK15AUG202334
3SundarbansIndiaHigh20MAR2023100002500001800MODERATE RISK20SEP202333
4BlackForestGermanyLow05APR202360001500001200MODERATE RISK05OCT202332
5TaigaRussiaMedium10MAY202380000004000000600LOW RISK10NOV202331
6DaintreeAustraliaMedium12JUN20231200900003500LOW RISK12DEC202330
7BorneoMalaysiaHigh18JUL20237400006000003000LOW RISK18JAN202429
8KinabaluMalaysiaMedium25AUG20237501200002800LOW RISK25FEB202428
9SinharajaSriLankaLow01SEP202388650005000LOW RISK01MAR202427
10YakushimaJapanLow10OCT2023505700004300LOW RISK10APR202426
11TongassUSAMedium15NOV2023680002000004000LOW RISK15MAY202425
12SherwoodUKLow01DEC202342335000900HIGH RISK01JUN202424


5.PROC MEANS (NUMERICAL SUMMARY)

/* Report only the minimum, maximum and mean of the four numeric measures in forest_dates. */
proc means data=forest_dates min max mean;

    var Area Animal_Count Rainfall Months_Since;

run;

OUTPUT:

The MEANS Procedure

Variable        Minimum        Maximum        Mean
Area            88.0000000     8000000.00     1502247.17
Animal_Count    35000.00       4000000.00     865000.00
Rainfall        600.0000000    5000.00        2600.00
Months_Since    24.0000000     35.0000000     29.5000000

6.PROC FORMAT (CUSTOM RAINFALL LABELS)

/*
 * RAINFMT. labels a numeric rainfall value as Low / Moderate / Heavy.
 *
 * The ranges are contiguous: "<-" excludes the lower endpoint, so 1000 is
 * still 'Low Rainfall' and 2500 is still 'Moderate Rainfall', while
 * fractional values such as 1000.5 or 2500.2 are labelled as well. Ranges
 * written as 1001-2500 and 2501-high would leave those values without a
 * label, and they would print as raw numbers.
 */
proc format;
    value rainfmt
        low  -  1000 = 'Low Rainfall'
        1000 <- 2500 = 'Moderate Rainfall'
        2500 <- high = 'Heavy Rainfall';
run;

LOG:

NOTE: Format RAINFMT has been output.

/* Print forest_dates with Rainfall shown through the RAINFMT. labels; the FORMAT
   statement applies to this listing only and leaves the stored values numeric. */
proc print data=forest_dates;

    format Rainfall rainfmt.;

run;

OUTPUT:

ObsForest_NameCountryFire_RiskSurvey_DateAreaAnimal_CountRainfallSafety_LevelNext_SurveyMonths_Since
1AmazonBrazilHigh01JAN202355000003000000Moderate RainfallMODERATE RISK01JUL202335
2CongoCongoMedium15FEB202337000001800000Moderate RainfallMODERATE RISK15AUG202334
3SundarbansIndiaHigh20MAR202310000250000Moderate RainfallMODERATE RISK20SEP202333
4BlackForestGermanyLow05APR20236000150000Moderate RainfallMODERATE RISK05OCT202332
5TaigaRussiaMedium10MAY202380000004000000Low RainfallLOW RISK10NOV202331
6DaintreeAustraliaMedium12JUN2023120090000Heavy RainfallLOW RISK12DEC202330
7BorneoMalaysiaHigh18JUL2023740000600000Heavy RainfallLOW RISK18JAN202429
8KinabaluMalaysiaMedium25AUG2023750120000Heavy RainfallLOW RISK25FEB202428
9SinharajaSriLankaLow01SEP20238865000Heavy RainfallLOW RISK01MAR202427
10YakushimaJapanLow10OCT202350570000Heavy RainfallLOW RISK10APR202426
11TongassUSAMedium15NOV202368000200000Heavy RainfallLOW RISK15MAY202425
12SherwoodUKLow01DEC202342335000Low RainfallHIGH RISK01JUN202424


7.PROC UNIVARIATE (DISTRIBUTION ANALYSIS)

/* Distribution statistics (moments, location tests, quantiles, extremes) plus one
   histogram each for Rainfall and Animal_Count in forest_dates. */
proc univariate data=forest_dates;

    var Rainfall Animal_Count;

    histogram Rainfall Animal_Count;

run;

OUTPUT:

The UNIVARIATE Procedure

Variable: Rainfall

Moments
N12Sum Weights12
Mean2600Sum Observations31200
Std Deviation1406.47852Variance1978181.82
Skewness0.22372835Kurtosis-1.0163311
Uncorrected SS102880000Corrected SS21760000
Coeff Variation54.0953276Std Error Mean406.015375
Basic Statistical Measures
LocationVariability
Mean2600.000Std Deviation1406
Median2500.000Variance1978182
Mode.Range4400
  Interquartile Range2250
Tests for Location: Mu0=0
TestStatisticp Value
Student's tt6.403698Pr > |t|<.0001
SignM6Pr >= |M|0.0005
Signed RankS39Pr >= |S|0.0005
Quantiles (Definition 5)
LevelQuantile
100% Max5000
99%5000
95%5000
90%4300
75% Q33750
50% Median2500
25% Q11500
10%900
5%600
1%600
0% Min600
Extreme Observations
LowestHighest
ValueObsValueObs
600530007
9001235006
12004400011
18003430010
1900250009

The UNIVARIATE Procedure

Histogram for Rainfall

The UNIVARIATE Procedure

Variable: Animal_Count

Moments
N12Sum Weights12
Mean865000Sum Observations10380000
Std Deviation1340934.21Variance1.7981E12
Skewness1.71859782Kurtosis1.84889294
Uncorrected SS2.87579E13Corrected SS1.97792E13
Coeff Variation155.021296Std Error Mean387094.362
Basic Statistical Measures
LocationVariability
Mean865000.0Std Deviation1340934
Median175000.0Variance1.7981E12
Mode.Range3965000
  Interquartile Range1120000
Tests for Location: Mu0=0
TestStatisticp Value
Student's tt2.234597Pr > |t|0.0471
SignM6Pr >= |M|0.0005
Signed RankS39Pr >= |S|0.0005
Quantiles (Definition 5)
LevelQuantile
100% Max4000000
99%4000000
95%4000000
90%3000000
75% Q31200000
50% Median175000
25% Q180000
10%65000
5%35000
1%35000
0% Min35000
Extreme Observations
LowestHighest
ValueObsValueObs
35000122500003
6500096000007
700001018000002
90000630000001
120000840000005

The UNIVARIATE Procedure

Histogram for Animal_Count

8.MACRO FOR FOREST SAFETY CLASSIFICATION

/*
 * %safety_flag(ds=, out=)
 *
 * Copies dataset &ds to &out, adds the character column Safety_Flag, and
 * prints &out.
 *
 *   ds=   input dataset; must contain Rainfall (numeric) and Fire_Risk (character)
 *   out=  output dataset to create
 *
 * Classification, evaluated in this order:
 *   Rainfall < 1000 and Fire_Risk = 'High'  -> 'CRITICAL'
 *   Fire_Risk = 'Medium'                    -> 'WATCH'
 *   anything else                           -> 'SAFE'
 * The comparisons on Fire_Risk are case-sensitive. A missing Rainfall
 * compares as less than 1000 in SAS, so it satisfies the first test.
 */
%macro safety_flag(ds=, out=);

    /* Stop early instead of generating a broken DATA step when a name is omitted. */
    %if %length(&ds) = 0 or %length(&out) = 0 %then %do;
        %put ERROR: safety_flag requires both DS= and OUT= to be specified.;
        %return;
    %end;

    data &out;
        set &ds;

        /* Declare the width explicitly (8 = length of 'CRITICAL') so it does not
           depend on which assignment the compiler happens to see first. */
        length Safety_Flag $8;

        if Rainfall < 1000 and Fire_Risk = 'High' then Safety_Flag = 'CRITICAL';
        else if Fire_Risk = 'Medium' then Safety_Flag = 'WATCH';
        else Safety_Flag = 'SAFE';
    run;

    proc print data=&out;
    run;

%mend safety_flag;


%safety_flag(ds=forest_dates, out=forest_safe);

OUTPUT:

ObsForest_NameCountryFire_RiskSurvey_DateAreaAnimal_CountRainfallSafety_LevelNext_SurveyMonths_SinceSafety_Flag
1AmazonBrazilHigh01JAN2023550000030000002200MODERATE RISK01JUL202335SAFE
2CongoCongoMedium15FEB2023370000018000001900MODERATE RISK15AUG202334WATCH
3SundarbansIndiaHigh20MAR2023100002500001800MODERATE RISK20SEP202333SAFE
4BlackForestGermanyLow05APR202360001500001200MODERATE RISK05OCT202332SAFE
5TaigaRussiaMedium10MAY202380000004000000600LOW RISK10NOV202331WATCH
6DaintreeAustraliaMedium12JUN20231200900003500LOW RISK12DEC202330WATCH
7BorneoMalaysiaHigh18JUL20237400006000003000LOW RISK18JAN202429SAFE
8KinabaluMalaysiaMedium25AUG20237501200002800LOW RISK25FEB202428WATCH
9SinharajaSriLankaLow01SEP202388650005000LOW RISK01MAR202427SAFE
10YakushimaJapanLow10OCT2023505700004300LOW RISK10APR202426SAFE
11TongassUSAMedium15NOV2023680002000004000LOW RISK15MAY202425WATCH
12SherwoodUKLow01DEC202342335000900HIGH RISK01JUN202424SAFE


9.USING MERGE STATEMENT

/*
 * Build a two-column lookup (Forest_Name, Density) from forest_safe, where
 * Density = Animal_Count / Area.
 *
 * A zero Area is handled explicitly: Density is set to missing instead of
 * letting the division raise a "Division by zero" note and set _ERROR_.
 * All other rows are computed exactly as a plain division would be.
 */
data animal_density;
    set forest_safe;

    if Area ne 0 then Density = Animal_Count / Area;
    else Density = .;

    keep Forest_Name Density;
run;

/* Show the computed densities. */
proc print data=animal_density;
run;

OUTPUT:

ObsForest_NameDensity
1Amazon0.545
2Congo0.486
3Sundarbans25.000
4BlackForest25.000
5Taiga0.500
6Daintree75.000
7Borneo0.811
8Kinabalu160.000
9Sinharaja738.636
10Yakushima138.614
11Tongass2.941
12Sherwood82.742


/* Sort forest_safe in place by Forest_Name (no OUT= is given, so the dataset itself
   is replaced), then list it. The later MERGE ... BY Forest_Name reads this dataset. */
proc sort data=forest_safe; by Forest_Name; run;

proc print data=forest_safe;

run;

OUTPUT:

ObsForest_NameCountryFire_RiskSurvey_DateAreaAnimal_CountRainfallSafety_LevelNext_SurveyMonths_SinceSafety_Flag
1AmazonBrazilHigh01JAN2023550000030000002200MODERATE RISK01JUL202335SAFE
2BlackForestGermanyLow05APR202360001500001200MODERATE RISK05OCT202332SAFE
3BorneoMalaysiaHigh18JUL20237400006000003000LOW RISK18JAN202429SAFE
4CongoCongoMedium15FEB2023370000018000001900MODERATE RISK15AUG202334WATCH
5DaintreeAustraliaMedium12JUN20231200900003500LOW RISK12DEC202330WATCH
6KinabaluMalaysiaMedium25AUG20237501200002800LOW RISK25FEB202428WATCH
7SherwoodUKLow01DEC202342335000900HIGH RISK01JUN202424SAFE
8SinharajaSriLankaLow01SEP202388650005000LOW RISK01MAR202427SAFE
9SundarbansIndiaHigh20MAR2023100002500001800MODERATE RISK20SEP202333SAFE
10TaigaRussiaMedium10MAY202380000004000000600LOW RISK10NOV202331WATCH
11TongassUSAMedium15NOV2023680002000004000LOW RISK15MAY202425WATCH
12YakushimaJapanLow10OCT2023505700004300LOW RISK10APR202426SAFE


/* Sort animal_density in place by Forest_Name, the same key used for forest_safe,
   then list it. */
proc sort data=animal_density; by Forest_Name; run;

proc print data=animal_density;

run;

OUTPUT:

ObsForest_NameDensity
1Amazon0.545
2BlackForest25.000
3Borneo0.811
4Congo0.486
5Daintree75.000
6Kinabalu160.000
7Sherwood82.742
8Sinharaja738.636
9Sundarbans25.000
10Taiga0.500
11Tongass2.941
12Yakushima138.614


/* Match-merge forest_safe with animal_density on Forest_Name, which adds the Density
   column to each forest row. Both inputs are sorted by Forest_Name in the PROC SORT
   steps that precede this one. No IN= flags are used, so a name present in only one
   input would still produce a row, with the other input's columns missing. */
data forest_merged;

    merge forest_safe animal_density;

    by Forest_Name;

run;

proc print data=forest_merged;

run;

OUTPUT:

ObsForest_NameCountryFire_RiskSurvey_DateAreaAnimal_CountRainfallSafety_LevelNext_SurveyMonths_SinceSafety_FlagDensity
1AmazonBrazilHigh01JAN2023550000030000002200MODERATE RISK01JUL202335SAFE0.545
2BlackForestGermanyLow05APR202360001500001200MODERATE RISK05OCT202332SAFE25.000
3BorneoMalaysiaHigh18JUL20237400006000003000LOW RISK18JAN202429SAFE0.811
4CongoCongoMedium15FEB2023370000018000001900MODERATE RISK15AUG202334WATCH0.486
5DaintreeAustraliaMedium12JUN20231200900003500LOW RISK12DEC202330WATCH75.000
6KinabaluMalaysiaMedium25AUG20237501200002800LOW RISK25FEB202428WATCH160.000
7SherwoodUKLow01DEC202342335000900HIGH RISK01JUN202424SAFE82.742
8SinharajaSriLankaLow01SEP202388650005000LOW RISK01MAR202427SAFE738.636
9SundarbansIndiaHigh20MAR2023100002500001800MODERATE RISK20SEP202333SAFE25.000
10TaigaRussiaMedium10MAY202380000004000000600LOW RISK10NOV202331WATCH0.500
11TongassUSAMedium15NOV2023680002000004000LOW RISK15MAY202425WATCH2.941
12YakushimaJapanLow10OCT2023505700004300LOW RISK10APR202426SAFE138.614


10.APPEND STATEMENT (ADDING NEW OBSERVATIONS)

Creating New Forest Data

/*
 * Build WORK.NEW_FORESTS: a single additional forest record that uses the
 * same character lengths, date format and input layout as forest_base.
 * It carries only the seven base columns.
 */
data new_forests;
    length Forest_Name $20
           Country     $15
           Fire_Risk   $10;
    format Survey_Date date9.;

    input Forest_Name  $
          Area
          Country      $
          Animal_Count
          Rainfall
          Fire_Risk    $
          Survey_Date  :date9.;
    datalines;
GirForest 1412 India 40000 800 Medium 15JAN2024
;
run;

/* Confirm the new record was read correctly. */
proc print data=new_forests;
run;

OUTPUT:

ObsForest_NameCountryFire_RiskSurvey_DateAreaAnimal_CountRainfall
1GirForestIndiaMedium15JAN2024141240000800

Appending

/* Add the rows of new_forests to the end of forest_merged, then list the result.
   new_forests has only the seven base columns, so the appended row carries missing
   values for the columns that exist only in forest_merged (Safety_Level, Next_Survey,
   Months_Since, Safety_Flag, Density), as the listing shows.
   NOTE(review): FORCE is documented as needed when DATA= has variables that are
   absent from BASE= or that differ in type or length; it does not look required
   here - confirm before relying on it. */
proc append base=forest_merged 

            data=new_forests force;

run;

proc print data=forest_merged;

run;

OUTPUT:

ObsForest_NameCountryFire_RiskSurvey_DateAreaAnimal_CountRainfallSafety_LevelNext_SurveyMonths_SinceSafety_FlagDensity
1AmazonBrazilHigh01JAN2023550000030000002200MODERATE RISK01JUL202335SAFE0.545
2BlackForestGermanyLow05APR202360001500001200MODERATE RISK05OCT202332SAFE25.000
3BorneoMalaysiaHigh18JUL20237400006000003000LOW RISK18JAN202429SAFE0.811
4CongoCongoMedium15FEB2023370000018000001900MODERATE RISK15AUG202334WATCH0.486
5DaintreeAustraliaMedium12JUN20231200900003500LOW RISK12DEC202330WATCH75.000
6KinabaluMalaysiaMedium25AUG20237501200002800LOW RISK25FEB202428WATCH160.000
7SherwoodUKLow01DEC202342335000900HIGH RISK01JUN202424SAFE82.742
8SinharajaSriLankaLow01SEP202388650005000LOW RISK01MAR202427SAFE738.636
9SundarbansIndiaHigh20MAR2023100002500001800MODERATE RISK20SEP202333SAFE25.000
10TaigaRussiaMedium10MAY202380000004000000600LOW RISK10NOV202331WATCH0.500
11TongassUSAMedium15NOV2023680002000004000LOW RISK15MAY202425WATCH2.941
12YakushimaJapanLow10OCT2023505700004300LOW RISK10APR202426SAFE138.614
13GirForestIndiaMedium15JAN2024141240000800 .. .


11.PROC MEANS

/*
 * Default summary statistics (N, mean, standard deviation, minimum, maximum)
 * for Rainfall and Area in forest_merged.
 *
 * Practice note: the exercise version of this step read
 * "proc means forest_merged;", which is invalid because the input dataset
 * must be named with the DATA= option. The corrected statement is below.
 */
proc means data=forest_merged;

    var Rainfall Area;

run;

OUTPUT:

The MEANS Procedure

Variable    N     Mean          Std Dev       Minimum        Maximum
Rainfall    13    2461.54       1436.16       600.0000000    5000.00
Area        13    1386798.31    2637717.45    88.0000000     8000000.00

12.FINAL VALIDATION OUTPUT

/* Final check: list forest_merged, including the appended row. */
proc print data=forest_merged;

run;

OUTPUT:
ObsForest_NameCountryFire_RiskSurvey_DateAreaAnimal_CountRainfallSafety_LevelNext_SurveyMonths_SinceSafety_FlagDensity
1AmazonBrazilHigh01JAN2023550000030000002200MODERATE RISK01JUL202335SAFE0.545
2BlackForestGermanyLow05APR202360001500001200MODERATE RISK05OCT202332SAFE25.000
3BorneoMalaysiaHigh18JUL20237400006000003000LOW RISK18JAN202429SAFE0.811
4CongoCongoMedium15FEB2023370000018000001900MODERATE RISK15AUG202334WATCH0.486
5DaintreeAustraliaMedium12JUN20231200900003500LOW RISK12DEC202330WATCH75.000
6KinabaluMalaysiaMedium25AUG20237501200002800LOW RISK25FEB202428WATCH160.000
7SherwoodUKLow01DEC202342335000900HIGH RISK01JUN202424SAFE82.742
8SinharajaSriLankaLow01SEP202388650005000LOW RISK01MAR202427SAFE738.636
9SundarbansIndiaHigh20MAR2023100002500001800MODERATE RISK20SEP202333SAFE25.000
10TaigaRussiaMedium10MAY202380000004000000600LOW RISK10NOV202331WATCH0.500
11TongassUSAMedium15NOV2023680002000004000LOW RISK15MAY202425WATCH2.941
12YakushimaJapanLow10OCT2023505700004300LOW RISK10APR202426SAFE138.614
13GirForestIndiaMedium15JAN2024141240000800 .. .

YESTERDAY'S QUESTION

9.PROC CORR

/*
 * Pearson correlations of GDP against Literacy_Rate, GDP, Population and
 * Employment_Rate in indian_states.
 *
 * Practice note: the exercise version of this step used
 * "table Literacy_Rate*GDP;". TABLE is not a valid statement in PROC CORR.
 * To correlate one variable against a list, name the list on VAR and the
 * single variable on WITH, as done below.
 */
proc corr data=indian_states;

    var Literacy_Rate GDP Population Employment_Rate;

    with GDP;

run;


OUTPUT:

The CORR Procedure

1 With Variables:GDP
4 Variables:Literacy_Rate GDP Population Employment_Rate
Simple Statistics
VariableNMeanStd DevSumMinimumMaximum
GDP1217.525008.57418210.300007.6000038.50000
Literacy_Rate1275.725007.59750908.7000066.1000094.00000
Population1283.0833355.20945997.0000030.00000235.00000
Employment_Rate1260.858332.85799730.3000056.2000065.30000
Pearson Correlation Coefficients, N = 12
Prob > |r| under H0: Rho=0
        Literacy_Rate    GDP        Population    Employment_Rate
GDP     0.22374          1.00000    0.33029       0.55222
        0.4845                      0.2944        0.0626




To Visit My Previous Electronic Data Analysis:Click Here
To Visit My Previous IPS Toppers Dataset:Click Here
To Visit My Previous Global Money Loan Trends Dataset:Click Here
To Visit My Previous Statewise Population Analysis:Click Here



Follow Us On : 


 


--- FOLLOW OUR BLOG FOR MORE INFORMATION.

--->PLEASE COMMENT AND SHARE OUR BLOG.





No comments:

Post a Comment