Sunday, 7 December 2025

332.MARINE LIFE DATASET CREATION USING PROC SQL | PROC MEANS | PROC UNIVARIATE | PROC FREQ | PROC SGPLOT WITH MACRO-BASED EXTINCTION RISK CLASSIFICATION AND ADVANCED DATE FUNCTIONS (INTCK | INTNX | YEAR | MONTH)

MARINE LIFE DATASET CREATION USING PROC SQL | PROC MEANS | PROC UNIVARIATE | PROC FREQ | PROC SGPLOT WITH MACRO-BASED EXTINCTION RISK CLASSIFICATION AND ADVANCED DATE FUNCTIONS (INTCK | INTNX | YEAR | MONTH)

options nocenter;

1. Creating the Marine Life Dataset

data marine_life_raw;

    length Species $25 Ocean $10 Diet_Type $15;

    format First_Recorded Protection_Start Last_Survey_Date date9.;

    input Species $ Ocean $ Min_Depth Max_Depth Lifespan Diet_Type $ Risk_Score 

          First_Recorded :date9. Protection_Start :date9. Last_Survey_Date :date9.;

          /* Depth range in meters */

    Depth_Range = Max_Depth - Min_Depth;

    datalines;

Blue_Whale        Pacific    0   500  80  Filter_Feeder  4.5 01JAN1960 01JAN1986 15JUL2024

Great_White_Shark Atlantic   0   1200 70  Carnivore      3.8 01JAN1975 01JAN1991 20AUG2023

Clownfish         Pacific    1   50   10  Omnivore       1.8 01JAN1985 01JAN2005 10JUN2024

Giant_Squid       Atlantic   200 2000 5   Carnivore      2.6 01JAN1990 01JAN2008 05MAY2023

Leatherback_Turtle Indian    0   1000 60  Omnivore       4.2 01JAN1950 01JAN1975 30SEP2024

Manta_Ray         Pacific    0   1000 30  Filter_Feeder  2.9 01JAN1980 01JAN2000 01APR2024

Orca              Arctic     0   800  50  Carnivore      3.2 01JAN1965 01JAN1985 11NOV2024

Green_Sea_Turtle  Atlantic   0   200  70  Herbivore      3.0 01JAN1955 01JAN1970 18MAR2024

Anglerfish        Atlantic   300 1500 20  Carnivore      2.0 01JAN1995 01JAN2010 25DEC2023

Coral_Grouper     Indian     5   100  15  Carnivore      2.4 01JAN1988 01JAN2002 05JAN2024

Humpback_Whale    Southern   0   300  90  Filter_Feeder  3.6 01JAN1952 01JAN1980 08SEP2024

Sea_Otter         Pacific    0   40   15  Carnivore      3.4 01JAN1970 01JAN1995 14FEB2024

;

run;

proc print data=marine_life_raw;

run;

OUTPUT:

ObsSpeciesOceanDiet_TypeFirst_RecordedProtection_StartLast_Survey_DateMin_DepthMax_DepthLifespanRisk_ScoreDepth_Range
1Blue_WhalePacificFilter_Feeder01JAN196001JAN198615JUL20240500804.5500
2Great_White_SharkAtlanticCarnivore01JAN197501JAN199120AUG202301200703.81200
3ClownfishPacificOmnivore01JAN198501JAN200510JUN2024150101.849
4Giant_SquidAtlanticCarnivore01JAN199001JAN200805MAY2023200200052.61800
5Leatherback_TurtleIndianOmnivore01JAN195001JAN197530SEP202401000604.21000
6Manta_RayPacificFilter_Feeder01JAN198001JAN200001APR202401000302.91000
7OrcaArcticCarnivore01JAN196501JAN198511NOV20240800503.2800
8Green_Sea_TurtleAtlanticHerbivore01JAN195501JAN197018MAR20240200703.0200
9AnglerfishAtlanticCarnivore01JAN199501JAN201025DEC20233001500202.01200
10Coral_GrouperIndianCarnivore01JAN198801JAN200205JAN20245100152.495
11Humpback_WhaleSouthernFilter_Feeder01JAN195201JAN198008SEP20240300903.6300
12Sea_OtterPacificCarnivore01JAN197001JAN199514FEB2024040153.440


2. Macro for Extinction Risk Classification

%macro classify_risk(in=marine_life_raw, out=marine_life_risk);

    data &out.;

        set &in.;

        length Extinction_Risk $22;

        

        select;

            when (Risk_Score >= 4.5) Extinction_Risk = 'Critically Endangered';

            when (Risk_Score >= 3.5) Extinction_Risk = 'Endangered';

            when (Risk_Score >= 2.5) Extinction_Risk = 'Vulnerable';

            when (Risk_Score >= 1.5) Extinction_Risk = 'Near Threatened';

            otherwise                Extinction_Risk = 'Least Concern';

        end;

    run;

proc print data=&out.;

    run;

%mend classify_risk;


%classify_risk();

OUTPUT:

ObsSpeciesOceanDiet_TypeFirst_RecordedProtection_StartLast_Survey_DateMin_DepthMax_DepthLifespanRisk_ScoreDepth_RangeExtinction_Risk
1Blue_WhalePacificFilter_Feeder01JAN196001JAN198615JUL20240500804.5500Critically Endangered
2Great_White_SharkAtlanticCarnivore01JAN197501JAN199120AUG202301200703.81200Endangered
3ClownfishPacificOmnivore01JAN198501JAN200510JUN2024150101.849Near Threatened
4Giant_SquidAtlanticCarnivore01JAN199001JAN200805MAY2023200200052.61800Vulnerable
5Leatherback_TurtleIndianOmnivore01JAN195001JAN197530SEP202401000604.21000Endangered
6Manta_RayPacificFilter_Feeder01JAN198001JAN200001APR202401000302.91000Vulnerable
7OrcaArcticCarnivore01JAN196501JAN198511NOV20240800503.2800Vulnerable
8Green_Sea_TurtleAtlanticHerbivore01JAN195501JAN197018MAR20240200703.0200Vulnerable
9AnglerfishAtlanticCarnivore01JAN199501JAN201025DEC20233001500202.01200Near Threatened
10Coral_GrouperIndianCarnivore01JAN198801JAN200205JAN20245100152.495Near Threatened
11Humpback_WhaleSouthernFilter_Feeder01JAN195201JAN198008SEP20240300903.6300Endangered
12Sea_OtterPacificCarnivore01JAN197001JAN199514FEB2024040153.440Vulnerable


3. Date Calculations with INTCK, INTNX and more

data marine_life_final;

    set marine_life_risk;

    format Years_Monitored Years_Protected 8.

           Next_Survey_Date date9.;

    

    /* 1. Years monitored: from first recorded date until last survey */

    Years_Monitored = intck('year', First_Recorded, Last_Survey_Date, 'c');

    

    /* 2. Years under protection: from Protection_Start until today() */

    if not missing(Protection_Start) then

        Years_Protected = intck('year', Protection_Start, today(), 'c');

    else

        Years_Protected = .;

    

    /* 3. Next survey: one year after the last survey, same day and month */

    Next_Survey_Date = intnx('year', Last_Survey_Date, 1, 'same');

    

    /* 4. Extra date-related variables: year and season of last survey */

    First_Year_Recorded = year(First_Recorded);

    Last_Survey_Year    = year(Last_Survey_Date);

    Last_Survey_Month   = month(Last_Survey_Date);

    

    length Last_Survey_Season $10;

    if Last_Survey_Month in (12,1,2) then Last_Survey_Season = 'Winter';

    else if Last_Survey_Month in (3,4,5) then Last_Survey_Season = 'Spring';

    else if Last_Survey_Month in (6,7,8) then Last_Survey_Season = 'Summer';

    else Last_Survey_Season = 'Autumn';

    

    label

        Years_Monitored   = 'Years Between First Record and Last Survey'

        Years_Protected   = 'Years Under Protection Until Today'

        Next_Survey_Date  = 'Planned Next Survey Date'

        First_Year_Recorded = 'Year of First Scientific Record'

        Last_Survey_Year    = 'Year of Last Survey'

        Last_Survey_Season  = 'Season of Last Survey';

run;

proc print data=marine_life_final;

run;

OUTPUT:

ObsSpeciesOceanDiet_TypeFirst_RecordedProtection_StartLast_Survey_DateMin_DepthMax_DepthLifespanRisk_ScoreDepth_RangeExtinction_RiskYears_MonitoredYears_ProtectedNext_Survey_DateFirst_Year_RecordedLast_Survey_YearLast_Survey_MonthLast_Survey_Season
1Blue_WhalePacificFilter_Feeder01JAN196001JAN198615JUL20240500804.5500Critically Endangered643915JUL2025196020247Summer
2Great_White_SharkAtlanticCarnivore01JAN197501JAN199120AUG202301200703.81200Endangered483420AUG2024197520238Summer
3ClownfishPacificOmnivore01JAN198501JAN200510JUN2024150101.849Near Threatened392010JUN2025198520246Summer
4Giant_SquidAtlanticCarnivore01JAN199001JAN200805MAY2023200200052.61800Vulnerable331705MAY2024199020235Spring
5Leatherback_TurtleIndianOmnivore01JAN195001JAN197530SEP202401000604.21000Endangered745030SEP2025195020249Autumn
6Manta_RayPacificFilter_Feeder01JAN198001JAN200001APR202401000302.91000Vulnerable442501APR2025198020244Spring
7OrcaArcticCarnivore01JAN196501JAN198511NOV20240800503.2800Vulnerable594011NOV20251965202411Autumn
8Green_Sea_TurtleAtlanticHerbivore01JAN195501JAN197018MAR20240200703.0200Vulnerable695518MAR2025195520243Spring
9AnglerfishAtlanticCarnivore01JAN199501JAN201025DEC20233001500202.01200Near Threatened281525DEC20241995202312Winter
10Coral_GrouperIndianCarnivore01JAN198801JAN200205JAN20245100152.495Near Threatened362305JAN2025198820241Winter
11Humpback_WhaleSouthernFilter_Feeder01JAN195201JAN198008SEP20240300903.6300Endangered724508SEP2025195220249Autumn
12Sea_OtterPacificCarnivore01JAN197001JAN199514FEB2024040153.440Vulnerable543014FEB2025197020242Winter


4. Descriptive Statistics with PROC MEANS

proc means data=marine_life_final mean min max std maxdec=1;

    class Ocean;

    var Lifespan Min_Depth Max_Depth Depth_Range 

        Years_Monitored Years_Protected;

run;

OUTPUT:

The MEANS Procedure

OceanN ObsVariableLabelMeanMinimumMaximumStd Dev
Arctic1
Lifespan
Min_Depth
Max_Depth
Depth_Range
Years_Monitored
Years_Protected
 
 
 
 
Years Between First Record and Last Survey
Years Under Protection Until Today
50.0
0.0
800.0
800.0
59.0
40.0
50.0
0.0
800.0
800.0
59.0
40.0
50.0
0.0
800.0
800.0
59.0
40.0
.
.
.
.
.
.
Atlantic4
Lifespan
Min_Depth
Max_Depth
Depth_Range
Years_Monitored
Years_Protected
 
 
 
 
Years Between First Record and Last Survey
Years Under Protection Until Today
41.3
125.0
1225.0
1100.0
44.5
30.3
5.0
0.0
200.0
200.0
28.0
15.0
70.0
300.0
2000.0
1800.0
69.0
55.0
33.8
150.0
758.8
663.3
18.4
18.6
Indian2
Lifespan
Min_Depth
Max_Depth
Depth_Range
Years_Monitored
Years_Protected
 
 
 
 
Years Between First Record and Last Survey
Years Under Protection Until Today
37.5
2.5
550.0
547.5
55.0
36.5
15.0
0.0
100.0
95.0
36.0
23.0
60.0
5.0
1000.0
1000.0
74.0
50.0
31.8
3.5
636.4
639.9
26.9
19.1
Pacific4
Lifespan
Min_Depth
Max_Depth
Depth_Range
Years_Monitored
Years_Protected
 
 
 
 
Years Between First Record and Last Survey
Years Under Protection Until Today
33.8
0.3
397.5
397.3
50.3
28.5
10.0
0.0
40.0
40.0
39.0
20.0
80.0
1.0
1000.0
1000.0
64.0
39.0
32.0
0.5
455.4
455.6
11.1
8.1
Southern1
Lifespan
Min_Depth
Max_Depth
Depth_Range
Years_Monitored
Years_Protected
 
 
 
 
Years Between First Record and Last Survey
Years Under Protection Until Today
90.0
0.0
300.0
300.0
72.0
45.0
90.0
0.0
300.0
300.0
72.0
45.0
90.0
0.0
300.0
300.0
72.0
45.0
.
.
.
.
.
.

5. Distribution Analysis with PROC UNIVARIATE

proc univariate data=marine_life_final;

    var Lifespan Depth_Range Years_Monitored;

    histogram Lifespan Depth_Range / kernel;

    inset n mean median min max / position=ne;

run;

OUTPUT:

The UNIVARIATE Procedure

Variable: Lifespan

Moments
N12Sum Weights12
Mean42.9166667Sum Observations515
Std Deviation30.4106989Variance924.810606
Skewness0.20389178Kurtosis-1.6882325
Uncorrected SS32275Corrected SS10172.9167
Coeff Variation70.8598809Std Error Mean8.77881259
Basic Statistical Measures
LocationVariability
Mean42.91667Std Deviation30.41070
Median40.00000Variance924.81061
Mode15.00000Range85.00000
  Interquartile Range55.00000

Note: The mode displayed is the smallest of 2 modes with a count of 2.

Tests for Location: Mu0=0
TestStatisticp Value
Student's tt4.888664Pr > |t|0.0005
SignM6Pr >= |M|0.0005
Signed RankS39Pr >= |S|0.0005
Quantiles (Definition 5)
LevelQuantile
100% Max90
99%90
95%90
90%80
75% Q370
50% Median40
25% Q115
10%10
5%5
1%5
0% Min5
Extreme Observations
LowestHighest
ValueObsValueObs
54605
103702
1512708
1510801
2099011

The UNIVARIATE Procedure

Histogram for Lifespan

The UNIVARIATE Procedure

Variable: Depth_Range

Moments
N12Sum Weights12
Mean682Sum Observations8184
Std Deviation569.811612Variance324685.273
Skewness0.49949145Kurtosis-0.666479
Uncorrected SS9153026Corrected SS3571538
Coeff Variation83.5500897Std Error Mean164.490444
Basic Statistical Measures
LocationVariability
Mean682.000Std Deviation569.81161
Median650.000Variance324685
Mode1000.000Range1760
  Interquartile Range952.50000

Note: The mode displayed is the smallest of 2 modes with a count of 2.

Tests for Location: Mu0=0
TestStatisticp Value
Student's tt4.146138Pr > |t|0.0016
SignM6Pr >= |M|0.0005
Signed RankS39Pr >= |S|0.0005
Quantiles (Definition 5)
LevelQuantile
100% Max1800.0
99%1800.0
95%1800.0
90%1200.0
75% Q31100.0
50% Median650.0
25% Q1147.5
10%49.0
5%40.0
1%40.0
0% Min40.0
Extreme Observations
LowestHighest
ValueObsValueObs
401210005
49310006
951012002
200812009
3001118004

The UNIVARIATE Procedure

Histogram for Depth_Range

The UNIVARIATE Procedure

Variable: Years_Monitored (Years Between First Record and Last Survey)

Moments
N12Sum Weights12
Mean51.6666667Sum Observations620
Std Deviation15.9848413Variance255.515152
Skewness0.02401469Kurtosis-1.4730386
Uncorrected SS34844Corrected SS2810.66667
Coeff Variation30.9384025Std Error Mean4.61442621
Basic Statistical Measures
LocationVariability
Mean51.66667Std Deviation15.98484
Median51.00000Variance255.51515
Mode.Range46.00000
  Interquartile Range29.00000
Tests for Location: Mu0=0
TestStatisticp Value
Student's tt11.19677Pr > |t|<.0001
SignM6Pr >= |M|0.0005
Signed RankS39Pr >= |S|0.0005
Quantiles (Definition 5)
LevelQuantile
100% Max74.0
99%74.0
95%74.0
90%72.0
75% Q366.5
50% Median51.0
25% Q137.5
10%33.0
5%28.0
1%28.0
0% Min28.0
Extreme Observations
LowestHighest
ValueObsValueObs
289597
334641
3610698
3937211
446745

6. Categorical Patterns with PROC FREQ

proc freq data=marine_life_final;

    tables Diet_Type*Extinction_Risk / norow nocol nopercent;

    tables Ocean*Extinction_Risk / chisq;

run;

OUTPUT:

The FREQ Procedure

Frequency
Table of Diet_Type by Extinction_Risk
Diet_TypeExtinction_Risk
Critically EndangeredEndangeredNear ThreatenedVulnerableTotal
Carnivore
0
1
2
3
6
Filter_Feeder
1
1
0
1
3
Herbivore
0
0
0
1
1
Omnivore
0
1
1
0
2
Total
1
3
3
5
12
Frequency
Percent
Row Pct
Col Pct
Table of Ocean by Extinction_Risk
OceanExtinction_Risk
Critically EndangeredEndangeredNear ThreatenedVulnerableTotal
Arctic
0
0.00
0.00
0.00
0
0.00
0.00
0.00
0
0.00
0.00
0.00
1
8.33
100.00
20.00
1
8.33
 
 
Atlantic
0
0.00
0.00
0.00
1
8.33
25.00
33.33
1
8.33
25.00
33.33
2
16.67
50.00
40.00
4
33.33
 
 
Indian
0
0.00
0.00
0.00
1
8.33
50.00
33.33
1
8.33
50.00
33.33
0
0.00
0.00
0.00
2
16.67
 
 
Pacific
1
8.33
25.00
100.00
0
0.00
0.00
0.00
1
8.33
25.00
33.33
2
16.67
50.00
40.00
4
33.33
 
 
Southern
0
0.00
0.00
0.00
1
8.33
100.00
33.33
0
0.00
0.00
0.00
0
0.00
0.00
0.00
1
8.33
 
 
Total
1
8.33
3
25.00
3
25.00
5
41.67
12
100.00

Statistics for Table of Ocean by Extinction_Risk

StatisticDFValueProb
WARNING: 100% of the cells have expected counts less
than 5. Chi-Square may not be a valid test.
Chi-Square129.20000.6858
Likelihood Ratio Chi-Square1210.95190.5330
Mantel-Haenszel Chi-Square11.43230.2314
Phi Coefficient 0.8756 
Contingency Coefficient 0.6588 
Cramer's V 0.5055 

Sample Size = 12

7. Data Exploration with PROC SQL

7.1 Identify deep-dwelling species (Max_Depth >= 1000 m) 

proc sql;

    create table deep_species as

    select Species,Ocean,Lifespan,Extinction_Risk,Min_Depth,Max_Depth,Depth_Range,

           Years_Monitored

    from marine_life_final

    where Max_Depth >= 1000

    order by Max_Depth desc;

quit;

proc print data=deep_species;

run;

OUTPUT:

ObsSpeciesOceanLifespanExtinction_RiskMin_DepthMax_DepthDepth_RangeYears_Monitored
1Giant_SquidAtlantic5Vulnerable2002000180033
2AnglerfishAtlantic20Near Threatened3001500120028
3Great_White_SharkAtlantic70Endangered01200120048
4Leatherback_TurtleIndian60Endangered01000100074
5Manta_RayPacific30Vulnerable01000100044


7.2 Summary by Ocean: average lifespan and risk score 

proc sql;

    select Ocean,

           count(*) as Num_Species,

           mean(Lifespan)   as Avg_Lifespan   format=8.1,

           mean(Risk_Score) as Avg_Risk_Score format=8.2,

           mean(Years_Monitored) as Avg_Years_Monitored format=8.1

    from marine_life_final

    group by Ocean;

quit;

OUTPUT:

OceanNum_SpeciesAvg_LifespanAvg_Risk_ScoreAvg_Years_Monitored
Arctic150.03.2059.0
Atlantic441.32.8544.5
Indian237.53.3055.0
Pacific433.83.1550.3
Southern190.03.6072.0


8. Visualization with PROC SGPLOT

proc sgplot data=marine_life_final;

    scatter x=Depth_Range 

            y=Lifespan 

            / group=Extinction_Risk

              datalabel=Species;

    xaxis label="Depth Range (m)";

    yaxis label="Lifespan (years)";

    title "Lifespan vs Depth Range by Extinction Risk for Marine Species";

run;

OUTPUT:

The SGPlot Procedure


9. Extra Macro for Date-Based Risk Monitoring Group

%macro flag_longterm(ds=marine_life_final, out=marine_life_flagged, years=40);

    data &out.;

        set &ds.;

        length Monitoring_Group $20;

        

        if Years_Monitored >= &years then Monitoring_Group = "Long-term";

        else                               Monitoring_Group = "Recent";

    run;

proc print data=&out.;

    run;

%mend flag_longterm;


%flag_longterm(years=35);

OUTPUT:

ObsSpeciesOceanDiet_TypeFirst_RecordedProtection_StartLast_Survey_DateMin_DepthMax_DepthLifespanRisk_ScoreDepth_RangeExtinction_RiskYears_MonitoredYears_ProtectedNext_Survey_DateFirst_Year_RecordedLast_Survey_YearLast_Survey_MonthLast_Survey_SeasonMonitoring_Group
1Blue_WhalePacificFilter_Feeder01JAN196001JAN198615JUL20240500804.5500Critically Endangered643915JUL2025196020247SummerLong-term
2Great_White_SharkAtlanticCarnivore01JAN197501JAN199120AUG202301200703.81200Endangered483420AUG2024197520238SummerLong-term
3ClownfishPacificOmnivore01JAN198501JAN200510JUN2024150101.849Near Threatened392010JUN2025198520246SummerLong-term
4Giant_SquidAtlanticCarnivore01JAN199001JAN200805MAY2023200200052.61800Vulnerable331705MAY2024199020235SpringRecent
5Leatherback_TurtleIndianOmnivore01JAN195001JAN197530SEP202401000604.21000Endangered745030SEP2025195020249AutumnLong-term
6Manta_RayPacificFilter_Feeder01JAN198001JAN200001APR202401000302.91000Vulnerable442501APR2025198020244SpringLong-term
7OrcaArcticCarnivore01JAN196501JAN198511NOV20240800503.2800Vulnerable594011NOV20251965202411AutumnLong-term
8Green_Sea_TurtleAtlanticHerbivore01JAN195501JAN197018MAR20240200703.0200Vulnerable695518MAR2025195520243SpringLong-term
9AnglerfishAtlanticCarnivore01JAN199501JAN201025DEC20233001500202.01200Near Threatened281525DEC20241995202312WinterRecent
10Coral_GrouperIndianCarnivore01JAN198801JAN200205JAN20245100152.495Near Threatened362305JAN2025198820241WinterLong-term
11Humpback_WhaleSouthernFilter_Feeder01JAN195201JAN198008SEP20240300903.6300Endangered724508SEP2025195220249AutumnLong-term
12Sea_OtterPacificCarnivore01JAN197001JAN199514FEB2024040153.440Vulnerable543014FEB2025197020242WinterLong-term


/* Quick frequency of monitoring group vs extinction risk */

proc freq data=marine_life_flagged;

    tables Monitoring_Group*Extinction_Risk;

run;

OUTPUT:

The FREQ Procedure

Frequency
Percent
Row Pct
Col Pct
Table of Monitoring_Group by Extinction_Risk
Monitoring_GroupExtinction_Risk
Critically EndangeredEndangeredNear ThreatenedVulnerableTotal
Long-term
1
8.33
10.00
100.00
3
25.00
30.00
100.00
2
16.67
20.00
66.67
4
33.33
40.00
80.00
10
83.33
 
 
Recent
0
0.00
0.00
0.00
0
0.00
0.00
0.00
1
8.33
50.00
33.33
1
8.33
50.00
20.00
2
16.67
 
 
Total
1
8.33
3
25.00
3
25.00
5
41.67
12
100.00



To Visit My Previous Different Types Of Oils Dataset:Click Here
To Visit My Previous Different Types Of Series 2025 Dataset:Click Here
To Visit My Previous Analyzing Yoga Asanas Worldwide Dataset:Click Here
To Visit My Previous Analyzing Indian Languages Dataset:Click Here  



Follow Us On : 


 


--- FOLLOW OUR BLOG FOR MORE INFORMATION.

--->PLEASE DO COMMENTS AND SHARE OUR BLOG.




No comments:

Post a Comment