313.Are We Losing Wildlife Faster Than We Think? – Animal Conservation Analysis Using SAS
Are We Losing Wildlife Faster Than We Think? – Animal Conservation Analysis Using SAS
options fmtsearch=(work.formats) nocenter nodate nonumber;
1. Proc Format
proc format library=work.formats;
/* Standardize continent labels */
value $continentfmt
'AF' = 'Africa'
'AS' = 'Asia'
'EU' = 'Europe'
'NA' = 'North America'
'SA' = 'South America'
'OC' = 'Oceania'
'AN' = 'Antarctica'
other = 'Unknown';
LOG:
/* Diet categories */
value $dietfmt
'Herbivore' = 'Herbivore'
'Carnivore' = 'Carnivore'
'Omnivore' = 'Omnivore'
'Insectivore' = 'Insectivore'
other = 'Other/Unknown';
LOG:
/* Conservation status */
value $consfmt
'LC' = 'Least Concern'
'NT' = 'Near Threatened'
'VU' = 'Vulnerable'
'EN' = 'Endangered'
'CR' = 'Critically Endangered'
'EW' = 'Extinct in Wild'
'EX' = 'Extinct'
other = 'Unknown';
LOG:
/* Population bins using numeric format (for PROC FREQ / charts) */
value popcat
low - <1000 = 'Tiny (<1k)'
1000 - <10000 = 'Small (1k-10k)'
10000 - <100000 = 'Medium (10k-100k)'
100000 - <1000000 = 'Large (100k-1M)'
1000000 - high = 'Very Large (>1M)';
run;
LOG:
2. Create the dataset using PROC SQL
proc sql;
create table work.animals as
select
"African Elephant" as Species,
"AF" as Continent,
415000 as Population,
60 as Lifespan,
"Herbivore" as Diet,
"EN" as Conservation_Status,
'15FEB2022'd as Last_Survey_Date,
'01JAN2022'd as Population_Estimate_Date
from sashelp.class(obs=1)
union all
select "Bengal Tiger", "AS", 2500, 15, "Carnivore", "EN", '10DEC2023'd, '01JAN2023'd
from sashelp.class(obs=1)
union all
select "Giant Panda", "AS", 1864, 20, "Herbivore", "VU", '05MAY2021'd, '01JAN2021'd
from sashelp.class(obs=1)
union all
select "Blue Whale", "OC", 10000, 90, "Carnivore", "EN", '20JUN2020'd, '01JAN2020'd
from sashelp.class(obs=1)
union all
select "Red Kangaroo", "OC", 11500000, 22, "Herbivore", "LC", '12NOV2022'd, '01JAN2022'd
from sashelp.class(obs=1)
union all
select "Galapagos Tortoise", "SA", 15000, 100, "Herbivore","EN",'02APR2019'd,'01JAN2019'd
from sashelp.class(obs=1)
union all
select "Polar Bear", "AN", 26000, 25, "Carnivore", "VU",'30SEP2022'd,'01JAN2022'd
from sashelp.class(obs=1)
union all
select "Grizzly Bear", "NA", 55000, 25, "Omnivore", "LC",'18JUL2023'd,'01JAN2023'd
from sashelp.class(obs=1)
union all
select "Emperor Penguin", "AN", 595000, 20, "Carnivore","NT",'05AUG2021'd,'01JAN2021'd
from sashelp.class(obs=1)
union all
select "Mountain Gorilla", "AF", 1005, 35, "Herbivore","CR",'22NOV2023'd,'01JAN2023'd
from sashelp.class(obs=1)
union all
select "Komodo Dragon", "OC", 3000, 30, "Carnivore","VU",'11MAR2022'd,'01JAN2022'd
from sashelp.class(obs=1)
union all
select "European Hedgehog", "EU", 1200000, 8, "Omnivore","LC",'03OCT2023'd,'01JAN2023'd
from sashelp.class(obs=1)
;
quit;
Proc print data=work.animals;
run;
OUTPUT:
| Obs | Species | Continent | Population | Lifespan | Diet | Conservation_Status | Last_Survey_Date | Population_Estimate_Date |
|---|---|---|---|---|---|---|---|---|
| 1 | African Elephant | AF | 415000 | 60 | Herbivore | EN | 22691 | 22646 |
| 2 | Bengal Tiger | AS | 2500 | 15 | Carnivore | EN | 23354 | 23011 |
| 3 | Giant Panda | AS | 1864 | 20 | Herbivore | VU | 22405 | 22281 |
| 4 | Blue Whale | OC | 10000 | 90 | Carnivore | EN | 22086 | 21915 |
| 5 | Red Kangaroo | OC | 11500000 | 22 | Herbivore | LC | 22961 | 22646 |
| 6 | Galapagos Tortoise | SA | 15000 | 100 | Herbivore | EN | 21641 | 21550 |
| 7 | Polar Bear | AN | 26000 | 25 | Carnivore | VU | 22918 | 22646 |
| 8 | Grizzly Bear | NA | 55000 | 25 | Omnivore | LC | 23209 | 23011 |
| 9 | Emperor Penguin | AN | 595000 | 20 | Carnivore | NT | 22497 | 22281 |
| 10 | Mountain Gorilla | AF | 1005 | 35 | Herbivore | CR | 23336 | 23011 |
| 11 | Komodo Dragon | OC | 3000 | 30 | Carnivore | VU | 22715 | 22646 |
| 12 | European Hedgehog | EU | 1200000 | 8 | Omnivore | LC | 23286 | 23011 |
3. Add labels and formats
data work.animals;
set work.animals;
label Species = "Species (Common Name)"
Continent = "Continent Code"
Population = "Estimated Population (count)"
Lifespan = "Average Lifespan (years)"
Diet = "Primary Diet"
Conservation_Status = "Conservation Status (code)"
Last_Survey_Date = "Last Survey Date"
Population_Estimate_Date = "Population Estimate Date";
format Continent $continentfmt. Diet $dietfmt. Conservation_Status $consfmt.
Population popcat. Last_Survey_Date date9. Population_Estimate_Date date9.;
run;
proc print data=work.animals (obs=12) label noobs;
title "WORK.ANIMALS - First 12 Observations";
run;
OUTPUT:
| Species (Common Name) | Continent Code | Estimated Population (count) | Average Lifespan (years) | Primary Diet | Conservation Status (code) | Last Survey Date | Population Estimate Date |
|---|---|---|---|---|---|---|---|
| African Elephant | Africa | Large (100k-1M) | 60 | Herbivore | Endangered | 15FEB2022 | 01JAN2022 |
| Bengal Tiger | Asia | Small (1k-10k) | 15 | Carnivore | Endangered | 10DEC2023 | 01JAN2023 |
| Giant Panda | Asia | Small (1k-10k) | 20 | Herbivore | Vulnerable | 05MAY2021 | 01JAN2021 |
| Blue Whale | Oceania | Medium (10k-100k) | 90 | Carnivore | Endangered | 20JUN2020 | 01JAN2020 |
| Red Kangaroo | Oceania | Very Large (>1M) | 22 | Herbivore | Least Concern | 12NOV2022 | 01JAN2022 |
| Galapagos Tortoise | South America | Medium (10k-100k) | 100 | Herbivore | Endangered | 02APR2019 | 01JAN2019 |
| Polar Bear | Antarctica | Medium (10k-100k) | 25 | Carnivore | Vulnerable | 30SEP2022 | 01JAN2022 |
| Grizzly Bear | North America | Medium (10k-100k) | 25 | Omnivore | Least Concern | 18JUL2023 | 01JAN2023 |
| Emperor Penguin | Antarctica | Large (100k-1M) | 20 | Carnivore | Near Threatened | 05AUG2021 | 01JAN2021 |
| Mountain Gorilla | Africa | Small (1k-10k) | 35 | Herbivore | Critically Endangered | 22NOV2023 | 01JAN2023 |
| Komodo Dragon | Oceania | Small (1k-10k) | 30 | Carnivore | Vulnerable | 11MAR2022 | 01JAN2022 |
| European Hedgehog | Europe | Very Large (>1M) | 8 | Omnivore | Least Concern | 03OCT2023 | 01JAN2023 |
4. Frequency distributions
proc freq data=work.animals;
tables Continent Conservation_Status Diet Population / nocum nopercent;
title "Frequencies of Continent, Status, Diet and Population Bins";
run;
OUTPUT:
The FREQ Procedure
| Continent Code | |
|---|---|
| Continent | Frequency |
| Africa | 2 |
| Antarctica | 2 |
| Asia | 2 |
| Europe | 1 |
| North America | 1 |
| Oceania | 3 |
| South America | 1 |
| Conservation Status (code) | |
|---|---|
| Conservation_Status | Frequency |
| Critically Endangered | 1 |
| Endangered | 4 |
| Least Concern | 3 |
| Near Threatened | 1 |
| Vulnerable | 3 |
| Primary Diet | |
|---|---|
| Diet | Frequency |
| Carnivore | 5 |
| Herbivore | 5 |
| Omnivore | 2 |
| Estimated Population (count) | |
|---|---|
| Population | Frequency |
| Small (1k-10k) | 4 |
| Medium (10k-100k) | 4 |
| Large (100k-1M) | 2 |
| Very Large (>1M) | 2 |
5. Numeric summaries
proc means data=work.animals n mean std min max median maxdec=1;
var Population Lifespan;
title "Numeric Summary: Population & Lifespan";
run;
OUTPUT:
The MEANS Procedure
| Variable | Label | N | Mean | Std Dev | Minimum | Maximum | Median |
|---|---|---|---|---|---|---|---|
Population Lifespan | Estimated Population (count) Average Lifespan (years) | 12 12 | 1152030.8 37.5 | 3279331.9 29.8 | 1005.0 8.0 | 11500000.0 100.0 | 20500.0 25.0 |
6. Univariate diagnostics for Population
proc univariate data=work.animals;
var Population;
histogram Population / midpoints=0 to 12000000 by 1000000;
inset mean std min max / position=ne;
title "Population Distribution (Univariate)";
run;
OUTPUT:
The UNIVARIATE Procedure
Variable: Population (Estimated Population (count))
| Moments | |||
|---|---|---|---|
| N | 12 | Sum Weights | 12 |
| Mean | 1152030.75 | Sum Observations | 13824369 |
| Std Deviation | 3279331.94 | Variance | 1.0754E13 |
| Skewness | 3.38941694 | Kurtosis | 11.6086323 |
| Uncorrected SS | 1.3422E14 | Corrected SS | 1.18294E14 |
| Coeff Variation | 284.656632 | Std Error Mean | 946661.589 |
| Basic Statistical Measures | |||
|---|---|---|---|
| Location | Variability | ||
| Mean | 1152031 | Std Deviation | 3279332 |
| Median | 20500 | Variance | 1.0754E13 |
| Mode | . | Range | 11498995 |
| Interquartile Range | 502250 | ||
| Tests for Location: Mu0=0 | ||||
|---|---|---|---|---|
| Test | Statistic | p Value | ||
| Student's t | t | 1.21694 | Pr > |t| | 0.2491 |
| Sign | M | 6 | Pr >= |M| | 0.0005 |
| Signed Rank | S | 39 | Pr >= |S| | 0.0005 |
| Quantiles (Definition 5) | |
|---|---|
| Level | Quantile |
| 100% Max | 11500000 |
| 99% | 11500000 |
| 95% | 11500000 |
| 90% | 1200000 |
| 75% Q3 | 505000 |
| 50% Median | 20500 |
| 25% Q1 | 2750 |
| 10% | 1864 |
| 5% | 1005 |
| 1% | 1005 |
| 0% Min | 1005 |
| Extreme Observations | |||
|---|---|---|---|
| Lowest | Highest | ||
| Value | Obs | Value | Obs |
| 1005 | 10 | 55000 | 8 |
| 1864 | 3 | 415000 | 1 |
| 2500 | 2 | 595000 | 9 |
| 3000 | 11 | 1200000 | 12 |
| 10000 | 4 | 11500000 | 5 |
The UNIVARIATE Procedure
7. Tabulate cross-tab summary by Continent and Conservation_Status
proc tabulate data=work.animals format=8.0;
class Continent Conservation_Status;
var Population;
table Continent all,
Conservation_Status * (n='Count' colpctn='Pct within Continent') / box='Continent by Conservation Status';
title "Tabulation: Continent x Conservation Status";
run;
OUTPUT:
| Continent by Conservation Status | Conservation Status (code) | |||||||||
|---|---|---|---|---|---|---|---|---|---|---|
| Critically Endangered | Endangered | Least Concern | Near Threatened | Vulnerable | ||||||
| Count | Pct within Continent | Count | Pct within Continent | Count | Pct within Continent | Count | Pct within Continent | Count | Pct within Continent | |
| Continent Code | 1 | 100 | 1 | 25 | . | . | . | . | . | . |
| Africa | ||||||||||
| Antarctica | . | . | . | . | . | . | 1 | 100 | 1 | 33 |
| Asia | . | . | 1 | 25 | . | . | . | . | 1 | 33 |
| Europe | . | . | . | . | 1 | 33 | . | . | . | . |
| North America | . | . | . | . | 1 | 33 | . | . | . | . |
| Oceania | . | . | 1 | 25 | 1 | 33 | . | . | 1 | 33 |
| South America | . | . | 1 | 25 | . | . | . | . | . | . |
| All | 1 | 100 | 4 | 100 | 3 | 100 | 1 | 100 | 3 | 100 |
8. Generate a basic chart using PROC GCHART (bar chart)
goptions reset=all device=png htext=1.2;
proc gchart data=work.animals;
vbar Conservation_Status / discrete sumvar=Population subgroup=Continent
raxis=axis1 maxis=axis2
coutline=black
midpoints='LC' 'NT' 'VU' 'EN' 'CR' 'EW' 'EX';
title "Total Population by Conservation Status (bars) with Continent subgrouping";
run;
quit;
OUTPUT:
9. Macro: create conservation summary dataset and print + chart
%macro conservation_report(outds=work.cons_summary, chart=YES, minpop=0);
/* Aggregate counts and population sums by status and continent */
proc sql;
create table &outds as
select Conservation_Status,
put(Conservation_Status,$consfmt.) as Status_Label,
Continent,
put(Continent,$continentfmt.) as Continent_Label,
count(*) as Species_Count,
sum(Population) as Total_Population,
mean(Lifespan) as Mean_Lifespan
from work.animals
where Population >= &minpop
group by Conservation_Status, Continent
order by Conservation_Status, Continent;
quit;
title "Conservation Summary (Population >= &minpop)";
proc print data=&outds noobs label;
var Conservation_Status Status_Label Continent_Label Species_Count Total_Population Mean_Lifespan;
run;
%if %upcase(&chart)=YES %then %do;
/* simple pie of species counts by conservation status */
proc gchart data=&outds;
pie Conservation_Status / sumvar=Species_Count
other=0
percent=outside
value=none;
title "Species Count by Conservation Status (Pie)";
run;
quit;
%end;
%mend conservation_report;
%conservation_report(outds=work.cons_summary_all, chart=YES, minpop=0);
OUTPUT:
| Conservation Status (code) | Status_Label | Continent_Label | Species_Count | Total_Population | Mean_Lifespan |
|---|---|---|---|---|---|
| Critically Endangered | Critically Endangered | Africa | 1 | 1005 | 35 |
| Endangered | Endangered | Africa | 1 | 415000 | 60 |
| Endangered | Endangered | Asia | 1 | 2500 | 15 |
| Endangered | Endangered | Oceania | 1 | 10000 | 90 |
| Endangered | Endangered | South America | 1 | 15000 | 100 |
| Least Concern | Least Concern | Europe | 1 | 1200000 | 8 |
| Least Concern | Least Concern | North America | 1 | 55000 | 25 |
| Least Concern | Least Concern | Oceania | 1 | 11500000 | 22 |
| Near Threatened | Near Threatened | Antarctica | 1 | 595000 | 20 |
| Vulnerable | Vulnerable | Antarctica | 1 | 26000 | 25 |
| Vulnerable | Vulnerable | Asia | 1 | 1864 | 20 |
| Vulnerable | Vulnerable | Oceania | 1 | 3000 | 30 |
10. Macro: Create population bins and produce summary report
%macro population_binning(in=work.animals, out=work.animals_binned);
/* create a dataset with population bin labels using the popcat format */
data &out;
set ∈
Population_Category = put(Population, popcat.);
format Population_Category $30.;
run;
proc freq data=&out;
tables Population_Category / nocum nopercent;
title "Population Categories (based on popcat format)";
run;
proc means data=&out n mean median min max;
class Population_Category;
var Population Lifespan;
title "Summary by Population Category";
run;
%mend population_binning;
%population_binning(in=work.animals, out=work.animals_binned);
OUTPUT:
The FREQ Procedure
| Population_Category | Frequency |
|---|---|
| Large (100k-1M) | 2 |
| Medium (10k-100k) | 4 |
| Small (1k-10k) | 4 |
| Very Large (>1M) | 2 |
The MEANS Procedure
| Population_Category | N Obs | Variable | Label | N | Mean | Median | Minimum | Maximum |
|---|---|---|---|---|---|---|---|---|
| Large (100k-1M) | 2 | Population Lifespan | Estimated Population (count) Average Lifespan (years) | 2 2 | 505000.00 40.0000000 | 505000.00 40.0000000 | 415000.00 20.0000000 | 595000.00 60.0000000 |
| Medium (10k-100k) | 4 | Population Lifespan | Estimated Population (count) Average Lifespan (years) | 4 4 | 26500.00 60.0000000 | 20500.00 57.5000000 | 10000.00 25.0000000 | 55000.00 100.0000000 |
| Small (1k-10k) | 4 | Population Lifespan | Estimated Population (count) Average Lifespan (years) | 4 4 | 2092.25 25.0000000 | 2182.00 25.0000000 | 1005.00 15.0000000 | 3000.00 35.0000000 |
| Very Large (>1M) | 2 | Population Lifespan | Estimated Population (count) Average Lifespan (years) | 2 2 | 6350000.00 15.0000000 | 6350000.00 15.0000000 | 1200000.00 8.0000000 | 11500000.00 22.0000000 |
11.Using PROC REPORT for a clean summary table
proc report data=work.cons_summary_all nowd;
columns Status_Label Continent_Label Species_Count Total_Population Mean_Lifespan;
define Status_Label / group 'Conservation Status';
define Continent_Label / group 'Continent';
define Species_Count / analysis 'No. Species';
define Total_Population / analysis sum format=comma12. 'Total Population';
define Mean_Lifespan / analysis mean format=6.1 'Avg Lifespan (yrs)';
title "Formatted Report: Conservation Summary";
run;
OUTPUT:
| Conservation Status | Continent | No. Species | Total Population | Avg Lifespan (yrs) |
|---|---|---|---|---|
| Critically Endangered | Africa | 1 | 1,005 | 35.0 |
| Endangered | Africa | 1 | 415,000 | 60.0 |
| Asia | 1 | 2,500 | 15.0 | |
| Oceania | 1 | 10,000 | 90.0 | |
| South America | 1 | 15,000 | 100.0 | |
| Least Concern | Europe | 1 | 1,200,000 | 8.0 |
| North America | 1 | 55,000 | 25.0 | |
| Oceania | 1 | 11,500,000 | 22.0 | |
| Near Threatened | Antarctica | 1 | 595,000 | 20.0 |
| Vulnerable | Antarctica | 1 | 26,000 | 25.0 |
| Asia | 1 | 1,864 | 20.0 | |
| Oceania | 1 | 3,000 | 30.0 |
Comments
Post a Comment