AIRPORT TRAFFIC ANALYSIS USING PROC SQL | PROC MEANS | PROC TABULATE | PROC SGPLOT | PROC UNIVARIATE | PROC FREQ | PROC REPORT | PROC TRANSPOSE | PROC CORR | MACRO-BASED BINNING IN SAS
options nocenter validvarname=any nodate nonumber;
1. Create airport dataset
data work.airports;
infile datalines dsd truncover;
informat Airport_Code $8. Country $40. Continent $20. Passengers_per_Year comma12.
Cargo_Tons comma12. Runways 8. EST_DATE date9. LAST_RENOV date9.;
format EST_DATE LAST_RENOV date9.;
input Airport_Code :$8. Country :$40. Continent :$20. Passengers_per_Year :comma12.
Cargo_Tons :comma12. Runways EST_DATE :date9. LAST_RENOV :date9.;
datalines;
JFK,United States,North_America,61500000,220000,4,04JAN1948,15JUN2019
LHR,United Kingdom,Europe,80000000,210000,2,01MAR1946,01SEP2015
DEL,India,Asia,65000000,150000,3,01APR1962,12DEC2018
DXB,United Arab Emirates,Asia,89000000,250000,3,25JUL1960,10NOV2020
SIN,Singapore,Asia,62000000,180000,3,01APR1971,30APR2017
HND,Japan,Asia,85000000,170000,4,01MAY1964,05MAR2018
LAX,United States,North_America,88000000,300000,4,01OCT1930,20AUG2016
CDG,France,Europe,72000000,160000,4,08MAR1974,20MAY2018
AMS,Netherlands,Europe,52000000,140000,6,01JUN1967,01JUN2019
PEK,China,Asia,101000000,400000,4,01JAN1958,14FEB2021
SYD,Australia,Oceania,42000000,90000,3,15MAY1919,01JUL2017
GRU,Brazil,South_America,43000000,85000,3,01JAN1936,22SEP2016
;
run;
proc print data=work.airports label noobs;
title "Airport Source Dataset (work.airports)";
var Airport_Code Country Continent Passengers_per_Year Cargo_Tons Runways EST_DATE LAST_RENOV;
run;
OUTPUT:
| Airport_Code | Country | Continent | Passengers_per_Year | Cargo_Tons | Runways | EST_DATE | LAST_RENOV |
|---|---|---|---|---|---|---|---|
| JFK | United States | North_America | 61500000 | 220000 | 4 | 04JAN1948 | 15JUN2019 |
| LHR | United Kingdom | Europe | 80000000 | 210000 | 2 | 01MAR1946 | 01SEP2015 |
| DEL | India | Asia | 65000000 | 150000 | 3 | 01APR1962 | 12DEC2018 |
| DXB | United Arab Emirates | Asia | 89000000 | 250000 | 3 | 25JUL1960 | 10NOV2020 |
| SIN | Singapore | Asia | 62000000 | 180000 | 3 | 01APR1971 | 30APR2017 |
| HND | Japan | Asia | 85000000 | 170000 | 4 | 01MAY1964 | 05MAR2018 |
| LAX | United States | North_America | 88000000 | 300000 | 4 | 01OCT1930 | 20AUG2016 |
| CDG | France | Europe | 72000000 | 160000 | 4 | 08MAR1974 | 20MAY2018 |
| AMS | Netherlands | Europe | 52000000 | 140000 | 6 | 01JUN1967 | 01JUN2019 |
| PEK | China | Asia | 101000000 | 400000 | 4 | 01JAN1958 | 14FEB2021 |
| SYD | Australia | Oceania | 42000000 | 90000 | 3 | 15MAY1919 | 01JUL2017 |
| GRU | Brazil | South_America | 43000000 | 85000 | 3 | 01JAN1936 | 22SEP2016 |
2. Derived date variables and numeric checks
data work.airports2;
set work.airports;
/* Derive calendar variables */
Est_Year = year(EST_DATE);
Est_Qtr = qtr(EST_DATE);
Est_Month = month(EST_DATE);
LastRenov_Year = year(LAST_RENOV);
Days_Since_Established = today() - EST_DATE;
Age_Years = int(Days_Since_Established / 365.25);
/* Normalize continent value (ensure proper capitalization for reporting) */
Continent_up = propcase(strip(Continent));
/* Basic data quality flags */
flag_passengers_missing = (missing(Passengers_per_Year));
flag_cargo_missing = (missing(Cargo_Tons));
flag_runways_zero = (Runways <= 0);
output;
run;
Proc print data=work.airports2;
run;
OUTPUT:
| Obs | Airport_Code | Country | Continent | Passengers_per_Year | Cargo_Tons | Runways | EST_DATE | LAST_RENOV | Est_Year | Est_Qtr | Est_Month | LastRenov_Year | Days_Since_Established | Age_Years | Continent_up | flag_passengers_missing | flag_cargo_missing | flag_runways_zero |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | JFK | United States | North_America | 61500000 | 220000 | 4 | 04JAN1948 | 15JUN2019 | 1948 | 1 | 1 | 2019 | 28443 | 77 | North_america | 0 | 0 | 0 |
| 2 | LHR | United Kingdom | Europe | 80000000 | 210000 | 2 | 01MAR1946 | 01SEP2015 | 1946 | 1 | 3 | 2015 | 29117 | 79 | Europe | 0 | 0 | 0 |
| 3 | DEL | India | Asia | 65000000 | 150000 | 3 | 01APR1962 | 12DEC2018 | 1962 | 2 | 4 | 2018 | 23242 | 63 | Asia | 0 | 0 | 0 |
| 4 | DXB | United Arab Emirates | Asia | 89000000 | 250000 | 3 | 25JUL1960 | 10NOV2020 | 1960 | 3 | 7 | 2020 | 23857 | 65 | Asia | 0 | 0 | 0 |
| 5 | SIN | Singapore | Asia | 62000000 | 180000 | 3 | 01APR1971 | 30APR2017 | 1971 | 2 | 4 | 2017 | 19955 | 54 | Asia | 0 | 0 | 0 |
| 6 | HND | Japan | Asia | 85000000 | 170000 | 4 | 01MAY1964 | 05MAR2018 | 1964 | 2 | 5 | 2018 | 22481 | 61 | Asia | 0 | 0 | 0 |
| 7 | LAX | United States | North_America | 88000000 | 300000 | 4 | 01OCT1930 | 20AUG2016 | 1930 | 4 | 10 | 2016 | 34747 | 95 | North_america | 0 | 0 | 0 |
| 8 | CDG | France | Europe | 72000000 | 160000 | 4 | 08MAR1974 | 20MAY2018 | 1974 | 1 | 3 | 2018 | 18883 | 51 | Europe | 0 | 0 | 0 |
| 9 | AMS | Netherlands | Europe | 52000000 | 140000 | 6 | 01JUN1967 | 01JUN2019 | 1967 | 2 | 6 | 2019 | 21355 | 58 | Europe | 0 | 0 | 0 |
| 10 | PEK | China | Asia | 101000000 | 400000 | 4 | 01JAN1958 | 14FEB2021 | 1958 | 1 | 1 | 2021 | 24793 | 67 | Asia | 0 | 0 | 0 |
| 11 | SYD | Australia | Oceania | 42000000 | 90000 | 3 | 15MAY1919 | 01JUL2017 | 1919 | 2 | 5 | 2017 | 38904 | 106 | Oceania | 0 | 0 | 0 |
| 12 | GRU | Brazil | South_America | 43000000 | 85000 | 3 | 01JAN1936 | 22SEP2016 | 1936 | 1 | 1 | 2016 | 32829 | 89 | South_america | 0 | 0 | 0 |
3. PROC MEANS & PROC UNIVARIATE — descriptive stats
proc means data=work.airports2 n mean median std min maxdec=0;
var Passengers_per_Year Cargo_Tons Runways Age_Years;
title "Numeric Summary: Passengers, Cargo, Runways, Age";
run;
OUTPUT:
The MEANS Procedure
| Variable | N | Mean | Median | Std Dev | Minimum |
|---|---|---|---|---|---|
Passengers_per_Year Cargo_Tons Runways Age_Years | 12 12 12 12 | 70041667 196250 4 72 | 68500000 175000 4 66 | 19005332 89013 1 17 | 42000000 85000 2 51 |
proc univariate data=work.airports2 cibasic;
var Passengers_per_Year Cargo_Tons;
title "Univariate Distribution (Passengers and Cargo)";
run;
OUTPUT:
The UNIVARIATE Procedure
Variable: Passengers_per_Year
| Moments | |||
|---|---|---|---|
| N | 12 | Sum Weights | 12 |
| Mean | 70041666.7 | Sum Observations | 840500000 |
| Std Deviation | 19005332.2 | Variance | 3.61203E14 |
| Skewness | -0.0373413 | Kurtosis | -1.0370466 |
| Uncorrected SS | 6.28433E16 | Corrected SS | 3.97323E15 |
| Coeff Variation | 27.1343232 | Std Error Mean | 5486366.83 |
| Basic Statistical Measures | |||
|---|---|---|---|
| Location | Variability | ||
| Mean | 70041667 | Std Deviation | 19005332 |
| Median | 68500000 | Variance | 3.61203E14 |
| Mode | . | Range | 59000000 |
| Interquartile Range | 29750000 | ||
| Basic Confidence Limits Assuming Normality | |||
|---|---|---|---|
| Parameter | Estimate | 95% Confidence Limits | |
| Mean | 70041667 | 57966255 | 82117079 |
| Std Deviation | 19005332 | 13463285 | 32268736 |
| Variance | 3.61203E14 | 1.8126E14 | 1.04127E15 |
| Tests for Location: Mu0=0 | ||||
|---|---|---|---|---|
| Test | Statistic | p Value | ||
| Student's t | t | 12.76649 | Pr > |t| | <.0001 |
| Sign | M | 6 | Pr >= |M| | 0.0005 |
| Signed Rank | S | 39 | Pr >= |S| | 0.0005 |
| Quantiles (Definition 5) | |
|---|---|
| Level | Quantile |
| 100% Max | 101000000 |
| 99% | 101000000 |
| 95% | 101000000 |
| 90% | 89000000 |
| 75% Q3 | 86500000 |
| 50% Median | 68500000 |
| 25% Q1 | 56750000 |
| 10% | 43000000 |
| 5% | 42000000 |
| 1% | 42000000 |
| 0% Min | 42000000 |
| Extreme Observations | |||
|---|---|---|---|
| Lowest | Highest | ||
| Value | Obs | Value | Obs |
| 42000000 | 11 | 80000000 | 2 |
| 43000000 | 12 | 85000000 | 6 |
| 52000000 | 9 | 88000000 | 7 |
| 61500000 | 1 | 89000000 | 4 |
| 62000000 | 5 | 101000000 | 10 |
The UNIVARIATE Procedure
Variable: Cargo_Tons
| Moments | |||
|---|---|---|---|
| N | 12 | Sum Weights | 12 |
| Mean | 196250 | Sum Observations | 2355000 |
| Std Deviation | 89012.8949 | Variance | 7923295455 |
| Skewness | 1.05541209 | Kurtosis | 1.35182923 |
| Uncorrected SS | 5.49325E11 | Corrected SS | 8.71563E10 |
| Coeff Variation | 45.3568891 | Std Error Mean | 25695.8094 |
| Basic Statistical Measures | |||
|---|---|---|---|
| Location | Variability | ||
| Mean | 196250.0 | Std Deviation | 89013 |
| Median | 175000.0 | Variance | 7923295455 |
| Mode | . | Range | 315000 |
| Interquartile Range | 90000 | ||
| Basic Confidence Limits Assuming Normality | |||
|---|---|---|---|
| Parameter | Estimate | 95% Confidence Limits | |
| Mean | 196250 | 139694 | 252806 |
| Std Deviation | 89013 | 63056 | 151133 |
| Variance | 7923295455 | 3976097360 | 2.28412E10 |
| Tests for Location: Mu0=0 | ||||
|---|---|---|---|---|
| Test | Statistic | p Value | ||
| Student's t | t | 7.637432 | Pr > |t| | <.0001 |
| Sign | M | 6 | Pr >= |M| | 0.0005 |
| Signed Rank | S | 39 | Pr >= |S| | 0.0005 |
| Quantiles (Definition 5) | |
|---|---|
| Level | Quantile |
| 100% Max | 400000 |
| 99% | 400000 |
| 95% | 400000 |
| 90% | 300000 |
| 75% Q3 | 235000 |
| 50% Median | 175000 |
| 25% Q1 | 145000 |
| 10% | 90000 |
| 5% | 85000 |
| 1% | 85000 |
| 0% Min | 85000 |
| Extreme Observations | |||
|---|---|---|---|
| Lowest | Highest | ||
| Value | Obs | Value | Obs |
| 85000 | 12 | 210000 | 2 |
| 90000 | 11 | 220000 | 1 |
| 140000 | 9 | 250000 | 4 |
| 150000 | 3 | 300000 | 7 |
| 160000 | 8 | 400000 | 10 |
4. Macro-based binning for traffic categories
%macro traffic_bins(in=work.airports2, out=work.airports_binned, var=Passengers_per_Year,
low=20000000, mid=50000000, high=80000000);
/* Create bins: Low, Medium, High, Very_High using thresholds passed as params */
data &out.;
set &in.;
length Traffic_Category $12.;
if missing(&var.) then Traffic_Category = 'Missing';
else if &var. < &low. then Traffic_Category = 'Low';
else if &var. >= &low. and &var. < &mid. then Traffic_Category = 'Medium';
else if &var. >= &mid. and &var. < &high. then Traffic_Category = 'High';
else Traffic_Category = 'Very_High';
run;
proc print data=&out.;
run;
/* Create format for nice printing */
proc freq data=&out. noprint;
tables Traffic_Category / out=_freq_;
run;
proc print data=_freq_;
run;
%mend traffic_bins;
%traffic_bins();
OUTPUT:
| Obs | Airport_Code | Country | Continent | Passengers_per_Year | Cargo_Tons | Runways | EST_DATE | LAST_RENOV | Est_Year | Est_Qtr | Est_Month | LastRenov_Year | Days_Since_Established | Age_Years | Continent_up | flag_passengers_missing | flag_cargo_missing | flag_runways_zero | Traffic_Category |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | JFK | United States | North_America | 61500000 | 220000 | 4 | 04JAN1948 | 15JUN2019 | 1948 | 1 | 1 | 2019 | 28443 | 77 | North_america | 0 | 0 | 0 | High |
| 2 | LHR | United Kingdom | Europe | 80000000 | 210000 | 2 | 01MAR1946 | 01SEP2015 | 1946 | 1 | 3 | 2015 | 29117 | 79 | Europe | 0 | 0 | 0 | Very_High |
| 3 | DEL | India | Asia | 65000000 | 150000 | 3 | 01APR1962 | 12DEC2018 | 1962 | 2 | 4 | 2018 | 23242 | 63 | Asia | 0 | 0 | 0 | High |
| 4 | DXB | United Arab Emirates | Asia | 89000000 | 250000 | 3 | 25JUL1960 | 10NOV2020 | 1960 | 3 | 7 | 2020 | 23857 | 65 | Asia | 0 | 0 | 0 | Very_High |
| 5 | SIN | Singapore | Asia | 62000000 | 180000 | 3 | 01APR1971 | 30APR2017 | 1971 | 2 | 4 | 2017 | 19955 | 54 | Asia | 0 | 0 | 0 | High |
| 6 | HND | Japan | Asia | 85000000 | 170000 | 4 | 01MAY1964 | 05MAR2018 | 1964 | 2 | 5 | 2018 | 22481 | 61 | Asia | 0 | 0 | 0 | Very_High |
| 7 | LAX | United States | North_America | 88000000 | 300000 | 4 | 01OCT1930 | 20AUG2016 | 1930 | 4 | 10 | 2016 | 34747 | 95 | North_america | 0 | 0 | 0 | Very_High |
| 8 | CDG | France | Europe | 72000000 | 160000 | 4 | 08MAR1974 | 20MAY2018 | 1974 | 1 | 3 | 2018 | 18883 | 51 | Europe | 0 | 0 | 0 | High |
| 9 | AMS | Netherlands | Europe | 52000000 | 140000 | 6 | 01JUN1967 | 01JUN2019 | 1967 | 2 | 6 | 2019 | 21355 | 58 | Europe | 0 | 0 | 0 | High |
| 10 | PEK | China | Asia | 101000000 | 400000 | 4 | 01JAN1958 | 14FEB2021 | 1958 | 1 | 1 | 2021 | 24793 | 67 | Asia | 0 | 0 | 0 | Very_High |
| 11 | SYD | Australia | Oceania | 42000000 | 90000 | 3 | 15MAY1919 | 01JUL2017 | 1919 | 2 | 5 | 2017 | 38904 | 106 | Oceania | 0 | 0 | 0 | Medium |
| 12 | GRU | Brazil | South_America | 43000000 | 85000 | 3 | 01JAN1936 | 22SEP2016 | 1936 | 1 | 1 | 2016 | 32829 | 89 | South_america | 0 | 0 | 0 | Medium |
| Obs | Traffic_Category | COUNT | PERCENT |
|---|---|---|---|
| 1 | High | 5 | 41.6667 |
| 2 | Medium | 2 | 16.6667 |
| 3 | Very_High | 5 | 41.6667 |
proc print data=work.airports_binned noobs;
title "Airports with Traffic Category (based on Passengers per Year)";
var Airport_Code Passengers_per_Year Traffic_Category;
run;
OUTPUT:
| Airport_Code | Passengers_per_Year | Traffic_Category |
|---|---|---|
| JFK | 61500000 | High |
| LHR | 80000000 | Very_High |
| DEL | 65000000 | High |
| DXB | 89000000 | Very_High |
| SIN | 62000000 | High |
| HND | 85000000 | Very_High |
| LAX | 88000000 | Very_High |
| CDG | 72000000 | High |
| AMS | 52000000 | High |
| PEK | 101000000 | Very_High |
| SYD | 42000000 | Medium |
| GRU | 43000000 | Medium |
5. PROC TABULATE — multi-dimensional summary
proc tabulate data=work.airports_binned format=12.0;
class Continent_up Traffic_Category;
var Passengers_per_Year Cargo_Tons Runways;
table
Continent_up,
(Traffic_Category all='All Traffic') *
(Passengers_per_Year*sum='Total Passengers'
Passengers_per_Year*mean='Avg Passengers'
Cargo_Tons*sum='Total Cargo (t)'
Runways*mean='Avg Runways');
title "Tabulate: Summary of Passengers/Cargo by Continent and Traffic Category";
run;
OUTPUT:
| Traffic_Category | All Traffic | |||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| High | Medium | Very_High | ||||||||||||||
| Passengers_per_Year | Passengers_per_Year | Cargo_Tons | Runways | Passengers_per_Year | Passengers_per_Year | Cargo_Tons | Runways | Passengers_per_Year | Passengers_per_Year | Cargo_Tons | Runways | Passengers_per_Year | Passengers_per_Year | Cargo_Tons | Runways | |
| Total Passengers | Avg Passengers | Total Cargo (t) | Avg Runways | Total Passengers | Avg Passengers | Total Cargo (t) | Avg Runways | Total Passengers | Avg Passengers | Total Cargo (t) | Avg Runways | Total Passengers | Avg Passengers | Total Cargo (t) | Avg Runways | |
| Continent_up | 127000000 | 63500000 | 330000 | 3 | . | . | . | . | 275000000 | 91666667 | 820000 | 4 | 402000000 | 80400000 | 1150000 | 3 |
| Asia | ||||||||||||||||
| Europe | 124000000 | 62000000 | 300000 | 5 | . | . | . | . | 80000000 | 80000000 | 210000 | 2 | 204000000 | 68000000 | 510000 | 4 |
| North_america | 61500000 | 61500000 | 220000 | 4 | . | . | . | . | 88000000 | 88000000 | 300000 | 4 | 149500000 | 74750000 | 520000 | 4 |
| Oceania | . | . | . | . | 42000000 | 42000000 | 90000 | 3 | . | . | . | . | 42000000 | 42000000 | 90000 | 3 |
| South_america | . | . | . | . | 43000000 | 43000000 | 85000 | 3 | . | . | . | . | 43000000 | 43000000 | 85000 | 3 |
6. PROC SGPLOT — visualizations
proc sgplot data=work.airports_binned;
title "Total Passengers by Continent (sum)";
vbar Continent_up / response=Passengers_per_Year stat=sum datalabel;
run;
OUTPUT:
proc sgplot data=work.airports_binned;
title "Passengers vs Cargo (scatter, labeled by Airport_Code)";
scatter x=Passengers_per_Year y=Cargo_Tons / datalabel=Airport_Code markerattrs=(symbol=CircleFilled size=10);
xaxis label="Passengers per Year";
yaxis label="Cargo (Tons)";
run;
OUTPUT:
proc sgplot data=work.airports_binned;
title "Runways distribution by Traffic Category";
vbar Traffic_Category / response=Runways stat=mean datalabel;
run;
OUTPUT:
7. PROC FREQ — categorical summaries and cross-tabs
proc freq data=work.airports_binned;
tables Continent_up*Traffic_Category / norow nocol nopercent;
title "Continent x Traffic Category Cross-tabulation";
run;
OUTPUT:
The FREQ Procedure
|
| |||||||||||||||||||||||||||||||||||||||||||||
8. PROC REPORT — formatted summary for reporting
proc report data=work.airports_binned nowd;
column Continent_up Traffic_Category n Passengers_per_Year Cargo_Tons;
define Continent_up / group 'Continent';
define Traffic_Category / group 'Traffic Category';
define n / computed 'Count';
define Passengers_per_Year / analysis mean 'Avg Passengers';
define Cargo_Tons / analysis sum 'Total Cargo (t)';
compute n;
n = _c3_; /* pulls the computed COUNT column */
endcomp;
title "Formatted Report: Avg Passengers and Total Cargo by Continent and Traffic Category";
run;
OUTPUT:
| Continent | Traffic Category | Count | Avg Passengers | Total Cargo (t) |
|---|---|---|---|---|
| Asia | High | . | 63500000 | 330000 |
| Very_High | . | 91666667 | 820000 | |
| Europe | High | . | 62000000 | 300000 |
| Very_High | . | 80000000 | 210000 | |
| North_america | High | . | 61500000 | 220000 |
| Very_High | . | 88000000 | 300000 | |
| Oceania | Medium | . | 42000000 | 90000 |
| South_america | Medium | . | 43000000 | 85000 |
9. PROC CORR — check relationships
proc corr data=work.airports_binned pearson;
var Passengers_per_Year Cargo_Tons Runways Age_Years;
title "Correlation Matrix (Passengers, Cargo, Runways, Age)";
run;
OUTPUT:
The CORR Procedure
| 4 Variables: | Passengers_per_Year Cargo_Tons Runways Age_Years |
|---|
| Simple Statistics | ||||||
|---|---|---|---|---|---|---|
| Variable | N | Mean | Std Dev | Sum | Minimum | Maximum |
| Passengers_per_Year | 12 | 70041667 | 19005332 | 840500000 | 42000000 | 101000000 |
| Cargo_Tons | 12 | 196250 | 89013 | 2355000 | 85000 | 400000 |
| Runways | 12 | 3.58333 | 0.99620 | 43.00000 | 2.00000 | 6.00000 |
| Age_Years | 12 | 72.08333 | 17.28088 | 865.00000 | 51.00000 | 106.00000 |
| Pearson Correlation Coefficients, N = 12 Prob > |r| under H0: Rho=0 | ||||
|---|---|---|---|---|
| Passengers_per_Year | Cargo_Tons | Runways | Age_Years | |
| Passengers_per_Year | 1.00000 | 0.86125 0.0003 | -0.03261 0.9199 | -0.28332 0.3722 |
| Cargo_Tons | 0.86125 0.0003 | 1.00000 | 0.08842 0.7846 | -0.10350 0.7489 |
| Runways | -0.03261 0.9199 | 0.08842 0.7846 | 1.00000 | -0.30936 0.3278 |
| Age_Years | -0.28332 0.3722 | -0.10350 0.7489 | -0.30936 0.3278 | 1.00000 |
10. PROC TRANSPOSE — wide-to-long example
proc sort data=work.airports_binned out=work.airports_binned_sorted;
by Airport_Code Country Continent_up Traffic_Category;
run;
proc transpose data=work.airports_binned_sorted out=work.long_pass(drop=_NAME_);
by Airport_Code Country Continent_up Traffic_Category;
var Passengers_per_Year Cargo_Tons Runways;
run;
proc print data=work.long_pass noobs;
title "Transposed (Long) Format of Passengers/Cargo/Runways per Airport";
run;
OUTPUT:
| Airport_Code | Country | Continent_up | Traffic_Category | COL1 |
|---|---|---|---|---|
| AMS | Netherlands | Europe | High | 52000000 |
| AMS | Netherlands | Europe | High | 140000 |
| AMS | Netherlands | Europe | High | 6 |
| CDG | France | Europe | High | 72000000 |
| CDG | France | Europe | High | 160000 |
| CDG | France | Europe | High | 4 |
| DEL | India | Asia | High | 65000000 |
| DEL | India | Asia | High | 150000 |
| DEL | India | Asia | High | 3 |
| DXB | United Arab Emirates | Asia | Very_High | 89000000 |
| DXB | United Arab Emirates | Asia | Very_High | 250000 |
| DXB | United Arab Emirates | Asia | Very_High | 3 |
| GRU | Brazil | South_america | Medium | 43000000 |
| GRU | Brazil | South_america | Medium | 85000 |
| GRU | Brazil | South_america | Medium | 3 |
| HND | Japan | Asia | Very_High | 85000000 |
| HND | Japan | Asia | Very_High | 170000 |
| HND | Japan | Asia | Very_High | 4 |
| JFK | United States | North_america | High | 61500000 |
| JFK | United States | North_america | High | 220000 |
| JFK | United States | North_america | High | 4 |
| LAX | United States | North_america | Very_High | 88000000 |
| LAX | United States | North_america | Very_High | 300000 |
| LAX | United States | North_america | Very_High | 4 |
| LHR | United Kingdom | Europe | Very_High | 80000000 |
| LHR | United Kingdom | Europe | Very_High | 210000 |
| LHR | United Kingdom | Europe | Very_High | 2 |
| PEK | China | Asia | Very_High | 101000000 |
| PEK | China | Asia | Very_High | 400000 |
| PEK | China | Asia | Very_High | 4 |
| SIN | Singapore | Asia | High | 62000000 |
| SIN | Singapore | Asia | High | 180000 |
| SIN | Singapore | Asia | High | 3 |
| SYD | Australia | Oceania | Medium | 42000000 |
| SYD | Australia | Oceania | Medium | 90000 |
| SYD | Australia | Oceania | Medium | 3 |
11. PROC SQL examples — create summary table and export-ready table
proc sql;
create table work.summary_by_continent as
select Continent_up as Continent,
count(*) as Airports,
sum(Passengers_per_Year) as Total_Passengers format=comma12.,
mean(Passengers_per_Year) as Avg_Passengers format=comma12.,
sum(Cargo_Tons) as Total_Cargo format=comma12.,
mean(Runways) as Avg_Runways format=8.2
from work.airports_binned
group by Continent_up
order by Total_Passengers desc;
quit;
proc print data=work.summary_by_continent noobs;
title "SQL Summary: Airports by Continent (by total passengers)";
run;
OUTPUT:
| Continent | Airports | Total_Passengers | Avg_Passengers | Total_Cargo | Avg_Runways |
|---|---|---|---|---|---|
| Asia | 5 | 402,000,000 | 80,400,000 | 1,150,000 | 3.40 |
| Europe | 3 | 204,000,000 | 68,000,000 | 510,000 | 4.00 |
| North_america | 2 | 149,500,000 | 74,750,000 | 520,000 | 4.00 |
| South_america | 1 | 43,000,000 | 43,000,000 | 85,000 | 3.00 |
| Oceania | 1 | 42,000,000 | 42,000,000 | 90,000 | 3.00 |
No comments:
Post a Comment