WORLD TOURIST PLACES DATA CREATION AND ANALYSIS USING PROC SQL | PROC MEANS | PROC FORMAT | PROC UNIVARIATE | PROC SGPLOT | MACROS WITH DATE FUNCTIONS
options nocenter;
1) CREATE BASE DATASET WITH 12 WORLD TOURIST PLACES
data work.world_tourist_places_raw;
length Place_Name $40 Country $25 Best_Season $15 Activities $60;
format Booking_Date Start_Date End_Date date9. Start_Month monyy7.;
informat Booking_Date date9.;
infile datalines truncover;
input Place_Name :$40. Country :$25. Annual_Visitors Average_Expense Best_Season :$15.
Safety_Score Activities :$60. Booking_Date :date9.;
/* Derived travel dates */
Start_Date = intnx('month', Booking_Date, 1, 'b'); /* 1st of next month */
End_Date = intnx('day', Start_Date, 6, 'same'); /* 7-day trip */
Trip_Duration_Days = End_Date - Start_Date + 1;
Days_Between_Book_And_Trip = intck('day', Booking_Date, Start_Date);
Start_Month = Start_Date;
datalines;
Eiffel_Tower_Paris France 7000000 1500 Spring-Summer 8 City_viewing|Museum_visits 01JAN2025
Grand_Canyon_National_Park USA 4500000 1200 Spring-Autumn 9 Hiking|Photography|Camping 15FEB2025
Taj_Mahal_Agra India 8000000 800 Winter 7 Heritage_tour|Street_food 10NOV2024
Great_Wall_Beijing China 9000000 1000 Spring-Autumn 7 Hiking|Cultural_exploration 20MAR2025
Santorini_Islands Greece 2000000 1800 Summer 8 Beaches|Sunsets|Boat_trips 05APR2025
Machu_Picchu_Citadel Peru 1500000 1600 Dry_Season 9 Trekking|Inca_ruins 28AUG2024
Bali_Beach_Resorts Indonesia 6000000 1100 Summer-Autumn 6 Surfing|Yoga|Nightlife 03MAY2025
Dubai_Burj_Khalifa UAE 6500000 1900 Winter 9 Shopping|Skydeck|Desert_safari 18DEC2024
Singapore_Marina_Bay Singapore 5000000 1700 All_Year 9 City_tours|Gardens|Night_safari 23JAN2025
New_York_Times_Square USA 12000000 2000 Winter-NewYear 8 Shopping|Broadway|NYE_countdown 10DEC2024
Tokyo_Shibuya_Crossing Japan 5500000 1600 Spring-Autumn 9 City_life|Anime_culture|Shopping 12MAR2025
Sydney_Opera_House Australia 3000000 1500 Spring-Summer 8 Opera|Harbour_cruises|City_walks 01SEP2024
;
run;
proc print data=work.world_tourist_places_raw;
run;
OUTPUT:
| Obs | Place_Name | Country | Best_Season | Activities | Booking_Date | Start_Date | End_Date | Start_Month | Annual_Visitors | Average_Expense | Safety_Score | Trip_Duration_Days | Days_Between_Book_And_Trip |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | Eiffel_Tower_Paris | France | Spring-Summer | City_viewing|Museum_visits | 01JAN2025 | 01FEB2025 | 07FEB2025 | FEB2025 | 7000000 | 1500 | 8 | 7 | 31 |
| 2 | Grand_Canyon_National_Park | USA | Spring-Autumn | Hiking|Photography|Camping | 15FEB2025 | 01MAR2025 | 07MAR2025 | MAR2025 | 4500000 | 1200 | 9 | 7 | 14 |
| 3 | Taj_Mahal_Agra | India | Winter | Heritage_tour|Street_food | 10NOV2024 | 01DEC2024 | 07DEC2024 | DEC2024 | 8000000 | 800 | 7 | 7 | 21 |
| 4 | Great_Wall_Beijing | China | Spring-Autumn | Hiking|Cultural_exploration | 20MAR2025 | 01APR2025 | 07APR2025 | APR2025 | 9000000 | 1000 | 7 | 7 | 12 |
| 5 | Santorini_Islands | Greece | Summer | Beaches|Sunsets|Boat_trips | 05APR2025 | 01MAY2025 | 07MAY2025 | MAY2025 | 2000000 | 1800 | 8 | 7 | 26 |
| 6 | Machu_Picchu_Citadel | Peru | Dry_Season | Trekking|Inca_ruins | 28AUG2024 | 01SEP2024 | 07SEP2024 | SEP2024 | 1500000 | 1600 | 9 | 7 | 4 |
| 7 | Bali_Beach_Resorts | Indonesia | Summer-Autumn | Surfing|Yoga|Nightlife | 03MAY2025 | 01JUN2025 | 07JUN2025 | JUN2025 | 6000000 | 1100 | 6 | 7 | 29 |
| 8 | Dubai_Burj_Khalifa | UAE | Winter | Shopping|Skydeck|Desert_safari | 18DEC2024 | 01JAN2025 | 07JAN2025 | JAN2025 | 6500000 | 1900 | 9 | 7 | 14 |
| 9 | Singapore_Marina_Bay | Singapore | All_Year | City_tours|Gardens|Night_safari | 23JAN2025 | 01FEB2025 | 07FEB2025 | FEB2025 | 5000000 | 1700 | 9 | 7 | 9 |
| 10 | New_York_Times_Square | USA | Winter-NewYear | Shopping|Broadway|NYE_countdown | 10DEC2024 | 01JAN2025 | 07JAN2025 | JAN2025 | 12000000 | 2000 | 8 | 7 | 22 |
| 11 | Tokyo_Shibuya_Crossing | Japan | Spring-Autumn | City_life|Anime_culture|Shopping | 12MAR2025 | 01APR2025 | 07APR2025 | APR2025 | 5500000 | 1600 | 9 | 7 | 20 |
| 12 | Sydney_Opera_House | Australia | Spring-Summer | Opera|Harbour_cruises|City_walks | 01SEP2024 | 01OCT2024 | 07OCT2024 | OCT2024 | 3000000 | 1500 | 8 | 7 | 30 |
2) DEFINE FORMATS FOR SEASONS, SAFETY AND VISITOR SCALE
proc format;
value safetyfmt
1 - 4 = 'Low'
5 - 7 = 'Medium'
8 - 10 = 'High';
LOG:
value visitorscale
0 - 1999999 = 'Low_traffic'
2000000 - 5999999 = 'Medium_traffic'
6000000 - 9999999 = 'High_traffic'
10000000 - high = 'Very_high_traffic';
LOG:
value $seasonfmt
'Winter' = 'Cool_Winter'
'Spring-Summer' = 'Pleasant_Spring_to_Summer'
'Spring-Autumn' = 'Mild_Spring_to_Autumn'
'Summer' = 'Hot_Summer'
'Dry_Season' = 'Dry_Season_(Andes)'
'Summer-Autumn' = 'Warm_Summer_to_Autumn'
'All_Year' = 'All_Year_Round'
'Winter-NewYear' = 'Winter_and_New_Year';
run;
LOG:
3) ENRICH DATASET WITH CATEGORICAL VARIABLES USING PROC SQL
/*Also apply formats */
proc sql;
create table work.world_tourist_enriched as
select Place_Name,Country,Annual_Visitors,Average_Expense,Best_Season,Safety_Score,
Activities,Booking_Date,Start_Date,End_Date,Trip_Duration_Days,
Days_Between_Book_And_Trip,Start_Month,put(Safety_Score, safetyfmt.)
as Safety_Category length=12,put(Annual_Visitors, visitorscale.) as
Visitor_Traffic length=18,
put(Best_Season, $seasonfmt.) as Season_Desc length=30,
/* Simple expense category */
case
when Average_Expense < 1000 then 'Budget'
when 1000 <= Average_Expense < 1700 then 'Mid_Range'
else 'Luxury'
end as Expense_Category length=10
from work.world_tourist_places_raw
;
quit;
proc print data=work.world_tourist_enriched;
run;
OUTPUT:
| Obs | Place_Name | Country | Annual_Visitors | Average_Expense | Best_Season | Safety_Score | Activities | Booking_Date | Start_Date | End_Date | Trip_Duration_Days | Days_Between_Book_And_Trip | Start_Month | Safety_Category | Visitor_Traffic | Season_Desc | Expense_Category |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | Eiffel_Tower_Paris | France | 7000000 | 1500 | Spring-Summer | 8 | City_viewing|Museum_visits | 01JAN2025 | 01FEB2025 | 07FEB2025 | 7 | 31 | FEB2025 | High | High_traffic | Pleasant_Spring_to_Summer | Mid_Range |
| 2 | Grand_Canyon_National_Park | USA | 4500000 | 1200 | Spring-Autumn | 9 | Hiking|Photography|Camping | 15FEB2025 | 01MAR2025 | 07MAR2025 | 7 | 14 | MAR2025 | High | Medium_traffic | Mild_Spring_to_Autumn | Mid_Range |
| 3 | Taj_Mahal_Agra | India | 8000000 | 800 | Winter | 7 | Heritage_tour|Street_food | 10NOV2024 | 01DEC2024 | 07DEC2024 | 7 | 21 | DEC2024 | Medium | High_traffic | Cool_Winter | Budget |
| 4 | Great_Wall_Beijing | China | 9000000 | 1000 | Spring-Autumn | 7 | Hiking|Cultural_exploration | 20MAR2025 | 01APR2025 | 07APR2025 | 7 | 12 | APR2025 | Medium | High_traffic | Mild_Spring_to_Autumn | Mid_Range |
| 5 | Santorini_Islands | Greece | 2000000 | 1800 | Summer | 8 | Beaches|Sunsets|Boat_trips | 05APR2025 | 01MAY2025 | 07MAY2025 | 7 | 26 | MAY2025 | High | Medium_traffic | Hot_Summer | Luxury |
| 6 | Machu_Picchu_Citadel | Peru | 1500000 | 1600 | Dry_Season | 9 | Trekking|Inca_ruins | 28AUG2024 | 01SEP2024 | 07SEP2024 | 7 | 4 | SEP2024 | High | Low_traffic | Dry_Season_(Andes) | Mid_Range |
| 7 | Bali_Beach_Resorts | Indonesia | 6000000 | 1100 | Summer-Autumn | 6 | Surfing|Yoga|Nightlife | 03MAY2025 | 01JUN2025 | 07JUN2025 | 7 | 29 | JUN2025 | Medium | High_traffic | Warm_Summer_to_Autumn | Mid_Range |
| 8 | Dubai_Burj_Khalifa | UAE | 6500000 | 1900 | Winter | 9 | Shopping|Skydeck|Desert_safari | 18DEC2024 | 01JAN2025 | 07JAN2025 | 7 | 14 | JAN2025 | High | High_traffic | Cool_Winter | Luxury |
| 9 | Singapore_Marina_Bay | Singapore | 5000000 | 1700 | All_Year | 9 | City_tours|Gardens|Night_safari | 23JAN2025 | 01FEB2025 | 07FEB2025 | 7 | 9 | FEB2025 | High | Medium_traffic | All_Year_Round | Luxury |
| 10 | New_York_Times_Square | USA | 12000000 | 2000 | Winter-NewYear | 8 | Shopping|Broadway|NYE_countdown | 10DEC2024 | 01JAN2025 | 07JAN2025 | 7 | 22 | JAN2025 | High | Very_high_traffic | Winter_and_New_Year | Luxury |
| 11 | Tokyo_Shibuya_Crossing | Japan | 5500000 | 1600 | Spring-Autumn | 9 | City_life|Anime_culture|Shopping | 12MAR2025 | 01APR2025 | 07APR2025 | 7 | 20 | APR2025 | High | Medium_traffic | Mild_Spring_to_Autumn | Mid_Range |
| 12 | Sydney_Opera_House | Australia | 3000000 | 1500 | Spring-Summer | 8 | Opera|Harbour_cruises|City_walks | 01SEP2024 | 01OCT2024 | 07OCT2024 | 7 | 30 | OCT2024 | High | Medium_traffic | Pleasant_Spring_to_Summer | Mid_Range |
4) PROC MEANS: BASIC STATISTICS BY SEASON AND EXPENSE CATEGORY
proc means data=work.world_tourist_enriched
mean median min max stddev maxdec=1;
class Best_Season Expense_Category;
var Annual_Visitors Average_Expense Trip_Duration_Days;
run;
OUTPUT:
The MEANS Procedure
| Best_Season | Expense_Category | N Obs | Variable | Mean | Median | Minimum | Maximum | Std Dev |
|---|---|---|---|---|---|---|---|---|
| All_Year | Luxury | 1 | Annual_Visitors Average_Expense Trip_Duration_Days | 5000000.0 1700.0 7.0 | 5000000.0 1700.0 7.0 | 5000000.0 1700.0 7.0 | 5000000.0 1700.0 7.0 | . . . |
| Dry_Season | Mid_Range | 1 | Annual_Visitors Average_Expense Trip_Duration_Days | 1500000.0 1600.0 7.0 | 1500000.0 1600.0 7.0 | 1500000.0 1600.0 7.0 | 1500000.0 1600.0 7.0 | . . . |
| Spring-Autumn | Mid_Range | 3 | Annual_Visitors Average_Expense Trip_Duration_Days | 6333333.3 1266.7 7.0 | 5500000.0 1200.0 7.0 | 4500000.0 1000.0 7.0 | 9000000.0 1600.0 7.0 | 2362907.8 305.5 0.0 |
| Spring-Summer | Mid_Range | 2 | Annual_Visitors Average_Expense Trip_Duration_Days | 5000000.0 1500.0 7.0 | 5000000.0 1500.0 7.0 | 3000000.0 1500.0 7.0 | 7000000.0 1500.0 7.0 | 2828427.1 0.0 0.0 |
| Summer | Luxury | 1 | Annual_Visitors Average_Expense Trip_Duration_Days | 2000000.0 1800.0 7.0 | 2000000.0 1800.0 7.0 | 2000000.0 1800.0 7.0 | 2000000.0 1800.0 7.0 | . . . |
| Summer-Autumn | Mid_Range | 1 | Annual_Visitors Average_Expense Trip_Duration_Days | 6000000.0 1100.0 7.0 | 6000000.0 1100.0 7.0 | 6000000.0 1100.0 7.0 | 6000000.0 1100.0 7.0 | . . . |
| Winter | Budget | 1 | Annual_Visitors Average_Expense Trip_Duration_Days | 8000000.0 800.0 7.0 | 8000000.0 800.0 7.0 | 8000000.0 800.0 7.0 | 8000000.0 800.0 7.0 | . . . |
| Luxury | 1 | Annual_Visitors Average_Expense Trip_Duration_Days | 6500000.0 1900.0 7.0 | 6500000.0 1900.0 7.0 | 6500000.0 1900.0 7.0 | 6500000.0 1900.0 7.0 | . . . | |
| Winter-NewYear | Luxury | 1 | Annual_Visitors Average_Expense Trip_Duration_Days | 12000000.0 2000.0 7.0 | 12000000.0 2000.0 7.0 | 12000000.0 2000.0 7.0 | 12000000.0 2000.0 7.0 | . . . |
5) PROC UNIVARIATE: DISTRIBUTION OF VISITORS AND EXPENSE
proc univariate data=work.world_tourist_enriched;
var Annual_Visitors Average_Expense;
histogram Annual_Visitors Average_Expense;
run;
OUTPUT:
The UNIVARIATE Procedure
Variable: Annual_Visitors
| Moments | |||
|---|---|---|---|
| N | 12 | Sum Weights | 12 |
| Mean | 5833333.33 | Sum Observations | 70000000 |
| Std Deviation | 2994945.24 | Variance | 8.9697E12 |
| Skewness | 0.46655187 | Kurtosis | 0.28994019 |
| Uncorrected SS | 5.07E14 | Corrected SS | 9.86667E13 |
| Coeff Variation | 51.3419183 | Std Error Mean | 864566.219 |
| Basic Statistical Measures | |||
|---|---|---|---|
| Location | Variability | ||
| Mean | 5833333 | Std Deviation | 2994945 |
| Median | 5750000 | Variance | 8.9697E12 |
| Mode | . | Range | 10500000 |
| Interquartile Range | 3750000 | ||
| Tests for Location: Mu0=0 | ||||
|---|---|---|---|---|
| Test | Statistic | p Value | ||
| Student's t | t | 6.747122 | Pr > |t| | <.0001 |
| Sign | M | 6 | Pr >= |M| | 0.0005 |
| Signed Rank | S | 39 | Pr >= |S| | 0.0005 |
| Quantiles (Definition 5) | |
|---|---|
| Level | Quantile |
| 100% Max | 12000000 |
| 99% | 12000000 |
| 95% | 12000000 |
| 90% | 9000000 |
| 75% Q3 | 7500000 |
| 50% Median | 5750000 |
| 25% Q1 | 3750000 |
| 10% | 2000000 |
| 5% | 1500000 |
| 1% | 1500000 |
| 0% Min | 1500000 |
| Extreme Observations | |||
|---|---|---|---|
| Lowest | Highest | ||
| Value | Obs | Value | Obs |
| 1500000 | 6 | 6500000 | 8 |
| 2000000 | 5 | 7000000 | 1 |
| 3000000 | 12 | 8000000 | 3 |
| 4500000 | 2 | 9000000 | 4 |
| 5000000 | 9 | 12000000 | 10 |
The UNIVARIATE Procedure
The UNIVARIATE Procedure
Variable: Average_Expense
| Moments | |||
|---|---|---|---|
| N | 12 | Sum Weights | 12 |
| Mean | 1475 | Sum Observations | 17700 |
| Std Deviation | 374.469321 | Variance | 140227.273 |
| Skewness | -0.4510759 | Kurtosis | -0.7734376 |
| Uncorrected SS | 27650000 | Corrected SS | 1542500 |
| Coeff Variation | 25.3877506 | Std Error Mean | 108.099982 |
| Basic Statistical Measures | |||
|---|---|---|---|
| Location | Variability | ||
| Mean | 1475.000 | Std Deviation | 374.46932 |
| Median | 1550.000 | Variance | 140227 |
| Mode | 1500.000 | Range | 1200 |
| Interquartile Range | 600.00000 | ||
Note: The mode displayed is the smallest of 2 modes with a count of 2.
| Tests for Location: Mu0=0 | ||||
|---|---|---|---|---|
| Test | Statistic | p Value | ||
| Student's t | t | 13.64478 | Pr > |t| | <.0001 |
| Sign | M | 6 | Pr >= |M| | 0.0005 |
| Signed Rank | S | 39 | Pr >= |S| | 0.0005 |
| Quantiles (Definition 5) | |
|---|---|
| Level | Quantile |
| 100% Max | 2000 |
| 99% | 2000 |
| 95% | 2000 |
| 90% | 1900 |
| 75% Q3 | 1750 |
| 50% Median | 1550 |
| 25% Q1 | 1150 |
| 10% | 1000 |
| 5% | 800 |
| 1% | 800 |
| 0% Min | 800 |
| Extreme Observations | |||
|---|---|---|---|
| Lowest | Highest | ||
| Value | Obs | Value | Obs |
| 800 | 3 | 1600 | 11 |
| 1000 | 4 | 1700 | 9 |
| 1100 | 7 | 1800 | 5 |
| 1200 | 2 | 1900 | 8 |
| 1500 | 12 | 2000 | 10 |
The UNIVARIATE Procedure
6) PROC SGPLOT: VISUALIZE RELATION BETWEEN EXPENSE & VISITORS
ods graphics on;
proc sgplot data=work.world_tourist_enriched;
title "Average Expense vs Annual Visitors by Best Season";
scatter x=Average_Expense y=Annual_Visitors / group=Best_Season;
xaxis label="Average Expense per Visitor (USD)";
yaxis label="Annual Visitors";
run;
OUTPUT:
proc sgplot data=work.world_tourist_enriched;
title "Total Annual Visitors by Country";
vbar Country / response=Annual_Visitors stat=sum;
yaxis label="Total Annual Visitors";
run;
ods graphics off;
OUTPUT:
7) MACRO FOR TRAVEL RECOMMENDATION CATEGORIES
%macro travel_reco(min_budget=, max_budget=, min_safety=, season=);
title "Travel Recommendations: season=&season min_safety=&min_safety budget=&min_budget to &max_budget";
data work.travel_candidates;
set work.world_tourist_enriched;
where Average_Expense between &min_budget and &max_budget
and Safety_Score >= &min_safety
and (upcase(Best_Season) = upcase("&season")
or upcase("&season") = 'ANY');
run;
proc print data=work.travel_candidates noobs;
var Place_Name Country Best_Season Safety_Score Average_Expense Activities;
run;
%mend travel_reco;
%travel_reco(min_budget=700, max_budget=1500, min_safety=8, season=Spring-Summer);
OUTPUT:
| Place_Name | Country | Best_Season | Safety_Score | Average_Expense | Activities |
|---|---|---|---|---|---|
| Eiffel_Tower_Paris | France | Spring-Summer | 8 | 1500 | City_viewing|Museum_visits |
| Sydney_Opera_House | Australia | Spring-Summer | 8 | 1500 | Opera|Harbour_cruises|City_walks |
%travel_reco(min_budget=800, max_budget=1200, min_safety=7, season=ANY);
OUTPUT:
| Place_Name | Country | Best_Season | Safety_Score | Average_Expense | Activities |
|---|---|---|---|---|---|
| Grand_Canyon_National_Park | USA | Spring-Autumn | 9 | 1200 | Hiking|Photography|Camping |
| Taj_Mahal_Agra | India | Winter | 7 | 800 | Heritage_tour|Street_food |
| Great_Wall_Beijing | China | Spring-Autumn | 7 | 1000 | Hiking|Cultural_exploration |
8) MACRO SHOWING DATE FUNCTIONS (INTCK, INTNX, TODAY)
%macro date_scenario_report;
data work.date_scenarios;
set work.world_tourist_enriched;
/* Days from booking date to today (report generation date) */
Days_from_Booking_to_Today = intck('day', Booking_Date, today());
/* Number of months between booking and trip start */
Months_Between_Booking_and_Start = intck('month', Booking_Date, Start_Date);
/* Shift Start_Date by +6 months to simulate re-planned trip */
Replanned_Start = intnx('month', Start_Date, 6, 'same');
format Replanned_Start date9.;
/* Extract year and month for reporting */
Booking_Year = year(Booking_Date);
Booking_Month = month(Booking_Date);
format Booking_Date Start_Date End_Date date9.
Start_Month monyy7.;
run;
proc print data=work.date_scenarios noobs;
var Place_Name Country Booking_Date Start_Date End_Date
Days_from_Booking_to_Today
Months_Between_Booking_and_Start
Replanned_Start;
run;
%mend date_scenario_report;
%date_scenario_report;
| Place_Name | Country | Booking_Date | Start_Date | End_Date | Days_from_Booking_to_Today | Months_Between_Booking_and_Start | Replanned_Start |
|---|---|---|---|---|---|---|---|
| Eiffel_Tower_Paris | France | 01JAN2025 | 01FEB2025 | 07FEB2025 | 337 | 1 | 01AUG2025 |
| Grand_Canyon_National_Park | USA | 15FEB2025 | 01MAR2025 | 07MAR2025 | 292 | 1 | 01SEP2025 |
| Taj_Mahal_Agra | India | 10NOV2024 | 01DEC2024 | 07DEC2024 | 389 | 1 | 01JUN2025 |
| Great_Wall_Beijing | China | 20MAR2025 | 01APR2025 | 07APR2025 | 259 | 1 | 01OCT2025 |
| Santorini_Islands | Greece | 05APR2025 | 01MAY2025 | 07MAY2025 | 243 | 1 | 01NOV2025 |
| Machu_Picchu_Citadel | Peru | 28AUG2024 | 01SEP2024 | 07SEP2024 | 463 | 1 | 01MAR2025 |
| Bali_Beach_Resorts | Indonesia | 03MAY2025 | 01JUN2025 | 07JUN2025 | 215 | 1 | 01DEC2025 |
| Dubai_Burj_Khalifa | UAE | 18DEC2024 | 01JAN2025 | 07JAN2025 | 351 | 1 | 01JUL2025 |
| Singapore_Marina_Bay | Singapore | 23JAN2025 | 01FEB2025 | 07FEB2025 | 315 | 1 | 01AUG2025 |
| New_York_Times_Square | USA | 10DEC2024 | 01JAN2025 | 07JAN2025 | 359 | 1 | 01JUL2025 |
| Tokyo_Shibuya_Crossing | Japan | 12MAR2025 | 01APR2025 | 07APR2025 | 267 | 1 | 01OCT2025 |
| Sydney_Opera_House | Australia | 01SEP2024 | 01OCT2024 | 07OCT2024 | 459 | 1 | 01APR2025 |
No comments:
Post a Comment