Friday, 14 November 2025

309.WORLD FESTIVALS DATASET CREATION AND STATISTICAL REPORTING USING PROC SQL | PROC SORT | PROC PRINT | PROC SUMMARY | PROC FORMAT | AND AUTOMATED MACRO-BASED ANALYSIS

WORLD FESTIVALS DATASET CREATION AND STATISTICAL REPORTING USING PROC SQL | PROC SORT | PROC PRINT | PROC SUMMARY | PROC FORMAT | AND AUTOMATED MACRO-BASED ANALYSIS

options validvarname=any nodate nonumber nocenter;

1) Create Worlds Festival Dataset

data work.world_festivals;

    infile datalines dlm='|' dsd truncover;

    length Festival_ID $6 Festival_Name $60 Country $40 Month $9 Type $20;

    format Visitors_per_Year comma15.;

    input Festival_ID :$6. Festival_Name :$60. Country :$40. Month :$9.

          Duration_Days :8. Type :$20. Visitors_per_Year :comma15.;

    /* Basic validation/cleaning */

    Festival_Name = strip(Festival_Name);

    Country = strip(Country);

    Month = propcase(strip(Month));

    Type = upcase(strip(Type));

    if missing(Duration_Days) then Duration_Days = .;

    if Visitors_per_Year = . then Visitors_per_Year = 0;

datalines;

F001|Diwali|India|October|5|Religious|100000000

F002|Carnival (Rio)|Brazil|February|7|Cultural|2000000

F003|Oktoberfest|Germany|September|16|Cultural|6000000

F004|Cherry Blossom|Japan|April|14|Seasonal|30000000

F005|Songkran|Thailand|April|3|Religious|2000000

F006|Mardi Gras|USA|February|9|Cultural|1000000

F007|La Tomatina|Spain|August|1|Cultural|220000

F008|Harbin Ice Festival|China|January|60|Seasonal|1000000

F009|Holi|India|March|2|Religious|25000000

F010|Day of the Dead|Mexico|November|3|Religious|1500000

F011|Glastonbury|United Kingdom|June|5|Music|210000

F012|Coachella|USA|April|3|Music|250000

F013|Eid al-Fitr|Various|Varies|3|Religious|500000000

F014|Running of the Bulls|Spain|July|3|Cultural|80000

F015|Lantern Festival|Taiwan|February|2|Cultural|1200000

;

run;

proc print data=work.world_festivals;

run;

OUTPUT:

ObsFestival_IDFestival_NameCountryMonthTypeVisitors_per_YearDuration_Days
1F001DiwaliIndiaOctoberRELIGIOUS100,000,0005
2F002Carnival (Rio)BrazilFebruaryCULTURAL2,000,0007
3F003OktoberfestGermanySeptemberCULTURAL6,000,00016
4F004Cherry BlossomJapanAprilSEASONAL30,000,00014
5F005SongkranThailandAprilRELIGIOUS2,000,0003
6F006Mardi GrasUSAFebruaryCULTURAL1,000,0009
7F007La TomatinaSpainAugustCULTURAL220,0001
8F008Harbin Ice FestivalChinaJanuarySEASONAL1,000,00060
9F009HoliIndiaMarchRELIGIOUS25,000,0002
10F010Day of the DeadMexicoNovemberRELIGIOUS1,500,0003
11F011GlastonburyUnited KingdomJuneMUSIC210,0005
12F012CoachellaUSAAprilMUSIC250,0003
13F013Eid al-FitrVariousVariesRELIGIOUS500,000,0003
14F014Running of the BullsSpainJulyCULTURAL80,0003
15F015Lantern FestivalTaiwanFebruaryCULTURAL1,200,0002


2) Define formats for visitor size categories and months (PROC FORMAT)

proc format;

    value visitors_fmt

        low - <100000 = 'Small (<100k)'

        100000 - <500000 = 'Medium (100k-500k)'

        500000 - <2000000 = 'Large (500k-2M)'

        2000000 - <20000000 = 'Very Large (2M-20M)'

        20000000 - high = 'Mega (20M+)';

    value $typefmt

        'RELIGIOUS' = 'Religious'

        'CULTURAL'  = 'Cultural'

        'MUSIC'     = 'Music'

        'SEASONAL'  = 'Seasonal'

        other       = 'Other';

run;

LOG:

NOTE: Format VISITORS_FMT has been output.

NOTE: Format $TYPEFMT has been output.

3) Use PROC SQL to create derived tables:

proc sql;

    create table work.world_festivals_summary as

    select Type,

           count(*) as N_Festivals,

           mean(Visitors_per_Year) as Mean_Visitors format=comma15.,

           median(Visitors_per_Year) as Median_Visitors format=comma15.,

           min(Visitors_per_Year) as Min_Visitors format=comma15.,

           max(Visitors_per_Year) as Max_Visitors format=comma15.,

           sum(Visitors_per_Year) as Total_Visitors format=comma15.

    from work.world_festivals

    group by Type

    order by Total_Visitors desc;

quit;

proc print data=work.world_festivals_summary;

run;

OUTPUT:

ObsTypeN_FestivalsMean_VisitorsMedian_VisitorsMin_VisitorsMax_VisitorsTotal_Visitors
1RELIGIOUS5125,700,00025,000,0001,500,000500,000,000628,500,000
2SEASONAL215,500,00015,500,0001,000,00030,000,00031,000,000
3CULTURAL61,750,0001,100,00080,0006,000,00010,500,000
4MUSIC2230,000230,000210,000250,000460,000


proc sql;

    create table work.top_festivals as

    select * 

    from work.world_festivals

    order by Visitors_per_Year desc;

quit;

proc print data=work.top_festivals;

run;

OUTPUT:

ObsFestival_IDFestival_NameCountryMonthTypeVisitors_per_YearDuration_Days
1F013Eid al-FitrVariousVariesRELIGIOUS500,000,0003
2F001DiwaliIndiaOctoberRELIGIOUS100,000,0005
3F004Cherry BlossomJapanAprilSEASONAL30,000,00014
4F009HoliIndiaMarchRELIGIOUS25,000,0002
5F003OktoberfestGermanySeptemberCULTURAL6,000,00016
6F005SongkranThailandAprilRELIGIOUS2,000,0003
7F002Carnival (Rio)BrazilFebruaryCULTURAL2,000,0007
8F010Day of the DeadMexicoNovemberRELIGIOUS1,500,0003
9F015Lantern FestivalTaiwanFebruaryCULTURAL1,200,0002
10F006Mardi GrasUSAFebruaryCULTURAL1,000,0009
11F008Harbin Ice FestivalChinaJanuarySEASONAL1,000,00060
12F012CoachellaUSAAprilMUSIC250,0003
13F007La TomatinaSpainAugustCULTURAL220,0001
14F011GlastonburyUnited KingdomJuneMUSIC210,0005
15F014Running of the BullsSpainJulyCULTURAL80,0003


4) Sort datasets for printing and reporting

proc sort data=work.world_festivals out=work.world_festivals_1;

    by descending Visitors_per_Year Festival_Name;

run;

proc print data=work.world_festivals_1;

run;

OUTPUT:

ObsFestival_IDFestival_NameCountryMonthTypeVisitors_per_YearDuration_Days
1F013Eid al-FitrVariousVariesRELIGIOUS500,000,0003
2F001DiwaliIndiaOctoberRELIGIOUS100,000,0005
3F004Cherry BlossomJapanAprilSEASONAL30,000,00014
4F009HoliIndiaMarchRELIGIOUS25,000,0002
5F003OktoberfestGermanySeptemberCULTURAL6,000,00016
6F002Carnival (Rio)BrazilFebruaryCULTURAL2,000,0007
7F005SongkranThailandAprilRELIGIOUS2,000,0003
8F010Day of the DeadMexicoNovemberRELIGIOUS1,500,0003
9F015Lantern FestivalTaiwanFebruaryCULTURAL1,200,0002
10F008Harbin Ice FestivalChinaJanuarySEASONAL1,000,00060
11F006Mardi GrasUSAFebruaryCULTURAL1,000,0009
12F012CoachellaUSAAprilMUSIC250,0003
13F007La TomatinaSpainAugustCULTURAL220,0001
14F011GlastonburyUnited KingdomJuneMUSIC210,0005
15F014Running of the BullsSpainJulyCULTURAL80,0003


proc sort data=work.world_festivals_summary out=work.world_festivals_summary_sorted;

    by descending Total_Visitors;

run;

proc print data=work.world_festivals_summary_sorted;

run;

OUTPUT:

ObsTypeN_FestivalsMean_VisitorsMedian_VisitorsMin_VisitorsMax_VisitorsTotal_Visitors
1RELIGIOUS5125,700,00025,000,0001,500,000500,000,000628,500,000
2SEASONAL215,500,00015,500,0001,000,00030,000,00031,000,000
3CULTURAL61,750,0001,100,00080,0006,000,00010,500,000
4MUSIC2230,000230,000210,000250,000460,000


5) Basic print of the master dataset (PROC PRINT)

title "WORLD FESTIVALS MASTER LIST (sorted by Visitors)";

proc print data=work.world_festivals_1 label noobs;

    var Festival_ID Festival_Name Country Month Duration_Days Type Visitors_per_Year;

    format Visitors_per_Year comma15.;

    label Festival_ID='ID'

          Festival_Name='Festival'

          Country='Country'

          Month='Month'

          Duration_Days='Duration (days)'

          Type='Type'

          Visitors_per_Year='Visitors/Year';

run;

title;

OUTPUT:

WORLD FESTIVALS MASTER LIST (sorted by Visitors)

IDFestivalCountryMonthDuration (days)TypeVisitors/Year
F013Eid al-FitrVariousVaries3RELIGIOUS500,000,000
F001DiwaliIndiaOctober5RELIGIOUS100,000,000
F004Cherry BlossomJapanApril14SEASONAL30,000,000
F009HoliIndiaMarch2RELIGIOUS25,000,000
F003OktoberfestGermanySeptember16CULTURAL6,000,000
F002Carnival (Rio)BrazilFebruary7CULTURAL2,000,000
F005SongkranThailandApril3RELIGIOUS2,000,000
F010Day of the DeadMexicoNovember3RELIGIOUS1,500,000
F015Lantern FestivalTaiwanFebruary2CULTURAL1,200,000
F008Harbin Ice FestivalChinaJanuary60SEASONAL1,000,000
F006Mardi GrasUSAFebruary9CULTURAL1,000,000
F012CoachellaUSAApril3MUSIC250,000
F007La TomatinaSpainAugust1CULTURAL220,000
F011GlastonburyUnited KingdomJune5MUSIC210,000
F014Running of the BullsSpainJuly3CULTURAL80,000

6) Summary statistics (PROC SUMMARY) - overall and by Month/Type

proc summary data=work.world_festivals_1 nway;

    class Type;

    var Visitors_per_Year Duration_Days;

    output out=work.summary_by_type

        mean=Mean_Visitors Mean_Duration

        median=Median_Visitors Median_Duration

        sum=Total_Visitors Total_Duration

        min=Min_Visitors Min_Duration

        max=Max_Visitors Max_Duration

        / autoname;

run;

proc print data=work.summary_by_type;

run;

OUTPUT:

ObsType_TYPE__FREQ_Mean_VisitorsMean_DurationMedian_VisitorsMedian_DurationTotal_VisitorsTotal_DurationMin_VisitorsMin_DurationMax_VisitorsMax_Duration
1CULTURAL161,750,0006.33331,100,000510,500,0003880,00016,000,00016
2MUSIC12230,0004.0000230,0004460,0008210,0003250,0005
3RELIGIOUS15125,700,0003.200025,000,0003628,500,000161,500,0002500,000,0005
4SEASONAL1215,500,00037.000015,500,0003731,000,000741,000,0001430,000,00060


proc summary data=work.world_festivals nway;

    class Month;

    var Visitors_per_Year;

    output out=work.summary_by_month

        mean=Mean_Visitors

        sum=Total_Visitors

        n=Count

        / autoname;

run;

proc print data=work.summary_by_month;

run;

OUTPUT:

ObsMonth_TYPE__FREQ_Mean_VisitorsTotal_VisitorsCount
1April1310,750,00032,250,0003
2August11220,000220,0001
3February131,400,0004,200,0003
4January111,000,0001,000,0001
5July1180,00080,0001
6June11210,000210,0001
7March1125,000,00025,000,0001
8November111,500,0001,500,0001
9October11100,000,000100,000,0001
10September116,000,0006,000,0001
11Varies11500,000,000500,000,0001


7) Create a visitors-size categorical variable and add to dataset using PROC SQL

proc sql;

    create table work.world_festivals_enriched as

    select a.*,

           put(a.Visitors_per_Year, visitors_fmt.) as Visitor_Size,

           put(a.Type, $typefmt.) as Type_Label

    from work.world_festivals as a;

quit;

proc print data=work.world_festivals_enriched;

run;

OUTPUT:

ObsFestival_IDFestival_NameCountryMonthTypeVisitors_per_YearDuration_DaysVisitor_SizeType_Label
1F001DiwaliIndiaOctoberRELIGIOUS100,000,0005Mega (20M+)Religious
2F002Carnival (Rio)BrazilFebruaryCULTURAL2,000,0007Very Large (2M-20M)Cultural
3F003OktoberfestGermanySeptemberCULTURAL6,000,00016Very Large (2M-20M)Cultural
4F004Cherry BlossomJapanAprilSEASONAL30,000,00014Mega (20M+)Seasonal
5F005SongkranThailandAprilRELIGIOUS2,000,0003Very Large (2M-20M)Religious
6F006Mardi GrasUSAFebruaryCULTURAL1,000,0009Large (500k-2M)Cultural
7F007La TomatinaSpainAugustCULTURAL220,0001Medium (100k-500k)Cultural
8F008Harbin Ice FestivalChinaJanuarySEASONAL1,000,00060Large (500k-2M)Seasonal
9F009HoliIndiaMarchRELIGIOUS25,000,0002Mega (20M+)Religious
10F010Day of the DeadMexicoNovemberRELIGIOUS1,500,0003Large (500k-2M)Religious
11F011GlastonburyUnited KingdomJuneMUSIC210,0005Medium (100k-500k)Music
12F012CoachellaUSAAprilMUSIC250,0003Medium (100k-500k)Music
13F013Eid al-FitrVariousVariesRELIGIOUS500,000,0003Mega (20M+)Religious
14F014Running of the BullsSpainJulyCULTURAL80,0003Small (<100k)Cultural
15F015Lantern FestivalTaiwanFebruaryCULTURAL1,200,0002Large (500k-2M)Cultural


8) Macros for automated reporting

%macro report_by_type(type=);

    %local t;

    %let t = %upcase(&type);

    %put NOTE: Generating report for TYPE = &t;


    title "Festival Report - Type: &t";

    proc print data=work.world_festivals_enriched;

        where Type = "&t";

        var Festival_ID Festival_Name Country Month Duration_Days Visitor_Size

            Visitors_per_Year;

        format Visitors_per_Year comma15.;

        label Visitors_per_Year='Visitors/Year';

    run;


    proc summary data=work.world_festivals_enriched nway;

        where Type = "&t";

        var Visitors_per_Year Duration_Days;

        output out=work._summary_&t mean= meanDuration meanVisitors

               sum= totalDuration totalVisitors / autoname;

    run;


    proc print data=work._summary_&t noobs;

        title2 "Summary Statistics for &t";

    run;

    title;

%mend report_by_type;


%macro report_by_month(month=);

    %local m;

    %let m = %sysfunc(propcase(&month));

    %put NOTE: Generating report for MONTH = &m;


    title "Festival Report - Month: &m";

    proc print data=work.world_festivals_enriched;

        where Month = "&m";

        var Festival_ID Festival_Name Country Type Duration_Days Visitor_Size Visitors_per_Year;

        format Visitors_per_Year comma15.;

    run;


    proc summary data=work.world_festivals_enriched nway;

        where Month = "&m";

        var Visitors_per_Year Duration_Days;

        output out=work._summary_&m mean= meanDuration meanVisitors

               sum= totalDuration totalVisitors / autoname;

    run;


    proc print data=work._summary_&m noobs;

        title2 "Summary Statistics for &m";

    run;

    title;

%mend report_by_month;


%macro full_report(outlib=work);

    %put NOTE: Generating full automated report...;


    ods html file="&outlib..world_festivals_report.html" style=statistical;

    title "World Festivals — Comprehensive Report";


    /* Print master dataset */

    proc print data=work.world_festivals_enriched label;

        var Festival_ID Festival_Name Country Month Duration_Days Type_Label Visitor_Size Visitors_per_Year;

        format Visitors_per_Year comma15.;

    run;


    /* Print summary by Type */

    proc print data=work.world_festivals_summary_sorted label;

    run;


    /* Print monthly summary */

    proc print data=work.summary_by_month label;

    run;


    ods html close;

    title;

    %put NOTE: Report saved to &outlib..world_festivals_report.html ;

%mend full_report;


 %report_by_type(type=Religious); 

OUTPUT:

Festival Report - Type: RELIGIOUS

ObsFestival_IDFestival_NameCountryMonthDuration_DaysVisitor_SizeVisitors_per_Year
1F001DiwaliIndiaOctober5Mega (20M+)100,000,000
5F005SongkranThailandApril3Very Large (2M-20M)2,000,000
9F009HoliIndiaMarch2Mega (20M+)25,000,000
10F010Day of the DeadMexicoNovember3Large (500k-2M)1,500,000
13F013Eid al-FitrVariousVaries3Mega (20M+)500,000,000

Festival Report - Type: RELIGIOUS

Summary Statistics for RELIGIOUS

_TYPE__FREQ_meanDurationmeanVisitorstotalDurationtotalVisitors
05125,700,0003.2628,500,00016

 %report_by_month(month=April); 

OUTPUT:

Festival Report - Month: April

ObsFestival_IDFestival_NameCountryTypeDuration_DaysVisitor_SizeVisitors_per_Year
4F004Cherry BlossomJapanSEASONAL14Mega (20M+)30,000,000
5F005SongkranThailandRELIGIOUS3Very Large (2M-20M)2,000,000
12F012CoachellaUSAMUSIC3Medium (100k-500k)250,000

Festival Report - Month: April

Summary Statistics for April

_TYPE__FREQ_meanDurationmeanVisitorstotalDurationtotalVisitors
0310,750,0006.6666732,250,00020

 %full_report(outlib=/tmp); 

OUTPUT:

World Festivals — Comprehensive Report

ObsFestival_IDFestival_NameCountryMonthDuration_DaysType_LabelVisitor_SizeVisitors_per_Year
1F001DiwaliIndiaOctober5ReligiousMega (20M+)100,000,000
2F002Carnival (Rio)BrazilFebruary7CulturalVery Large (2M-20M)2,000,000
3F003OktoberfestGermanySeptember16CulturalVery Large (2M-20M)6,000,000
4F004Cherry BlossomJapanApril14SeasonalMega (20M+)30,000,000
5F005SongkranThailandApril3ReligiousVery Large (2M-20M)2,000,000
6F006Mardi GrasUSAFebruary9CulturalLarge (500k-2M)1,000,000
7F007La TomatinaSpainAugust1CulturalMedium (100k-500k)220,000
8F008Harbin Ice FestivalChinaJanuary60SeasonalLarge (500k-2M)1,000,000
9F009HoliIndiaMarch2ReligiousMega (20M+)25,000,000
10F010Day of the DeadMexicoNovember3ReligiousLarge (500k-2M)1,500,000
11F011GlastonburyUnited KingdomJune5MusicMedium (100k-500k)210,000
12F012CoachellaUSAApril3MusicMedium (100k-500k)250,000
13F013Eid al-FitrVariousVaries3ReligiousMega (20M+)500,000,000
14F014Running of the BullsSpainJuly3CulturalSmall (<100k)80,000
15F015Lantern FestivalTaiwanFebruary2CulturalLarge (500k-2M)1,200,000

World Festivals — Comprehensive Report

ObsTypeN_FestivalsMean_VisitorsMedian_VisitorsMin_VisitorsMax_VisitorsTotal_Visitors
1RELIGIOUS5125,700,00025,000,0001,500,000500,000,000628,500,000
2SEASONAL215,500,00015,500,0001,000,00030,000,00031,000,000
3CULTURAL61,750,0001,100,00080,0006,000,00010,500,000
4MUSIC2230,000230,000210,000250,000460,000

World Festivals — Comprehensive Report

ObsMonth_TYPE__FREQ_Mean_VisitorsTotal_VisitorsCount
1April1310,750,00032,250,0003
2August11220,000220,0001
3February131,400,0004,200,0003
4January111,000,0001,000,0001
5July1180,00080,0001
6June11210,000210,0001
7March1125,000,00025,000,0001
8November111,500,0001,500,0001
9October11100,000,000100,000,0001
10September116,000,0006,000,0001
11Varies11500,000,000500,000,0001




To Visit My Previous Different Types Of Oils Dataset:Click Here
To Visit My Previous Different Types Of Series 2025 Dataset:Click Here
To Visit My Previous Analyzing Yoga Asanas Worldwide Dataset:Click Here
To Visit My Previous Analyzing Indian Languages Dataset:Click Here  



Follow Us On : 


 


--- FOLLOW OUR BLOG FOR MORE INFORMATION.

--->PLEASE DO COMMENTS AND SHARE OUR BLOG.



No comments:

Post a Comment