REALWORLD VIBE DATASET - PROC PRINT | PROC MEANS | PROC FREQ | PROC SQL | PROC SORT | PROC REPORT | PROC TABULATE | PROC SGPLOT | PROC CONTENTS | PROC FORMAT | MACRO | AUTOMATION & ANALYSIS
options nodate nonumber validvarname=any;
options nocenter;
1) Create the dataset with 7 variables and 12 observations
data work.vibe;
infile datalines dlm='|' dsd truncover;
length VibeID $8 Location $30 Genre $20 EventDate 8;
format TicketPrice dollar8.2 EventDate yymmdd10.;
/* Variables:*/
/* VibeID - unique ID (string)*/
/* Location - city/venue*/
/* Genre - music/style (e.g., Indie, EDM, Jazz)*/
/* Energy - numeric scale 1-10 (higher = more energetic)*/
/* CrowdSize - numeric count of attendees*/
/* AvgAge - average attendee age (years)*/
/* TicketPrice - numeric (INR/USD formatting)*/
/* EventDate - date of event (formatted)*/
/* */
input VibeID $ Location :$30. Genre :$20. Energy CrowdSize AvgAge TicketPrice EventDate :yymmdd10.;
datalines;
V001|Hyderabad - Warehouse|EDM|9|1200|24|25.00|2025-07-10
V002|Mumbai - OpenGround|Bollywood|8|5000|28|12.00|2025-08-05
V003|Bengaluru - Cafe 9|Indie|6|180|26|8.00|2025-03-21
V004|Chennai - Beach Fest|Fusion|7|2200|29|15.00|2024-12-14
V005|Kolkata - Jazz Club|Jazz|5|140|42|18.00|2025-02-10
V006|Pune - Craft Hall|Metal|8|800|22|20.00|2025-06-02
V007|Goa - Sunset Stage|Reggae|7|3500|30|10.00|2025-01-20
V008|Delhi - Rooftop|Pop|7|900|27|14.00|2025-04-18
V009|Lucknow - Heritage|Folk|4|300|36|6.00|2024-11-24
V010|Jaipur - Palace Grounds|Classical|3|200|48|22.00|2025-09-01
V011|Hyderabad - IndieHouse|Indie|6|220|25|9.00|2025-05-15
V012|Mumbai - NightMarket|EDM|9|4500|23|18.00|2025-08-22
;
run;
Purpose: display the dataset rows for quick inspection
proc print data=work.vibe noobs label;
title "VIBE DATA - RAW";
var VibeID Location Genre EventDate Energy CrowdSize AvgAge TicketPrice;
run;
Output:
| VibeID | Location | Genre | EventDate | Energy | CrowdSize | AvgAge | TicketPrice |
|---|---|---|---|---|---|---|---|
| V001 | Hyderabad - Warehouse | EDM | 2025-07-10 | 9 | 1200 | 24 | $25.00 |
| V002 | Mumbai - OpenGround | Bollywood | 2025-08-05 | 8 | 5000 | 28 | $12.00 |
| V003 | Bengaluru - Cafe 9 | Indie | 2025-03-21 | 6 | 180 | 26 | $8.00 |
| V004 | Chennai - Beach Fest | Fusion | 2024-12-14 | 7 | 2200 | 29 | $15.00 |
| V005 | Kolkata - Jazz Club | Jazz | 2025-02-10 | 5 | 140 | 42 | $18.00 |
| V006 | Pune - Craft Hall | Metal | 2025-06-02 | 8 | 800 | 22 | $20.00 |
| V007 | Goa - Sunset Stage | Reggae | 2025-01-20 | 7 | 3500 | 30 | $10.00 |
| V008 | Delhi - Rooftop | Pop | 2025-04-18 | 7 | 900 | 27 | $14.00 |
| V009 | Lucknow - Heritage | Folk | 2024-11-24 | 4 | 300 | 36 | $6.00 |
| V010 | Jaipur - Palace Grounds | Classical | 2025-09-01 | 3 | 200 | 48 | $22.00 |
| V011 | Hyderabad - IndieHouse | Indie | 2025-05-15 | 6 | 220 | 25 | $9.00 |
| V012 | Mumbai - NightMarket | EDM | 2025-08-22 | 9 | 4500 | 23 | $18.00 |
2) PROC MEANS for numeric summary
Purpose: compute central tendency and spread for numeric variables
proc means data=work.vibe n mean median std min max maxdec=2;
var Energy CrowdSize AvgAge TicketPrice;
title "NUMERIC SUMMARY - PROC MEANS";
run;
Output:
The MEANS Procedure
| Variable | N | Mean | Median | Std Dev | Minimum | Maximum |
|---|---|---|---|---|---|---|
Energy CrowdSize AvgAge TicketPrice | 12 12 12 12 | 6.58 1595.00 30.00 14.75 | 7.00 850.00 27.50 14.50 | 1.88 1782.03 8.02 5.97 | 3.00 140.00 22.00 6.00 | 9.00 5000.00 48.00 25.00 |
3) PROC FREQ for categorical distributions
Purpose: count frequencies of categorical variables
proc freq data=work.vibe;
tables Genre Location / nocum nopercent;
title "FREQUENCY DISTRIBUTIONS - PROC FREQ";
run;
Output:
The FREQ Procedure
| Genre | Frequency |
|---|---|
| Bollywood | 1 |
| Classical | 1 |
| EDM | 2 |
| Folk | 1 |
| Fusion | 1 |
| Indie | 2 |
| Jazz | 1 |
| Metal | 1 |
| Pop | 1 |
| Reggae | 1 |
| Location | Frequency |
|---|---|
| Bengaluru - Cafe 9 | 1 |
| Chennai - Beach Fest | 1 |
| Delhi - Rooftop | 1 |
| Goa - Sunset Stage | 1 |
| Hyderabad - IndieHouse | 1 |
| Hyderabad - Warehouse | 1 |
| Jaipur - Palace Grounds | 1 |
| Kolkata - Jazz Club | 1 |
| Lucknow - Heritage | 1 |
| Mumbai - NightMarket | 1 |
| Mumbai - OpenGround | 1 |
| Pune - Craft Hall | 1 |
4) PROC SQL to create a summary table and flagged records
Purpose: use SQL to create aggregated summaries and conditional subsets
proc sql;
create table work.vibe_summary as
select Genre,
count(*) as N,
round(mean(Energy),.01) as AvgEnergy,
round(mean(CrowdSize),1) as AvgCrowd,
round(mean(TicketPrice),.01) as AvgTicket
from work.vibe
group by Genre
order by AvgEnergy desc;
quit;
proc print;run;
Output:
| Obs | Genre | N | AvgEnergy | AvgCrowd | AvgTicket |
|---|---|---|---|---|---|
| 1 | EDM | 2 | 9 | 2850 | 21.5 |
| 2 | Bollywood | 1 | 8 | 5000 | 12.0 |
| 3 | Metal | 1 | 8 | 800 | 20.0 |
| 4 | Reggae | 1 | 7 | 3500 | 10.0 |
| 5 | Fusion | 1 | 7 | 2200 | 15.0 |
| 6 | Pop | 1 | 7 | 900 | 14.0 |
| 7 | Indie | 2 | 6 | 200 | 8.5 |
| 8 | Jazz | 1 | 5 | 140 | 18.0 |
| 9 | Folk | 1 | 4 | 300 | 6.0 |
| 10 | Classical | 1 | 3 | 200 | 22.0 |
5) Create flagged high-energy/popular events using SQL
proc sql;
create table work.high_vibes as
select *,
case when Energy >= 8 and CrowdSize >= 800 then 'HIGH'
when Energy >=7 and CrowdSize >=300 then 'MEDIUM' else 'LOW' end as VibeLevel
from work.vibe
order by Energy desc, CrowdSize desc;
quit;
proc print;run;
Output:
| Obs | VibeID | Location | Genre | EventDate | TicketPrice | Energy | CrowdSize | AvgAge | VibeLevel |
|---|---|---|---|---|---|---|---|---|---|
| 1 | V012 | Mumbai - NightMarket | EDM | 2025-08-22 | $18.00 | 9 | 4500 | 23 | HIGH |
| 2 | V001 | Hyderabad - Warehouse | EDM | 2025-07-10 | $25.00 | 9 | 1200 | 24 | HIGH |
| 3 | V002 | Mumbai - OpenGround | Bollywood | 2025-08-05 | $12.00 | 8 | 5000 | 28 | HIGH |
| 4 | V006 | Pune - Craft Hall | Metal | 2025-06-02 | $20.00 | 8 | 800 | 22 | HIGH |
| 5 | V007 | Goa - Sunset Stage | Reggae | 2025-01-20 | $10.00 | 7 | 3500 | 30 | MEDIUM |
| 6 | V004 | Chennai - Beach Fest | Fusion | 2024-12-14 | $15.00 | 7 | 2200 | 29 | MEDIUM |
| 7 | V008 | Delhi - Rooftop | Pop | 2025-04-18 | $14.00 | 7 | 900 | 27 | MEDIUM |
| 8 | V011 | Hyderabad - IndieHouse | Indie | 2025-05-15 | $9.00 | 6 | 220 | 25 | LOW |
| 9 | V003 | Bengaluru - Cafe 9 | Indie | 2025-03-21 | $8.00 | 6 | 180 | 26 | LOW |
| 10 | V005 | Kolkata - Jazz Club | Jazz | 2025-02-10 | $18.00 | 5 | 140 | 42 | LOW |
| 11 | V009 | Lucknow - Heritage | Folk | 2024-11-24 | $6.00 | 4 | 300 | 36 | LOW |
| 12 | V010 | Jaipur - Palace Grounds | Classical | 2025-09-01 | $22.00 | 3 | 200 | 48 | LOW |
6) PROC SORT for preparing reports
Purpose: sort the dataset by EventDate to produce chronological outputs
proc sort data=work.vibe out=work.vibe_sorted;
by EventDate;
run;
proc print;run;
Output:
| Obs | VibeID | Location | Genre | EventDate | TicketPrice | Energy | CrowdSize | AvgAge |
|---|---|---|---|---|---|---|---|---|
| 1 | V009 | Lucknow - Heritage | Folk | 2024-11-24 | $6.00 | 4 | 300 | 36 |
| 2 | V004 | Chennai - Beach Fest | Fusion | 2024-12-14 | $15.00 | 7 | 2200 | 29 |
| 3 | V007 | Goa - Sunset Stage | Reggae | 2025-01-20 | $10.00 | 7 | 3500 | 30 |
| 4 | V005 | Kolkata - Jazz Club | Jazz | 2025-02-10 | $18.00 | 5 | 140 | 42 |
| 5 | V003 | Bengaluru - Cafe 9 | Indie | 2025-03-21 | $8.00 | 6 | 180 | 26 |
| 6 | V008 | Delhi - Rooftop | Pop | 2025-04-18 | $14.00 | 7 | 900 | 27 |
| 7 | V011 | Hyderabad - IndieHouse | Indie | 2025-05-15 | $9.00 | 6 | 220 | 25 |
| 8 | V006 | Pune - Craft Hall | Metal | 2025-06-02 | $20.00 | 8 | 800 | 22 |
| 9 | V001 | Hyderabad - Warehouse | EDM | 2025-07-10 | $25.00 | 9 | 1200 | 24 |
| 10 | V002 | Mumbai - OpenGround | Bollywood | 2025-08-05 | $12.00 | 8 | 5000 | 28 |
| 11 | V012 | Mumbai - NightMarket | EDM | 2025-08-22 | $18.00 | 9 | 4500 | 23 |
| 12 | V010 | Jaipur - Palace Grounds | Classical | 2025-09-01 | $22.00 | 3 | 200 | 48 |
7) PROC REPORT to produce a formatted report
Purpose: show a nicely formatted summary report with computed columns
proc report data=work.vibe nowd split='|' headline headskip;
column Genre VibeID Location EventDate Energy CrowdSize AvgAge TicketPrice;
define Genre / group 'GENRE';
define VibeID / display 'ID';
define Location / display 'LOCATION';
define EventDate / display 'EVENT DATE' format=yymmdd10.;
define Energy / analysis mean 'ENERGY';
define CrowdSize / analysis sum format=8. 'TOTAL CROWD';
define AvgAge / analysis mean 'AVG AGE';
define TicketPrice / analysis mean format=dollar8.2 'AVG TICKET';
title "VIBE SUMMARY REPORT - PROC REPORT";
run;
Output:
| GENRE | ID | LOCATION | EVENT DATE | ENERGY | TOTAL CROWD | AVG AGE | AVG TICKET |
|---|---|---|---|---|---|---|---|
| Bollywood | V002 | Mumbai - OpenGround | 2025-08-05 | 8 | 5000 | 28 | $12.00 |
| Classical | V010 | Jaipur - Palace Grounds | 2025-09-01 | 3 | 200 | 48 | $22.00 |
| EDM | V001 | Hyderabad - Warehouse | 2025-07-10 | 9 | 1200 | 24 | $25.00 |
| V012 | Mumbai - NightMarket | 2025-08-22 | 9 | 4500 | 23 | $18.00 | |
| Folk | V009 | Lucknow - Heritage | 2024-11-24 | 4 | 300 | 36 | $6.00 |
| Fusion | V004 | Chennai - Beach Fest | 2024-12-14 | 7 | 2200 | 29 | $15.00 |
| Indie | V003 | Bengaluru - Cafe 9 | 2025-03-21 | 6 | 180 | 26 | $8.00 |
| V011 | Hyderabad - IndieHouse | 2025-05-15 | 6 | 220 | 25 | $9.00 | |
| Jazz | V005 | Kolkata - Jazz Club | 2025-02-10 | 5 | 140 | 42 | $18.00 |
| Metal | V006 | Pune - Craft Hall | 2025-06-02 | 8 | 800 | 22 | $20.00 |
| Pop | V008 | Delhi - Rooftop | 2025-04-18 | 7 | 900 | 27 | $14.00 |
| Reggae | V007 | Goa - Sunset Stage | 2025-01-20 | 7 | 3500 | 30 | $10.00 |
8) PROC TABULATE for multi-dimensional summaries
Purpose: compact multi-variable table of counts and means
proc tabulate data=work.vibe;
class Genre;
var Energy CrowdSize TicketPrice;
table Genre,
N='Count'
Energy*mean='Mean Energy'*f=6.2
CrowdSize*mean='Mean Crowd'*f=8.1
TicketPrice*mean='Mean Ticket'*f=dollar8.2;
title "MULTI-DIMENSIONAL SUMMARY - PROC TABULATE";
run;
Output:
| Count | Energy | CrowdSize | TicketPrice | |
|---|---|---|---|---|
| Mean Energy | Mean Crowd | Mean Ticket | ||
| Genre | 1 | 8.00 | 5000.0 | $12.00 |
| Bollywood | ||||
| Classical | 1 | 3.00 | 200.0 | $22.00 |
| EDM | 2 | 9.00 | 2850.0 | $21.50 |
| Folk | 1 | 4.00 | 300.0 | $6.00 |
| Fusion | 1 | 7.00 | 2200.0 | $15.00 |
| Indie | 2 | 6.00 | 200.0 | $8.50 |
| Jazz | 1 | 5.00 | 140.0 | $18.00 |
| Metal | 1 | 8.00 | 800.0 | $20.00 |
| Pop | 1 | 7.00 | 900.0 | $14.00 |
| Reggae | 1 | 7.00 | 3500.0 | $10.00 |
9) Simple PROC SGPLOT for a scatter of Energy vs CrowdSize
Purpose: visualize relationship between energy and crowd size
proc sgplot data=work.vibe;
title "ENERGY vs CROWD SIZE - PROC SGPLOT";
scatter x=Energy y=CrowdSize / markerattrs=(symbol=CircleFilled size=8);
reg x=Energy y=CrowdSize; /* trend line */
xaxis label="Energy (1-10)";
yaxis label="Crowd Size";
run;
Output:
10) Macro to produce repeated analysis for a given genre
Purpose: reusable macro to filter by genre and run print+means
%macro vibe_by_genre(g=Indie);
%put NOTE: Running Vibe analysis for genre=&g;
proc sql;
create table work.vibe_&g as
select * from work.vibe where upcase(Genre)=upcase("&g");
quit;
proc print data=work.vibe_&g noobs;
title "Vibe - Genre: &g";
run;
proc means data=work.vibe_&g n mean std min max maxdec=2;
var Energy CrowdSize AvgAge TicketPrice;
title "Numeric Summary - Genre: &g";
run;
%mend vibe_by_genre;
11) Demonstrate using the macro to run for 'EDM' and 'Indie'
%vibe_by_genre(g=EDM);
Output:
| VibeID | Location | Genre | EventDate | TicketPrice | Energy | CrowdSize | AvgAge |
|---|---|---|---|---|---|---|---|
| V001 | Hyderabad - Warehouse | EDM | 2025-07-10 | $25.00 | 9 | 1200 | 24 |
| V012 | Mumbai - NightMarket | EDM | 2025-08-22 | $18.00 | 9 | 4500 | 23 |
The MEANS Procedure
| Variable | N | Mean | Std Dev | Minimum | Maximum |
|---|---|---|---|---|---|
Energy CrowdSize AvgAge TicketPrice | 2 2 2 2 | 9.00 2850.00 23.50 21.50 | 0.00 2333.45 0.71 4.95 | 9.00 1200.00 23.00 18.00 | 9.00 4500.00 24.00 25.00 |
%vibe_by_genre(g=Indie);
Output:
| VibeID | Location | Genre | EventDate | TicketPrice | Energy | CrowdSize | AvgAge |
|---|---|---|---|---|---|---|---|
| V003 | Bengaluru - Cafe 9 | Indie | 2025-03-21 | $8.00 | 6 | 180 | 26 |
| V011 | Hyderabad - IndieHouse | Indie | 2025-05-15 | $9.00 | 6 | 220 | 25 |
The MEANS Procedure
| Variable | N | Mean | Std Dev | Minimum | Maximum |
|---|---|---|---|---|---|
Energy CrowdSize AvgAge TicketPrice | 2 2 2 2 | 6.00 200.00 25.50 8.50 | 0.00 28.28 0.71 0.71 | 6.00 180.00 25.00 8.00 | 6.00 220.00 26.00 9.00 |
12) Macro to auto-generate summary CSV for all genres via PROC SQL loop
Purpose: loop over distinct genres and output summaries into work.genre_summaries
proc sql noprint;
select distinct Genre into :g1-:g99 from work.vibe;
%let gcount=&sqlobs;
quit;
%macro genre_summaries;
%if &gcount = 0 %then %do;
%put WARNING: No genres found!;
%end;
%else %do i=1 %to &gcount;
%let thisg=&&g&i;
proc sql;
create table work.summary_&i as
select "&thisg" as Genre,
count(*) as N,
round(mean(Energy),.01) as AvgEnergy,
round(mean(CrowdSize),1) as AvgCrowd,
round(mean(TicketPrice),.01) as AvgTicket
from work.vibe
where Genre = "&thisg";
quit;
proc print;run;
%end;
Combine
data work.genre_summaries;
set work.summary_:;
run;
proc print;run;
%mend genre_summaries;
%genre_summaries
Output:
| Obs | Genre | N | AvgEnergy | AvgCrowd | AvgTicket |
|---|---|---|---|---|---|
| 1 | Bollywood | 1 | 8 | 5000 | 12 |
| Obs | Genre | N | AvgEnergy | AvgCrowd | AvgTicket |
|---|---|---|---|---|---|
| 1 | Classical | 1 | 3 | 200 | 22 |
| Obs | Genre | N | AvgEnergy | AvgCrowd | AvgTicket |
|---|---|---|---|---|---|
| 1 | EDM | 2 | 9 | 2850 | 21.5 |
| Obs | Genre | N | AvgEnergy | AvgCrowd | AvgTicket |
|---|---|---|---|---|---|
| 1 | Folk | 1 | 4 | 300 | 6 |
| Obs | Genre | N | AvgEnergy | AvgCrowd | AvgTicket |
|---|---|---|---|---|---|
| 1 | Fusion | 1 | 7 | 2200 | 15 |
| Obs | Genre | N | AvgEnergy | AvgCrowd | AvgTicket |
|---|---|---|---|---|---|
| 1 | Indie | 2 | 6 | 200 | 8.5 |
| Obs | Genre | N | AvgEnergy | AvgCrowd | AvgTicket |
|---|---|---|---|---|---|
| 1 | Jazz | 1 | 5 | 140 | 18 |
| Obs | Genre | N | AvgEnergy | AvgCrowd | AvgTicket |
|---|---|---|---|---|---|
| 1 | Metal | 1 | 8 | 800 | 20 |
| Obs | Genre | N | AvgEnergy | AvgCrowd | AvgTicket |
|---|---|---|---|---|---|
| 1 | Pop | 1 | 7 | 900 | 14 |
| Obs | Genre | N | AvgEnergy | AvgCrowd | AvgTicket |
|---|---|---|---|---|---|
| 1 | Reggae | 1 | 7 | 3500 | 10 |
| Obs | Genre | N | AvgEnergy | AvgCrowd | AvgTicket |
|---|---|---|---|---|---|
| 1 | Reggae | 1 | 7 | 3500 | 10 |
No comments:
Post a Comment