FAMOUS DELHI CELEBRITIES AND THEIR BUSINESS FAMILY DATA ANALYSIS USING ARRAYS | RETAIN | CONDITIONAL LOGIC | PROC SUMMARY | PROC UNIVARIATE | PROC FREQ | PROC SQL | AND %MALL_REPORT MACRO
options nocenter;
1) Create simulated dataset of 12 Delhi celebrities and their businesses
data del_celeb_bus;
infile datalines dsd dlm= ',';
length CelebID 8 CelebName $40 BusinessName $60 BizType $30 FamilyOwner $40 NumBranches 8;
format CelebID 8.;
input CelebID CelebName :$40. BusinessName :$60. BizType :$30. FamilyOwner :$40. NumBranches;
datalines;
1,"Arjun Mehra","Mehra Mall & Co","Luxury Mall","Mehra Family",6
2,"Riya Chopra","Chopra Neighborhood Mall","Neighborhood Mall","Chopra Family",3
3,"Kabir Singh","Singh Outlets","Outlet Mall","Singh Brothers",8
4,"Tara Bhandari","Bhandari Multiplexes","Multiplex","Bhandari Family",4
5,"Nikhil Grover","Grover Grocers","Grocery","Grover Family",12
6,"Ananya Rao","Rao Electronics Bazaar","Electronics Bazaar","Rao Family",7
7,"Vikram Sethi","Sethi Luxury Plaza","Luxury Mall","Sethi Family",5
8,"Meena Khurana","Khurana Neighborhood","Neighborhood Mall","Khurana Family",2
9,"Rohit Ahuja","Ahuja Outlets","Outlet Mall","Ahuja Group",6
10,"Pooja Malhotra","Malhotra Multiplex","Multiplex","Malhotra Family",3
11,"Ishaan Verma","Verma Supergrocers","Grocery","Verma Family",9
12,"Sana Bedi","Bedi Electronics Hub","Electronics Bazaar","Bedi Family",1
;
run;
proc print data = del_celeb_bus;run;
OUTPUT:
| Obs | CelebID | CelebName | BusinessName | BizType | FamilyOwner | NumBranches |
|---|---|---|---|---|---|---|
| 1 | 1 | Arjun Mehra | Mehra Mall & Co | Luxury Mall | Mehra Family | 6 |
| 2 | 2 | Riya Chopra | Chopra Neighborhood Mall | Neighborhood Mall | Chopra Family | 3 |
| 3 | 3 | Kabir Singh | Singh Outlets | Outlet Mall | Singh Brothers | 8 |
| 4 | 4 | Tara Bhandari | Bhandari Multiplexes | Multiplex | Bhandari Family | 4 |
| 5 | 5 | Nikhil Grover | Grover Grocers | Grocery | Grover Family | 12 |
| 6 | 6 | Ananya Rao | Rao Electronics Bazaar | Electronics Bazaar | Rao Family | 7 |
| 7 | 7 | Vikram Sethi | Sethi Luxury Plaza | Luxury Mall | Sethi Family | 5 |
| 8 | 8 | Meena Khurana | Khurana Neighborhood | Neighborhood Mall | Khurana Family | 2 |
| 9 | 9 | Rohit Ahuja | Ahuja Outlets | Outlet Mall | Ahuja Group | 6 |
| 10 | 10 | Pooja Malhotra | Malhotra Multiplex | Multiplex | Malhotra Family | 3 |
| 11 | 11 | Ishaan Verma | Verma Supergrocers | Grocery | Verma Family | 9 |
| 12 | 12 | Sana Bedi | Bedi Electronics Hub | Electronics Bazaar | Bedi Family | 1 |
2) DATA step using ARRAY, RETAIN and conditional logic to create indicators and running totals
data del_celeb_bus_flags;
set del_celeb_bus;
/* Create numeric indicator variables for each BizType using an array */
array mall_flag[6] Mall_Luxury Mall_Neighborhood Mall_Outlet Mall_Multiplex Mall_Grocery Mall_Electronics;
/* Initialize flags to 0 for each observation */
do i = 1 to dim(mall_flag);
mall_flag[i] = 0;
end;
/* Set the appropriate flag based on BizType */
if upcase(strip(BizType)) = "LUXURY MALL" then Mall_Luxury = 1;
else if upcase(strip(BizType)) = "NEIGHBORHOOD MALL" then Mall_Neighborhood = 1;
else if upcase(strip(BizType)) = "OUTLET MALL" then Mall_Outlet = 1;
else if upcase(strip(BizType)) = "MULTIPLEX" then Mall_Multiplex = 1;
else if upcase(strip(BizType)) = "GROCERY" then Mall_Grocery = 1;
else if upcase(strip(BizType)) = "ELECTRONICS BAZAAR" then Mall_Electronics = 1;
/* Use RETAIN to make a running total of branches seen so far (ordered by dataset) */
retain RunningTotalBranches 0;
RunningTotalBranches + NumBranches;
/* Create a size category for the business based on number of branches */
length SizeCategory $12;
if NumBranches <= 2 then SizeCategory = "Micro";
else if 3 <= NumBranches <= 5 then SizeCategory = "Small";
else if 6 <= NumBranches <= 9 then SizeCategory = "Medium";
else SizeCategory = "Large";
drop i;
run;
proc print data=del_celeb_bus_flags;
run;
OUTPUT:
| Obs | CelebID | CelebName | BusinessName | BizType | FamilyOwner | NumBranches | Mall_Luxury | Mall_Neighborhood | Mall_Outlet | Mall_Multiplex | Mall_Grocery | Mall_Electronics | RunningTotalBranches | SizeCategory |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 1 | Arjun Mehra | Mehra Mall & Co | Luxury Mall | Mehra Family | 6 | 1 | 0 | 0 | 0 | 0 | 0 | 6 | Medium |
| 2 | 2 | Riya Chopra | Chopra Neighborhood Mall | Neighborhood Mall | Chopra Family | 3 | 0 | 1 | 0 | 0 | 0 | 0 | 9 | Small |
| 3 | 3 | Kabir Singh | Singh Outlets | Outlet Mall | Singh Brothers | 8 | 0 | 0 | 1 | 0 | 0 | 0 | 17 | Medium |
| 4 | 4 | Tara Bhandari | Bhandari Multiplexes | Multiplex | Bhandari Family | 4 | 0 | 0 | 0 | 1 | 0 | 0 | 21 | Small |
| 5 | 5 | Nikhil Grover | Grover Grocers | Grocery | Grover Family | 12 | 0 | 0 | 0 | 0 | 1 | 0 | 33 | Large |
| 6 | 6 | Ananya Rao | Rao Electronics Bazaar | Electronics Bazaar | Rao Family | 7 | 0 | 0 | 0 | 0 | 0 | 1 | 40 | Medium |
| 7 | 7 | Vikram Sethi | Sethi Luxury Plaza | Luxury Mall | Sethi Family | 5 | 1 | 0 | 0 | 0 | 0 | 0 | 45 | Small |
| 8 | 8 | Meena Khurana | Khurana Neighborhood | Neighborhood Mall | Khurana Family | 2 | 0 | 1 | 0 | 0 | 0 | 0 | 47 | Micro |
| 9 | 9 | Rohit Ahuja | Ahuja Outlets | Outlet Mall | Ahuja Group | 6 | 0 | 0 | 1 | 0 | 0 | 0 | 53 | Medium |
| 10 | 10 | Pooja Malhotra | Malhotra Multiplex | Multiplex | Malhotra Family | 3 | 0 | 0 | 0 | 1 | 0 | 0 | 56 | Small |
| 11 | 11 | Ishaan Verma | Verma Supergrocers | Grocery | Verma Family | 9 | 0 | 0 | 0 | 0 | 1 | 0 | 65 | Medium |
| 12 | 12 | Sana Bedi | Bedi Electronics Hub | Electronics Bazaar | Bedi Family | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 66 | Micro |
3) PROC SUMMARY: summarize number of observations and branch totals by BizType
proc summary data=del_celeb_bus nway;
class BizType;
var NumBranches;
output out=summary_by_type
n(NumBranches)=CountObs
sum(NumBranches)=TotalBranches
mean(NumBranches)=AvgBranches
median(NumBranches)=MedianBranches;
run;
proc print data=del_celeb_bus;
run;
OUTPUT:
| Obs | CelebID | CelebName | BusinessName | BizType | FamilyOwner | NumBranches |
|---|---|---|---|---|---|---|
| 1 | 1 | Arjun Mehra | Mehra Mall & Co | Luxury Mall | Mehra Family | 6 |
| 2 | 2 | Riya Chopra | Chopra Neighborhood Mall | Neighborhood Mall | Chopra Family | 3 |
| 3 | 3 | Kabir Singh | Singh Outlets | Outlet Mall | Singh Brothers | 8 |
| 4 | 4 | Tara Bhandari | Bhandari Multiplexes | Multiplex | Bhandari Family | 4 |
| 5 | 5 | Nikhil Grover | Grover Grocers | Grocery | Grover Family | 12 |
| 6 | 6 | Ananya Rao | Rao Electronics Bazaar | Electronics Bazaar | Rao Family | 7 |
| 7 | 7 | Vikram Sethi | Sethi Luxury Plaza | Luxury Mall | Sethi Family | 5 |
| 8 | 8 | Meena Khurana | Khurana Neighborhood | Neighborhood Mall | Khurana Family | 2 |
| 9 | 9 | Rohit Ahuja | Ahuja Outlets | Outlet Mall | Ahuja Group | 6 |
| 10 | 10 | Pooja Malhotra | Malhotra Multiplex | Multiplex | Malhotra Family | 3 |
| 11 | 11 | Ishaan Verma | Verma Supergrocers | Grocery | Verma Family | 9 |
| 12 | 12 | Sana Bedi | Bedi Electronics Hub | Electronics Bazaar | Bedi Family | 1 |
4) PROC UNIVARIATE on NumBranches to see distribution (skew, moments, basic stats)
proc univariate data=del_celeb_bus cibasic;
var NumBranches;
histogram NumBranches / kernel;
run;
OUTPUT:
The UNIVARIATE Procedure
Variable: NumBranches
| Moments | |||
|---|---|---|---|
| N | 12 | Sum Weights | 12 |
| Mean | 5.5 | Sum Observations | 66 |
| Std Deviation | 3.17661913 | Variance | 10.0909091 |
| Skewness | 0.57174504 | Kurtosis | 0.02535508 |
| Uncorrected SS | 474 | Corrected SS | 111 |
| Coeff Variation | 57.7567114 | Std Error Mean | 0.91701095 |
| Basic Statistical Measures | |||
|---|---|---|---|
| Location | Variability | ||
| Mean | 5.500000 | Std Deviation | 3.17662 |
| Median | 5.500000 | Variance | 10.09091 |
| Mode | 3.000000 | Range | 11.00000 |
| Interquartile Range | 4.50000 | ||
Note: The mode displayed is the smallest of 2 modes with a count of 2.
| Basic Confidence Limits Assuming Normality | |||
|---|---|---|---|
| Parameter | Estimate | 95% Confidence Limits | |
| Mean | 5.50000 | 3.48167 | 7.51833 |
| Std Deviation | 3.17662 | 2.25030 | 5.39351 |
| Variance | 10.09091 | 5.06386 | 29.08997 |
| Tests for Location: Mu0=0 | ||||
|---|---|---|---|---|
| Test | Statistic | p Value | ||
| Student's t | t | 5.997747 | Pr > |t| | <.0001 |
| Sign | M | 6 | Pr >= |M| | 0.0005 |
| Signed Rank | S | 39 | Pr >= |S| | 0.0005 |
| Quantiles (Definition 5) | |
|---|---|
| Level | Quantile |
| 100% Max | 12.0 |
| 99% | 12.0 |
| 95% | 12.0 |
| 90% | 9.0 |
| 75% Q3 | 7.5 |
| 50% Median | 5.5 |
| 25% Q1 | 3.0 |
| 10% | 2.0 |
| 5% | 1.0 |
| 1% | 1.0 |
| 0% Min | 1.0 |
| Extreme Observations | |||
|---|---|---|---|
| Lowest | Highest | ||
| Value | Obs | Value | Obs |
| 1 | 12 | 6 | 9 |
| 2 | 8 | 7 | 6 |
| 3 | 10 | 8 | 3 |
| 3 | 2 | 9 | 11 |
| 4 | 4 | 12 | 5 |
The UNIVARIATE Procedure
proc freq data=del_celeb_bus;
tables BizType FamilyOwner / nocum nopercent;
run;
OUTPUT:
The FREQ Procedure
| BizType | Frequency |
|---|---|
| Electronics Bazaar | 2 |
| Grocery | 2 |
| Luxury Mall | 2 |
| Multiplex | 2 |
| Neighborhood Mall | 2 |
| Outlet Mall | 2 |
| FamilyOwner | Frequency |
|---|---|
| Ahuja Group | 1 |
| Bedi Family | 1 |
| Bhandari Family | 1 |
| Chopra Family | 1 |
| Grover Family | 1 |
| Khurana Family | 1 |
| Malhotra Family | 1 |
| Mehra Family | 1 |
| Rao Family | 1 |
| Sethi Family | 1 |
| Singh Brothers | 1 |
| Verma Family | 1 |
6) PROC SQL with subquery: find celebrities whose NumBranches > average for their BizType
proc sql;
create table above_avg_per_type as
select a.*
from del_celeb_bus as a
where a.NumBranches > (
select mean(b.NumBranches)
from del_celeb_bus as b
where b.BizType = a.BizType
)
order by a.BizType, a.NumBranches desc;
quit;
proc print data=above_avg_per_type;
run;
OUTPUT:
| Obs | CelebID | CelebName | BusinessName | BizType | FamilyOwner | NumBranches |
|---|---|---|---|---|---|---|
| 1 | 6 | Ananya Rao | Rao Electronics Bazaar | Electronics Bazaar | Rao Family | 7 |
| 2 | 5 | Nikhil Grover | Grover Grocers | Grocery | Grover Family | 12 |
| 3 | 1 | Arjun Mehra | Mehra Mall & Co | Luxury Mall | Mehra Family | 6 |
| 4 | 4 | Tara Bhandari | Bhandari Multiplexes | Multiplex | Bhandari Family | 4 |
| 5 | 2 | Riya Chopra | Chopra Neighborhood Mall | Neighborhood Mall | Chopra Family | 3 |
| 6 | 3 | Kabir Singh | Singh Outlets | Outlet Mall | Singh Brothers | 8 |
7) PROC SQL: find maximum branches and the celebrity owning that business per type (subquery + join)
proc sql;
create table max_branches_per_type as
select t.BizType, t.CelebName, t.BusinessName, t.NumBranches
from del_celeb_bus as t
where t.NumBranches = (
select max(s.NumBranches)
from del_celeb_bus as s
where s.BizType = t.BizType
)
order by t.BizType;
quit;
proc print data=max_branches_per_type;
run;
OUTPUT:
| Obs | BizType | CelebName | BusinessName | NumBranches |
|---|---|---|---|---|
| 1 | Electronics Bazaar | Ananya Rao | Rao Electronics Bazaar | 7 |
| 2 | Grocery | Nikhil Grover | Grover Grocers | 12 |
| 3 | Luxury Mall | Arjun Mehra | Mehra Mall & Co | 6 |
| 4 | Multiplex | Tara Bhandari | Bhandari Multiplexes | 4 |
| 5 | Neighborhood Mall | Riya Chopra | Chopra Neighborhood Mall | 3 |
| 6 | Outlet Mall | Kabir Singh | Singh Outlets | 8 |
8) Macro to create PDF report for a given mall type: %mall_report(type=)
%macro mall_report(type=);
/* sanitize type variable for matching */
%let _type = %upcase(%sysfunc(strip(&type.)));
/* create PDF filename dynamically (placed in current working directory) */
ods pdf file="mall_report_&_type..pdf" style=journal;
title "Mall Report for &type.";
footnote "Generated by %sysfunc(date(), date9.)";
proc print data=del_celeb_bus;
where upcase(BizType) = "&_type.";
var CelebID CelebName BusinessName FamilyOwner NumBranches;
title2 "Businesses for type: &type.";
run;
proc sql;
select count(*) as CountObs,
sum(NumBranches) as TotalBranches,
mean(NumBranches) as AvgBranches,
median(NumBranches) as MedianBranches
from del_celeb_bus
where upcase(BizType) = "&_type.";
quit;
proc sql;
select a.CelebName, a.BusinessName, a.NumBranches
from del_celeb_bus as a
where upcase(a.BizType) = "&_type." and a.NumBranches > (
select mean(b.NumBranches)
from del_celeb_bus as b
where upcase(b.BizType) = "&_type."
);
quit;
ods pdf close;
title;
footnote;
%mend mall_report;
Example of running the macro for 'Outlet Mall'
%mall_report(type=Outlet Mall);
| Obs | CelebID | CelebName | BusinessName | FamilyOwner | NumBranches |
|---|---|---|---|---|---|
| 3 | 3 | Kabir Singh | Singh Outlets | Singh Brothers | 8 |
| 9 | 9 | Rohit Ahuja | Ahuja Outlets | Ahuja Group | 6 |
| CountObs | TotalBranches | AvgBranches | MedianBranches |
|---|---|---|---|
| 2 | 14 | 7 | 7 |
| CelebName | BusinessName | NumBranches |
|---|---|---|
| Kabir Singh | Singh Outlets | 8 |
No comments:
Post a Comment