Monday, 1 September 2025

276.REAL WORLD DIFFERENT TYPES OF PEOPLE IN INDIA DATASET CREATION AND ANALYSIS USING PROC FORMAT | PROC CONTENTS | PROC PRINT | PROC SORT | PROC FREQ | PROC MEANS | PROC SUMMARY | PROC UNIVARIATE | PROC TABULATE | PROC REPORT | PROC TRANSPOSE | PROC SQL | PROC RANK | PROC SGPLOT | MACROS IN SAS

REAL WORLD DIFFERENT TYPES OF PEOPLE IN INDIA DATASET CREATION AND ANALYSIS USING PROC FORMAT | PROC CONTENTS | PROC PRINT | PROC SORT | PROC FREQ | PROC MEANS | PROC SUMMARY | PROC UNIVARIATE | PROC TABULATE | PROC REPORT | PROC TRANSPOSE | PROC SQL | PROC RANK | PROC SGPLOT | MACROS IN SAS

 /*CREATING THE REAL-WORLD "TYPES OF PEOPLE IN INDIA" DATASET*/

1) FORMATS & LABELS

Purpose: Define user-friendly categories for readability in outputs. 

proc format;

  value $genderF  'M'='Male' 'F'='Female' 'O'='Other/Non-binary';

  value $marF     'S'='Single' 'M'='Married' 'D'='Divorced' 'W'='Widowed';

  value $ruF      'Urban'='Urban' 'Rural'='Rural' 'SemiUrban'='Semi-Urban';

  value yesnoF    0='No' 1='Yes';

  value agebandF  low-17='0-17' 18-24='18-24' 25-34='25-34' 35-44='35-44'

                  45-54='45-54' 55-64='55-64' 65-high='65+';

  value incomeF   low-100000='<= 1L' 100001-300000='1L-3L' 300001-600000='3L-6L'

                  600001-1200000='6L-12L' 1200001-high='> 12L';

run;

Log:

NOTE: Format $GENDERF has been output.
NOTE: Format $MARF has been output.
NOTE: Format $RUF has been output.

NOTE: Format YESNOF has been output.

NOTE: Format AGEBANDF has been output.

NOTE: Format INCOMEF has been output.


2) CORE DATA CREATION 

Purpose: Create the base PEOPLE_INDIA dataset with realistic attributes. 

data people_india;

  length Person_ID 8 Name $28 Gender $1 Age 8 City $20 State $20

         Education_Level $20 Occupation $24 Sector $16

         Income_INR 8 Household_Size 8 Marital_Status $1

         Language $18 Smartphone_User 8 Internet_Hours_Per_Day 8

         Commute_Mode $14 Commute_Minutes 8 Fitness_Mins_Per_Week 8

         Has_Health_Insurance 8 Voter_ID_Flag 8 Credit_Score 8

         UPI_Transactions_Month 8 Festival_Celebrated $18

         Cuisine_Pref $16 Veg_Flag 8 Digital_Literacy_Score 8

         Rural_Urban $10 Disability_Flag 8 Travel_Trips_Year 8

         Pollution_Concern_Score 8 Blood_Group $3;

  infile datalines dsd truncover;

  input Person_ID Name :$28. Gender :$1. Age City :$20. State :$20.

        Education_Level :$20. Occupation :$24. Sector :$16.

        Income_INR Household_Size Marital_Status :$1.

        Language :$18. Smartphone_User Internet_Hours_Per_Day

        Commute_Mode :$14. Commute_Minutes Fitness_Mins_Per_Week

        Has_Health_Insurance Voter_ID_Flag Credit_Score

        UPI_Transactions_Month Festival_Celebrated :$18.

        Cuisine_Pref :$16. Veg_Flag Digital_Literacy_Score

        Rural_Urban :$10. Disability_Flag Travel_Trips_Year

        Pollution_Concern_Score Blood_Group :$3.;

  format Gender $genderF. Marital_Status $marF. Rural_Urban $ruF.

         Smartphone_User yesnoF. Has_Health_Insurance yesnoF.

         Voter_ID_Flag yesnoF. Veg_Flag yesnoF. Age agebandF.

         Income_INR incomeF.;

  label Education_Level = "Highest Education"

        Income_INR      = "Annual Income (INR)"

        Internet_Hours_Per_Day = "Daily Internet Hours"

        Commute_Mode    = "Primary Commute Mode"

        Fitness_Mins_Per_Week = "Weekly Fitness Minutes"

        Digital_Literacy_Score = "Digital Literacy (0-100)"

        UPI_Transactions_Month = "Monthly UPI Transactions"

        Pollution_Concern_Score = "Pollution Concern (1-10)";

datalines;

1,Arjun Mehta,M,27,Mumbai,Maharashtra,Graduate,Software Engineer,IT,900000,3,S,Hindi,1,4,Metro,60,120,1,1,760,35,Diwali,North Indian,0,85,Urban,0,4,7,O+

2,Priya Iyer,F,31,Chennai,Tamil Nadu,Postgraduate,Data Analyst,IT,1100000,4,M,Tamil,1,3,Bike,40,90,1,1,780,28,Pongal,South Indian,1,88,Urban,0,3,6,B+

3,Sameer Khan,M,24,Hyderabad,Telangana,Graduate,Inside Sales,Private,450000,5,S,Urdu,1,5,Bike,35,60,0,1,720,22,Eid,Hyderabadi,0,72,Urban,0,2,5,A+

4,Neha Sharma,F,39,Delhi,Delhi,Postgraduate,Marketing Manager,Private,1400000,3,M,Hindi,1,2,Car,55,75,1,1,805,18,Holi,North Indian,0,83,Urban,0,5,8,AB+

5,Rohan Das,M,45,Kolkata,West Bengal,Graduate,School Teacher,Public,650000,4,M,Bengali,1,2,Bus,50,80,1,1,768,20,Durga Puja,Bengali,1,78,Urban,0,1,7,O-

6,Ananya Roy,F,22,Kolkata,West Bengal,Undergraduate,Student,NA,0,5,S,Bengali,1,6,Metro,30,100,0,0,0,12,Durga Puja,Continental,0,70,Urban,0,1,6,A-

7,Amit Patil,M,34,Pune,Maharashtra,Diploma,Mechanic,Private,380000,6,M,Marathi,1,2,Bike,45,40,0,1,700,15,Ganesh Chaturthi,Maharashtrian,1,68,SemiUrban,0,1,5,O+

8,Sana Parveen,F,28,Patna,Bihar,Graduate,Nurse,Healthcare,480000,5,S,Hindi,1,3,Auto,35,120,1,1,730,20,Eid,North Indian,0,77,SemiUrban,0,1,6,B+

9,Ritesh Verma,M,52,Lucknow,Uttar Pradesh,Graduate,Shop Owner,Informal,550000,6,M,Hindi,1,1,Car,25,30,0,1,690,40,Diwali,North Indian,0,60,Urban,0,1,6,A+

10,Keerthi R,F,26,Visakhapatnam,Andhra Pradesh,Graduate,Graphic Designer,Media,520000,4,S,Telugu,1,5,Bike,30,90,0,0,740,24,Ugadi,South Indian,0,82,Urban,0,3,7,B-

11,Gurpreet Singh,M,33,Amritsar,Punjab,Graduate,Logistics Supervisor,Logistics,600000,5,M,Punjabi,1,2,Car,40,60,0,1,715,18,Gurpurab,Punjabi,0,66,Urban,0,2,6,O+

12,Sonali Kulkarni,F,41,Nagpur,Maharashtra,Postgraduate,HR Lead,Private,1200000,3,M,Marathi,1,2,Car,35,120,1,1,790,26,Diwali,Maharashtrian,0,84,Urban,0,2,8,AB-

13,Faizan Ali,M,29,Jaipur,Rajasthan,Graduate,Hotel Front Office,Hospitality,360000,4,S,Hindi,1,4,Bus,25,45,0,1,705,20,Diwali,Rajasthani,0,64,Urban,0,1,5,A+

14,Kavya Nair,F,35,Kochi,Kerala,Postgraduate,Physiotherapist,Healthcare,900000,3,M,Malayalam,1,2,Car,30,180,1,1,775,32,Onam,South Indian,1,86,Urban,0,2,6,O+

15,Manoj Kumar,M,47,Gurugram,Haryana,Graduate,Project Manager,IT,1800000,4,M,Hindi,1,2,Car,60,60,1,1,820,38,Diwali,North Indian,0,90,Urban,0,4,9,B+

16,Anusha S,F,23,Mysuru,Karnataka,Undergraduate,Student,NA,0,5,S,Kannada,1,6,Bus,20,110,0,0,0,14,Dasara,South Indian,1,74,Urban,0,1,6,A-

17,Deepak Yadav,M,30,Indore,Madhya Pradesh,Graduate,Field Sales,Private,420000,6,M,Hindi,1,5,Bike,50,50,0,1,710,25,Diwali,North Indian,0,69,Urban,0,1,6,O+

18,Trisha Dey,F,27,Silchar,Assam,Graduate,Content Writer,Media,460000,4,S,Assamese,1,4,Auto,20,70,0,1,735,22,Bihu,North East,0,80,SemiUrban,0,2,6,B+

19,Vikram Rao,M,38,Bengaluru,Karnataka,Postgraduate,Data Scientist,IT,2200000,3,M,Kannada,1,3,Metro,50,150,1,1,835,45,Ugadi,South Indian,0,92,Urban,0,3,9,O+

20,Sapna Jain,F,29,Bhopal,Madhya Pradesh,Graduate,Accountant,Private,500000,5,S,Hindi,1,3,Bus,30,60,0,1,725,20,Diwali,North Indian,1,76,SemiUrban,0,1,7,A+

21,Harish Chandra,M,56,Varanasi,Uttar Pradesh,Secondary,Priest,Religious,240000,5,M,Hindi,0,0,Walk,10,15,0,1,650,8,Diwali,North Indian,1,50,Urban,0,0,6,O+

22,Rekha Gupta,F,43,Kanpur,Uttar Pradesh,Graduate,Bank Officer,Public,980000,4,M,Hindi,1,2,Car,30,90,1,1,790,30,Diwali,North Indian,1,88,Urban,0,2,8,AB+

23,Aakash Jain,M,25,Ahmedabad,Gujarat,Graduate,Entrepreneur,Startup,1200000,4,S,Gujarati,1,5,Car,35,70,0,1,780,60,Navratri,Gujarati,0,85,Urban,0,4,8,B+

24,Meera Pillai,F,48,Thiruvananthapuram,Kerala,Postgraduate,School Principal,Public,1250000,3,M,Malayalam,1,1,Car,25,100,1,1,815,20,Onam,South Indian,1,90,Urban,0,2,8,A+

25,Rajeev Ranjan,M,32,Ranchi,Jharkhand,Graduate,Police Sub-Inspector,Public,700000,4,M,Hindi,1,2,Bike,20,60,1,1,740,25,Chhath,North Indian,0,72,Urban,0,1,7,O+

26,Nisha B,F,27,Coimbatore,Tamil Nadu,Graduate,Quality Analyst,Manufacturing,520000,4,S,Tamil,1,3,Bike,25,80,0,1,735,22,Pongal,South Indian,1,82,Urban,0,2,7,B-

27,Arvind Sinha,M,44,Bhubaneswar,Odisha,Graduate,Government Clerk,Public,580000,5,M,Odia,1,2,Bike,15,45,1,1,720,18,Raja Parba,Odia,1,65,Urban,0,1,7,A+

28,Shruti Joshi,F,36,Surat,Gujarat,Postgraduate,Fashion Buyer,Private,1300000,3,M,Gujarati,1,3,Car,35,120,0,1,800,35,Navratri,Gujarati,0,88,Urban,0,3,8,O-

29,Noor Zoya,F,21,Aligarh,Uttar Pradesh,Undergraduate,Student,NA,0,6,S,Urdu,1,6,Bus,30,80,0,0,0,10,Eid,North Indian,0,72,SemiUrban,0,1,6,B+

30,Anil Kumar,M,28,Chandigarh,Chandigarh,Graduate,Civil Engineer,Construction,850000,3,S,Hindi,1,2,Car,30,100,0,1,770,25,Holi,North Indian,0,81,Urban,0,2,8,A-

31,Devika Rao,F,33,Mangalore,Karnataka,Graduate,Product Manager,IT,1600000,3,M,Kannada,1,3,Car,35,100,1,1,825,40,Ugadi,South Indian,0,89,Urban,0,3,8,AB+

32,Shivam Patel,M,26,Vadodara,Gujarat,Graduate,Mechanical Engineer,Manufacturing,780000,4,S,Gujarati,1,2,Bike,25,70,0,1,760,20,Navratri,Gujarati,1,80,Urban,0,2,7,O+

;

run;

proc print;run;

Output:

ObsPerson_IDNameGenderAgeCityStateEducation_LevelOccupationSectorIncome_INRHousehold_SizeMarital_StatusLanguageSmartphone_UserInternet_Hours_Per_DayCommute_ModeCommute_MinutesFitness_Mins_Per_WeekHas_Health_InsuranceVoter_ID_FlagCredit_ScoreUPI_Transactions_MonthFestival_CelebratedCuisine_PrefVeg_FlagDigital_Literacy_ScoreRural_UrbanDisability_FlagTravel_Trips_YearPollution_Concern_ScoreBlood_Group
11Arjun MehtaMale25-34MumbaiMaharashtraGraduateSoftware EngineerIT6L-12L3SingleHindiYes4Metro60120YesYes76035DiwaliNorth IndianNo85Urban047O+
22Priya IyerFemale25-34ChennaiTamil NaduPostgraduateData AnalystIT6L-12L4MarriedTamilYes3Bike4090YesYes78028PongalSouth IndianYes88Urban036B+
33Sameer KhanMale18-24HyderabadTelanganaGraduateInside SalesPrivate3L-6L5SingleUrduYes5Bike3560NoYes72022EidHyderabadiNo72Urban025A+
44Neha SharmaFemale35-44DelhiDelhiPostgraduateMarketing ManagerPrivate> 12L3MarriedHindiYes2Car5575YesYes80518HoliNorth IndianNo83Urban058AB+
55Rohan DasMale45-54KolkataWest BengalGraduateSchool TeacherPublic6L-12L4MarriedBengaliYes2Bus5080YesYes76820Durga PujaBengaliYes78Urban017O-
66Ananya RoyFemale18-24KolkataWest BengalUndergraduateStudentNA<= 1L5SingleBengaliYes6Metro30100NoNo012Durga PujaContinentalNo70Urban016A-
77Amit PatilMale25-34PuneMaharashtraDiplomaMechanicPrivate3L-6L6MarriedMarathiYes2Bike4540NoYes70015Ganesh ChaturthiMaharashtrianYes68Semi-Urban015O+
88Sana ParveenFemale25-34PatnaBiharGraduateNurseHealthcare3L-6L5SingleHindiYes3Auto35120YesYes73020EidNorth IndianNo77Semi-Urban016B+
99Ritesh VermaMale45-54LucknowUttar PradeshGraduateShop OwnerInformal3L-6L6MarriedHindiYes1Car2530NoYes69040DiwaliNorth IndianNo60Urban016A+
1010Keerthi RFemale25-34VisakhapatnamAndhra PradeshGraduateGraphic DesignerMedia3L-6L4SingleTeluguYes5Bike3090NoNo74024UgadiSouth IndianNo82Urban037B-
1111Gurpreet SinghMale25-34AmritsarPunjabGraduateLogistics SupervisorLogistics3L-6L5MarriedPunjabiYes2Car4060NoYes71518GurpurabPunjabiNo66Urban026O+
1212Sonali KulkarniFemale35-44NagpurMaharashtraPostgraduateHR LeadPrivate6L-12L3MarriedMarathiYes2Car35120YesYes79026DiwaliMaharashtrianNo84Urban028AB-
1313Faizan AliMale25-34JaipurRajasthanGraduateHotel Front OfficeHospitality3L-6L4SingleHindiYes4Bus2545NoYes70520DiwaliRajasthaniNo64Urban015A+
1414Kavya NairFemale35-44KochiKeralaPostgraduatePhysiotherapistHealthcare6L-12L3MarriedMalayalamYes2Car30180YesYes77532OnamSouth IndianYes86Urban026O+
1515Manoj KumarMale45-54GurugramHaryanaGraduateProject ManagerIT> 12L4MarriedHindiYes2Car6060YesYes82038DiwaliNorth IndianNo90Urban049B+
1616Anusha SFemale18-24MysuruKarnatakaUndergraduateStudentNA<= 1L5SingleKannadaYes6Bus20110NoNo014DasaraSouth IndianYes74Urban016A-
1717Deepak YadavMale25-34IndoreMadhya PradeshGraduateField SalesPrivate3L-6L6MarriedHindiYes5Bike5050NoYes71025DiwaliNorth IndianNo69Urban016O+
1818Trisha DeyFemale25-34SilcharAssamGraduateContent WriterMedia3L-6L4SingleAssameseYes4Auto2070NoYes73522BihuNorth EastNo80Semi-Urban026B+
1919Vikram RaoMale35-44BengaluruKarnatakaPostgraduateData ScientistIT> 12L3MarriedKannadaYes3Metro50150YesYes83545UgadiSouth IndianNo92Urban039O+
2020Sapna JainFemale25-34BhopalMadhya PradeshGraduateAccountantPrivate3L-6L5SingleHindiYes3Bus3060NoYes72520DiwaliNorth IndianYes76Semi-Urban017A+
2121Harish ChandraMale55-64VaranasiUttar PradeshSecondaryPriestReligious1L-3L5MarriedHindiNo0Walk1015NoYes6508DiwaliNorth IndianYes50Urban006O+
2222Rekha GuptaFemale35-44KanpurUttar PradeshGraduateBank OfficerPublic6L-12L4MarriedHindiYes2Car3090YesYes79030DiwaliNorth IndianYes88Urban028AB+
2323Aakash JainMale25-34AhmedabadGujaratGraduateEntrepreneurStartup6L-12L4SingleGujaratiYes5Car3570NoYes78060NavratriGujaratiNo85Urban048B+
2424Meera PillaiFemale45-54ThiruvananthapuramKeralaPostgraduateSchool PrincipalPublic> 12L3MarriedMalayalamYes1Car25100YesYes81520OnamSouth IndianYes90Urban028A+
2525Rajeev RanjanMale25-34RanchiJharkhandGraduatePolice Sub-InspectorPublic6L-12L4MarriedHindiYes2Bike2060YesYes74025ChhathNorth IndianNo72Urban017O+
2626Nisha BFemale25-34CoimbatoreTamil NaduGraduateQuality AnalystManufacturing3L-6L4SingleTamilYes3Bike2580NoYes73522PongalSouth IndianYes82Urban027B-
2727Arvind SinhaMale35-44BhubaneswarOdishaGraduateGovernment ClerkPublic3L-6L5MarriedOdiaYes2Bike1545YesYes72018Raja ParbaOdiaYes65Urban017A+
2828Shruti JoshiFemale35-44SuratGujaratPostgraduateFashion BuyerPrivate> 12L3MarriedGujaratiYes3Car35120NoYes80035NavratriGujaratiNo88Urban038O-
2929Noor ZoyaFemale18-24AligarhUttar PradeshUndergraduateStudentNA<= 1L6SingleUrduYes6Bus3080NoNo010EidNorth IndianNo72Semi-Urban016B+
3030Anil KumarMale25-34ChandigarhChandigarhGraduateCivil EngineerConstruction6L-12L3SingleHindiYes2Car30100NoYes77025HoliNorth IndianNo81Urban028A-
3131Devika RaoFemale25-34MangaloreKarnatakaGraduateProduct ManagerIT> 12L3MarriedKannadaYes3Car35100YesYes82540UgadiSouth IndianNo89Urban038AB+
3232Shivam PatelMale25-34VadodaraGujaratGraduateMechanical EngineerManufacturing6L-12L4SingleGujaratiYes2Bike2570NoYes76020NavratriGujaratiYes80Urban027O+


3) DATA QUALITY & CONTENTS / BASIC OVERVIEW 

Purpose: Quick schema and metadata overview for the dataset. 

proc contents data=people_india varnum;

  title "PEOPLE_INDIA — STRUCTURE & ATTRIBUTES";

run;

title;

Output:

PEOPLE_INDIA — STRUCTURE & ATTRIBUTES

The CONTENTS Procedure

Data Set NameWORK.PEOPLE_INDIAObservations32
Member TypeDATAVariables31
EngineV9Indexes0
Created09/01/2025 17:54:07Observation Length352
Last Modified09/01/2025 17:54:07Deleted Observations0
Protection CompressedNO
Data Set Type SortedNO
Label   
Data RepresentationSOLARIS_X86_64, LINUX_X86_64, ALPHA_TRU64, LINUX_IA64  
Encodingutf-8 Unicode (UTF-8)  
Engine/Host Dependent Information
Data Set Page Size131072
Number of Data Set Pages1
First Data Page1
Max Obs per Page372
Obs in First Data Page32
Number of Data Set Repairs0
Filename/saswork/SAS_work638500008593_odaws01-apse1-2.oda.sas.com/SAS_work8DCE00008593_odaws01-apse1-2.oda.sas.com/people_india.sas7bdat
Release Created9.0401M8
Host CreatedLinux
Inode Number67165383
Access Permissionrw-r--r--
Owner Nameu63247146
File Size256KB
File Size (bytes)262144
Variables in Creation Order
#VariableTypeLenFormatLabel
1Person_IDNum8  
2NameChar28  
3GenderChar1$GENDERF. 
4AgeNum8AGEBANDF. 
5CityChar20  
6StateChar20  
7Education_LevelChar20 Highest Education
8OccupationChar24  
9SectorChar16  
10Income_INRNum8INCOMEF.Annual Income (INR)
11Household_SizeNum8  
12Marital_StatusChar1$MARF. 
13LanguageChar18  
14Smartphone_UserNum8YESNOF. 
15Internet_Hours_Per_DayNum8 Daily Internet Hours
16Commute_ModeChar14 Primary Commute Mode
17Commute_MinutesNum8  
18Fitness_Mins_Per_WeekNum8 Weekly Fitness Minutes
19Has_Health_InsuranceNum8YESNOF. 
20Voter_ID_FlagNum8YESNOF. 
21Credit_ScoreNum8  
22UPI_Transactions_MonthNum8 Monthly UPI Transactions
23Festival_CelebratedChar18  
24Cuisine_PrefChar16  
25Veg_FlagNum8YESNOF. 
26Digital_Literacy_ScoreNum8 Digital Literacy (0-100)
27Rural_UrbanChar10$RUF. 
28Disability_FlagNum8  
29Travel_Trips_YearNum8  
30Pollution_Concern_ScoreNum8 Pollution Concern (1-10)
31Blood_GroupChar3  

Purpose: Inspect a small sample to eyeball plausibility of values. 

proc print data=people_india (obs=10) label noobs;

  title "First 10 Rows for Sanity Check";

run;

title;

Output:

First 10 Rows for Sanity Check

Person_IDNameGenderAgeCityStateHighest EducationOccupationSectorAnnual Income (INR)Household_SizeMarital_StatusLanguageSmartphone_UserDaily Internet HoursPrimary Commute ModeCommute_MinutesWeekly Fitness MinutesHas_Health_InsuranceVoter_ID_FlagCredit_ScoreMonthly UPI TransactionsFestival_CelebratedCuisine_PrefVeg_FlagDigital Literacy (0-100)Rural_UrbanDisability_FlagTravel_Trips_YearPollution Concern (1-10)Blood_Group
1Arjun MehtaMale25-34MumbaiMaharashtraGraduateSoftware EngineerIT6L-12L3SingleHindiYes4Metro60120YesYes76035DiwaliNorth IndianNo85Urban047O+
2Priya IyerFemale25-34ChennaiTamil NaduPostgraduateData AnalystIT6L-12L4MarriedTamilYes3Bike4090YesYes78028PongalSouth IndianYes88Urban036B+
3Sameer KhanMale18-24HyderabadTelanganaGraduateInside SalesPrivate3L-6L5SingleUrduYes5Bike3560NoYes72022EidHyderabadiNo72Urban025A+
4Neha SharmaFemale35-44DelhiDelhiPostgraduateMarketing ManagerPrivate> 12L3MarriedHindiYes2Car5575YesYes80518HoliNorth IndianNo83Urban058AB+
5Rohan DasMale45-54KolkataWest BengalGraduateSchool TeacherPublic6L-12L4MarriedBengaliYes2Bus5080YesYes76820Durga PujaBengaliYes78Urban017O-
6Ananya RoyFemale18-24KolkataWest BengalUndergraduateStudentNA<= 1L5SingleBengaliYes6Metro30100NoNo012Durga PujaContinentalNo70Urban016A-
7Amit PatilMale25-34PuneMaharashtraDiplomaMechanicPrivate3L-6L6MarriedMarathiYes2Bike4540NoYes70015Ganesh ChaturthiMaharashtrianYes68Semi-Urban015O+
8Sana ParveenFemale25-34PatnaBiharGraduateNurseHealthcare3L-6L5SingleHindiYes3Auto35120YesYes73020EidNorth IndianNo77Semi-Urban016B+
9Ritesh VermaMale45-54LucknowUttar PradeshGraduateShop OwnerInformal3L-6L6MarriedHindiYes1Car2530NoYes69040DiwaliNorth IndianNo60Urban016A+
10Keerthi RFemale25-34VisakhapatnamAndhra PradeshGraduateGraphic DesignerMedia3L-6L4SingleTeluguYes5Bike3090NoNo74024UgadiSouth IndianNo82Urban037B-

Purpose: Detect duplicates by Person_ID (none expected). 

proc sort data=people_india out=people_india_sorted nodupkey;

  by Person_ID;

run;

proc print data=people_india_sorted (obs=10);run;

Output:

ObsPerson_IDNameGenderAgeCityStateEducation_LevelOccupationSectorIncome_INRHousehold_SizeMarital_StatusLanguageSmartphone_UserInternet_Hours_Per_DayCommute_ModeCommute_MinutesFitness_Mins_Per_WeekHas_Health_InsuranceVoter_ID_FlagCredit_ScoreUPI_Transactions_MonthFestival_CelebratedCuisine_PrefVeg_FlagDigital_Literacy_ScoreRural_UrbanDisability_FlagTravel_Trips_YearPollution_Concern_ScoreBlood_Group
11Arjun MehtaMale25-34MumbaiMaharashtraGraduateSoftware EngineerIT6L-12L3SingleHindiYes4Metro60120YesYes76035DiwaliNorth IndianNo85Urban047O+
22Priya IyerFemale25-34ChennaiTamil NaduPostgraduateData AnalystIT6L-12L4MarriedTamilYes3Bike4090YesYes78028PongalSouth IndianYes88Urban036B+
33Sameer KhanMale18-24HyderabadTelanganaGraduateInside SalesPrivate3L-6L5SingleUrduYes5Bike3560NoYes72022EidHyderabadiNo72Urban025A+
44Neha SharmaFemale35-44DelhiDelhiPostgraduateMarketing ManagerPrivate> 12L3MarriedHindiYes2Car5575YesYes80518HoliNorth IndianNo83Urban058AB+
55Rohan DasMale45-54KolkataWest BengalGraduateSchool TeacherPublic6L-12L4MarriedBengaliYes2Bus5080YesYes76820Durga PujaBengaliYes78Urban017O-
66Ananya RoyFemale18-24KolkataWest BengalUndergraduateStudentNA<= 1L5SingleBengaliYes6Metro30100NoNo012Durga PujaContinentalNo70Urban016A-
77Amit PatilMale25-34PuneMaharashtraDiplomaMechanicPrivate3L-6L6MarriedMarathiYes2Bike4540NoYes70015Ganesh ChaturthiMaharashtrianYes68Semi-Urban015O+
88Sana ParveenFemale25-34PatnaBiharGraduateNurseHealthcare3L-6L5SingleHindiYes3Auto35120YesYes73020EidNorth IndianNo77Semi-Urban016B+
99Ritesh VermaMale45-54LucknowUttar PradeshGraduateShop OwnerInformal3L-6L6MarriedHindiYes1Car2530NoYes69040DiwaliNorth IndianNo60Urban016A+
1010Keerthi RFemale25-34VisakhapatnamAndhra PradeshGraduateGraphic DesignerMedia3L-6L4SingleTeluguYes5Bike3090NoNo74024UgadiSouth IndianNo82Urban037B-


4) FREQUENCIES, DISTRIBUTIONS & SUMMARIES 

Purpose: Frequency tables to see categorical distributions at a glance. 

proc freq data=people_india;

  tables Gender Marital_Status Rural_Urban Veg_Flag Smartphone_User

         Has_Health_Insurance Sector Education_Level Language

         Cuisine_Pref Festival_Celebrated / missing;

  title "Categorical Distribution Snapshots";

run;

title;

Output:

Categorical Distribution Snapshots

The FREQ Procedure

GenderFrequencyPercentCumulative
Frequency
Cumulative
Percent
Female1650.001650.00
Male1650.0032100.00
Marital_StatusFrequencyPercentCumulative
Frequency
Cumulative
Percent
Married1856.251856.25
Single1443.7532100.00
Rural_UrbanFrequencyPercentCumulative
Frequency
Cumulative
Percent
Semi-Urban515.63515.63
Urban2784.3832100.00
Veg_FlagFrequencyPercentCumulative
Frequency
Cumulative
Percent
No2062.502062.50
Yes1237.5032100.00
Smartphone_UserFrequencyPercentCumulative
Frequency
Cumulative
Percent
No13.1313.13
Yes3196.8832100.00
Has_Health_InsuranceFrequencyPercentCumulative
Frequency
Cumulative
Percent
No1856.251856.25
Yes1443.7532100.00
SectorFrequencyPercentCumulative
Frequency
Cumulative
Percent
Construction13.1313.13
Healthcare26.2539.38
Hospitality13.13412.50
IT515.63928.13
Informal13.131031.25
Logistics13.131134.38
Manufacturing26.251340.63
Media26.251546.88
NA39.381856.25
Private721.882578.13
Public515.633093.75
Religious13.133196.88
Startup13.1332100.00
Highest Education
Education_LevelFrequencyPercentCumulative
Frequency
Cumulative
Percent
Diploma13.1313.13
Graduate2062.502165.63
Postgraduate721.882887.50
Secondary13.132990.63
Undergraduate39.3832100.00
LanguageFrequencyPercentCumulative
Frequency
Cumulative
Percent
Assamese13.1313.13
Bengali26.2539.38
Gujarati39.38618.75
Hindi1237.501856.25
Kannada39.382165.63
Malayalam26.252371.88
Marathi26.252578.13
Odia13.132681.25
Punjabi13.132784.38
Tamil26.252990.63
Telugu13.133093.75
Urdu26.2532100.00
Cuisine_PrefFrequencyPercentCumulative
Frequency
Cumulative
Percent
Bengali13.1313.13
Continental13.1326.25
Gujarati39.38515.63
Hyderabadi13.13618.75
Maharashtrian26.25825.00
North East13.13928.13
North Indian1237.502165.63
Odia13.132268.75
Punjabi13.132371.88
Rajasthani13.132475.00
South Indian825.0032100.00
Festival_CelebratedFrequencyPercentCumulative
Frequency
Cumulative
Percent
Bihu13.1313.13
Chhath13.1326.25
Dasara13.1339.38
Diwali928.131237.50
Durga Puja26.251443.75
Eid39.381753.13
Ganesh Chaturthi13.131856.25
Gurpurab13.131959.38
Holi26.252165.63
Navratri39.382475.00
Onam26.252681.25
Pongal26.252887.50
Raja Parba13.132990.63
Ugadi39.3832100.00

Purpose: Descriptive stats for key numeric variables. 

proc means data=people_india mean std min p25 median p75 max maxdec=1;

  var Age Income_INR Household_Size Internet_Hours_Per_Day

      Commute_Minutes Fitness_Mins_Per_Week Credit_Score

      UPI_Transactions_Month Digital_Literacy_Score Travel_Trips_Year

      Pollution_Concern_Score;

  title "Descriptive Statistics — Numerics";

run;

title;

Output:

Descriptive Statistics — Numerics

The MEANS Procedure

VariableLabelMeanStd DevMinimum25th PctlMedian75th PctlMaximum
Age
Income_INR
Household_Size
Internet_Hours_Per_Day
Commute_Minutes
Fitness_Mins_Per_Week
Credit_Score
UPI_Transactions_Month
Digital_Literacy_Score
Travel_Trips_Year
Pollution_Concern_Score
 
Annual Income (INR)
 
Daily Internet Hours
 
Weekly Fitness Minutes
 
Monthly UPI Transactions
Digital Literacy (0-100)
 
Pollution Concern (1-10)
33.7
777187.5
4.2
3.0
33.8
82.5
684.0
25.2
77.7
2.0
6.8
9.2
519920.8
1.0
1.6
12.4
35.0
227.7
11.0
10.1
1.2
1.1
21.0
0.0
3.0
0.0
10.0
15.0
0.0
8.0
50.0
0.0
5.0
27.0
455000.0
3.0
2.0
25.0
60.0
712.5
19.0
71.0
1.0
6.0
31.5
625000.0
4.0
3.0
30.0
80.0
740.0
22.0
80.0
2.0
7.0
40.0
1150000.0
5.0
4.0
40.0
100.0
785.0
31.0
85.5
3.0
8.0
56.0
2200000.0
6.0
6.0
60.0
180.0
835.0
60.0
92.0
5.0
9.0

Purpose: Distributional diagnostics for selected metrics. 

proc univariate data=people_india noprint;

  var Income_INR Credit_Score Digital_Literacy_Score UPI_Transactions_Month;

  histogram;

  inset n mean std min max / pos=ne;

  title "Distribution Diagnostics (Histograms)";

run;

title;

Output:

Distribution Diagnostics (Histograms)

The UNIVARIATE Procedure

Histogram for Income_INR


Distribution Diagnostics (Histograms)

The UNIVARIATE Procedure

Histogram for Credit_Score


Distribution Diagnostics (Histograms)

The UNIVARIATE Procedure

Histogram for Digital_Literacy_Score


Distribution Diagnostics (Histograms)

The UNIVARIATE Procedure

Histogram for UPI_Transactions_Month

5) GROUPED SUMMARIES & TABULATED REPORTING  

Purpose: Summaries by State and Sector for planning headcount/market views. 

proc summary data=people_india nway;

  class State Sector;

  var Income_INR Credit_Score UPI_Transactions_Month;

  output out=state_sector_summary

    n()=N_Records

    mean(Income_INR)=Avg_Income

    mean(Credit_Score)=Avg_Credit

    mean(UPI_Transactions_Month)=Avg_UPI;

  title "Summary by State x Sector";

run;

title;

proc print;run;

Output:

ObsStateSector_TYPE__FREQ_N_RecordsAvg_IncomeAvg_CreditAvg_UPI
1Andhra PradeshMedia3113L-6L740.024.0
2AssamMedia3113L-6L735.022.0
3BiharHealthcare3113L-6L730.020.0
4ChandigarhConstruction3116L-12L770.025.0
5DelhiPrivate311> 12L805.018.0
6GujaratManufacturing3116L-12L760.020.0
7GujaratPrivate311> 12L800.035.0
8GujaratStartup3116L-12L780.060.0
9HaryanaIT311> 12L820.038.0
10JharkhandPublic3116L-12L740.025.0
11KarnatakaIT322> 12L830.042.5
12KarnatakaNA311<= 1L0.014.0
13KeralaHealthcare3116L-12L775.032.0
14KeralaPublic311> 12L815.020.0
15Madhya PradeshPrivate3223L-6L717.522.5
16MaharashtraIT3116L-12L760.035.0
17MaharashtraPrivate3226L-12L745.020.5
18OdishaPublic3113L-6L720.018.0
19PunjabLogistics3113L-6L715.018.0
20RajasthanHospitality3113L-6L705.020.0
21Tamil NaduIT3116L-12L780.028.0
22Tamil NaduManufacturing3113L-6L735.022.0
23TelanganaPrivate3113L-6L720.022.0
24Uttar PradeshInformal3113L-6L690.040.0
25Uttar PradeshNA311<= 1L0.010.0
26Uttar PradeshPublic3116L-12L790.030.0
27Uttar PradeshReligious3111L-3L650.08.0
28West BengalNA311<= 1L0.012.0
29West BengalPublic3116L-12L768.020.0


Purpose: Cross-tabs in a single compact table for leadership review. 

proc tabulate data=people_india format=8.1;

  class State Gender Rural_Urban;

  var Income_INR Digital_Literacy_Score;

  table State,

        Gender * (n colpctn)

        Rural_Urban * (n colpctn)

        (Income_INR Digital_Literacy_Score) * mean;

  title "Multi-Dimension Snapshot (TABULATE)";

run;

title;

Output:

Multi-Dimension Snapshot (TABULATE)

 GenderRural_UrbanAnnual Income (INR)Digital Literacy (0-100)
FemaleMaleSemi-UrbanUrban
NColPctNNColPctNNColPctNNColPctNMeanMean
State16.3....13.7520000.082.0
Andhra Pradesh
Assam16.3..120.0..460000.080.0
Bihar16.3..120.0..480000.077.0
Chandigarh..16.3..13.7850000.081.0
Delhi16.3....13.7140000083.0
Gujarat16.3212.5..311.1109333384.3
Haryana..16.3..13.7180000090.0
Jharkhand..16.3..13.7700000.072.0
Karnataka212.516.3..311.1126666785.0
Kerala212.5....27.4107500088.0
Madhya Pradesh16.316.3120.013.7460000.072.5
Maharashtra16.3212.5120.027.4826666.779.0
Odisha..16.3..13.7580000.065.0
Punjab..16.3..13.7600000.066.0
Rajasthan..16.3..13.7360000.064.0
Tamil Nadu212.5....27.4810000.085.0
Telangana..16.3..13.7450000.072.0
Uttar Pradesh212.5212.5120.0311.1442500.067.5
West Bengal16.316.3..27.4325000.074.0

Purpose: Polished tabular report with custom columns and labels. 

proc report data=people_india nowd;

  columns State City Name Gender Age Education_Level Occupation Sector Income_INR Credit_Score Digital_Literacy_Score;

  define State / group 'State';

  define City  / display 'City';

  define Name  / display 'Name';

  define Gender / display 'Gender';

  define Age / display 'Age';

  define Education_Level / display 'Education';

  define Occupation / display 'Occupation';

  define Sector / display 'Sector';

  define Income_INR / analysis mean format=comma12. 'Avg Income';

  define Credit_Score / analysis mean 'Avg Credit';

  define Digital_Literacy_Score / analysis mean 'Avg Digital Lit';

  title "People of India — City & Career Lens (REPORT)";

run;

title;

Output:

People of India — City & Career Lens (REPORT)

StateCityNameGenderAgeEducationOccupationSectorAvg IncomeAvg CreditAvg Digital Lit
Andhra PradeshVisakhapatnamKeerthi RFemale25-34GraduateGraphic DesignerMedia520,00074082
AssamSilcharTrisha DeyFemale25-34GraduateContent WriterMedia460,00073580
BiharPatnaSana ParveenFemale25-34GraduateNurseHealthcare480,00073077
ChandigarhChandigarhAnil KumarMale25-34GraduateCivil EngineerConstruction850,00077081
DelhiDelhiNeha SharmaFemale35-44PostgraduateMarketing ManagerPrivate1,400,00080583
GujaratAhmedabadAakash JainMale25-34GraduateEntrepreneurStartup1,200,00078085
 SuratShruti JoshiFemale35-44PostgraduateFashion BuyerPrivate1,300,00080088
 VadodaraShivam PatelMale25-34GraduateMechanical EngineerManufacturing780,00076080
HaryanaGurugramManoj KumarMale45-54GraduateProject ManagerIT1,800,00082090
JharkhandRanchiRajeev RanjanMale25-34GraduatePolice Sub-InspectorPublic700,00074072
KarnatakaMysuruAnusha SFemale18-24UndergraduateStudentNA0074
 BengaluruVikram RaoMale35-44PostgraduateData ScientistIT2,200,00083592
 MangaloreDevika RaoFemale25-34GraduateProduct ManagerIT1,600,00082589
KeralaKochiKavya NairFemale35-44PostgraduatePhysiotherapistHealthcare900,00077586
 ThiruvananthapuramMeera PillaiFemale45-54PostgraduateSchool PrincipalPublic1,250,00081590
Madhya PradeshIndoreDeepak YadavMale25-34GraduateField SalesPrivate420,00071069
 BhopalSapna JainFemale25-34GraduateAccountantPrivate500,00072576
MaharashtraMumbaiArjun MehtaMale25-34GraduateSoftware EngineerIT900,00076085
 PuneAmit PatilMale25-34DiplomaMechanicPrivate380,00070068
 NagpurSonali KulkarniFemale35-44PostgraduateHR LeadPrivate1,200,00079084
OdishaBhubaneswarArvind SinhaMale35-44GraduateGovernment ClerkPublic580,00072065
PunjabAmritsarGurpreet SinghMale25-34GraduateLogistics SupervisorLogistics600,00071566
RajasthanJaipurFaizan AliMale25-34GraduateHotel Front OfficeHospitality360,00070564
Tamil NaduChennaiPriya IyerFemale25-34PostgraduateData AnalystIT1,100,00078088
 CoimbatoreNisha BFemale25-34GraduateQuality AnalystManufacturing520,00073582
TelanganaHyderabadSameer KhanMale18-24GraduateInside SalesPrivate450,00072072
Uttar PradeshLucknowRitesh VermaMale45-54GraduateShop OwnerInformal550,00069060
 VaranasiHarish ChandraMale55-64SecondaryPriestReligious240,00065050
 KanpurRekha GuptaFemale35-44GraduateBank OfficerPublic980,00079088
 AligarhNoor ZoyaFemale18-24UndergraduateStudentNA0072
West BengalKolkataRohan DasMale45-54GraduateSchool TeacherPublic650,00076878
 KolkataAnanya RoyFemale18-24UndergraduateStudentNA0070

6) RANKING & BUCKETS 

Purpose: Rank individuals by income to identify top earners and bands. 

proc rank data=people_india out=people_ranked groups=4 ties=low;

  var Income_INR;

  ranks Income_Quartile;

run;proc print;run;

Output:

ObsPerson_IDNameGenderAgeCityStateEducation_LevelOccupationSectorIncome_INRHousehold_SizeMarital_StatusLanguageSmartphone_UserInternet_Hours_Per_DayCommute_ModeCommute_MinutesFitness_Mins_Per_WeekHas_Health_InsuranceVoter_ID_FlagCredit_ScoreUPI_Transactions_MonthFestival_CelebratedCuisine_PrefVeg_FlagDigital_Literacy_ScoreRural_UrbanDisability_FlagTravel_Trips_YearPollution_Concern_ScoreBlood_GroupIncome_Quartile
11Arjun MehtaMale25-34MumbaiMaharashtraGraduateSoftware EngineerIT6L-12L3SingleHindiYes4Metro60120YesYes76035DiwaliNorth IndianNo85Urban047O+2
22Priya IyerFemale25-34ChennaiTamil NaduPostgraduateData AnalystIT6L-12L4MarriedTamilYes3Bike4090YesYes78028PongalSouth IndianYes88Urban036B+2
33Sameer KhanMale18-24HyderabadTelanganaGraduateInside SalesPrivate3L-6L5SingleUrduYes5Bike3560NoYes72022EidHyderabadiNo72Urban025A+0
44Neha SharmaFemale35-44DelhiDelhiPostgraduateMarketing ManagerPrivate> 12L3MarriedHindiYes2Car5575YesYes80518HoliNorth IndianNo83Urban058AB+3
55Rohan DasMale45-54KolkataWest BengalGraduateSchool TeacherPublic6L-12L4MarriedBengaliYes2Bus5080YesYes76820Durga PujaBengaliYes78Urban017O-2
66Ananya RoyFemale18-24KolkataWest BengalUndergraduateStudentNA<= 1L5SingleBengaliYes6Metro30100NoNo012Durga PujaContinentalNo70Urban016A-0
77Amit PatilMale25-34PuneMaharashtraDiplomaMechanicPrivate3L-6L6MarriedMarathiYes2Bike4540NoYes70015Ganesh ChaturthiMaharashtrianYes68Semi-Urban015O+0
88Sana ParveenFemale25-34PatnaBiharGraduateNurseHealthcare3L-6L5SingleHindiYes3Auto35120YesYes73020EidNorth IndianNo77Semi-Urban016B+1
99Ritesh VermaMale45-54LucknowUttar PradeshGraduateShop OwnerInformal3L-6L6MarriedHindiYes1Car2530NoYes69040DiwaliNorth IndianNo60Urban016A+1
1010Keerthi RFemale25-34VisakhapatnamAndhra PradeshGraduateGraphic DesignerMedia3L-6L4SingleTeluguYes5Bike3090NoNo74024UgadiSouth IndianNo82Urban037B-1
1111Gurpreet SinghMale25-34AmritsarPunjabGraduateLogistics SupervisorLogistics3L-6L5MarriedPunjabiYes2Car4060NoYes71518GurpurabPunjabiNo66Urban026O+1
1212Sonali KulkarniFemale35-44NagpurMaharashtraPostgraduateHR LeadPrivate6L-12L3MarriedMarathiYes2Car35120YesYes79026DiwaliMaharashtrianNo84Urban028AB-3
1313Faizan AliMale25-34JaipurRajasthanGraduateHotel Front OfficeHospitality3L-6L4SingleHindiYes4Bus2545NoYes70520DiwaliRajasthaniNo64Urban015A+0
1414Kavya NairFemale35-44KochiKeralaPostgraduatePhysiotherapistHealthcare6L-12L3MarriedMalayalamYes2Car30180YesYes77532OnamSouth IndianYes86Urban026O+2
1515Manoj KumarMale45-54GurugramHaryanaGraduateProject ManagerIT> 12L4MarriedHindiYes2Car6060YesYes82038DiwaliNorth IndianNo90Urban049B+3
1616Anusha SFemale18-24MysuruKarnatakaUndergraduateStudentNA<= 1L5SingleKannadaYes6Bus20110NoNo014DasaraSouth IndianYes74Urban016A-0
1717Deepak YadavMale25-34IndoreMadhya PradeshGraduateField SalesPrivate3L-6L6MarriedHindiYes5Bike5050NoYes71025DiwaliNorth IndianNo69Urban016O+0
1818Trisha DeyFemale25-34SilcharAssamGraduateContent WriterMedia3L-6L4SingleAssameseYes4Auto2070NoYes73522BihuNorth EastNo80Semi-Urban026B+1
1919Vikram RaoMale35-44BengaluruKarnatakaPostgraduateData ScientistIT> 12L3MarriedKannadaYes3Metro50150YesYes83545UgadiSouth IndianNo92Urban039O+3
2020Sapna JainFemale25-34BhopalMadhya PradeshGraduateAccountantPrivate3L-6L5SingleHindiYes3Bus3060NoYes72520DiwaliNorth IndianYes76Semi-Urban017A+1
2121Harish ChandraMale55-64VaranasiUttar PradeshSecondaryPriestReligious1L-3L5MarriedHindiNo0Walk1015NoYes6508DiwaliNorth IndianYes50Urban006O+0
2222Rekha GuptaFemale35-44KanpurUttar PradeshGraduateBank OfficerPublic6L-12L4MarriedHindiYes2Car3090YesYes79030DiwaliNorth IndianYes88Urban028AB+2
2323Aakash JainMale25-34AhmedabadGujaratGraduateEntrepreneurStartup6L-12L4SingleGujaratiYes5Car3570NoYes78060NavratriGujaratiNo85Urban048B+3
2424Meera PillaiFemale45-54ThiruvananthapuramKeralaPostgraduateSchool PrincipalPublic> 12L3MarriedMalayalamYes1Car25100YesYes81520OnamSouth IndianYes90Urban028A+3
2525Rajeev RanjanMale25-34RanchiJharkhandGraduatePolice Sub-InspectorPublic6L-12L4MarriedHindiYes2Bike2060YesYes74025ChhathNorth IndianNo72Urban017O+2
2626Nisha BFemale25-34CoimbatoreTamil NaduGraduateQuality AnalystManufacturing3L-6L4SingleTamilYes3Bike2580NoYes73522PongalSouth IndianYes82Urban027B-1
2727Arvind SinhaMale35-44BhubaneswarOdishaGraduateGovernment ClerkPublic3L-6L5MarriedOdiaYes2Bike1545YesYes72018Raja ParbaOdiaYes65Urban017A+1
2828Shruti JoshiFemale35-44SuratGujaratPostgraduateFashion BuyerPrivate> 12L3MarriedGujaratiYes3Car35120NoYes80035NavratriGujaratiNo88Urban038O-3
2929Noor ZoyaFemale18-24AligarhUttar PradeshUndergraduateStudentNA<= 1L6SingleUrduYes6Bus3080NoNo010EidNorth IndianNo72Semi-Urban016B+0
3030Anil KumarMale25-34ChandigarhChandigarhGraduateCivil EngineerConstruction6L-12L3SingleHindiYes2Car30100NoYes77025HoliNorth IndianNo81Urban028A-2
3131Devika RaoFemale25-34MangaloreKarnatakaGraduateProduct ManagerIT> 12L3MarriedKannadaYes3Car35100YesYes82540UgadiSouth IndianNo89Urban038AB+3
3232Shivam PatelMale25-34VadodaraGujaratGraduateMechanical EngineerManufacturing6L-12L4SingleGujaratiYes2Bike2570NoYes76020NavratriGujaratiYes80Urban027O+2


Purpose: Frequency check of ranks for sanity. 

proc freq data=people_ranked;

  tables Income_Quartile;

  title "Income Quartile Distribution";

run;

title;

Output:

Income Quartile Distribution

The FREQ Procedure

Rank for Variable Income_INR
Income_QuartileFrequencyPercentCumulative
Frequency
Cumulative
Percent
0825.00825.00
1825.001650.00
2825.002475.00
3825.0032100.00

7) SIMPLE DATA DERIVEDS 

Purpose: Add derived KPIs used across reports (e.g., Income per Capita). 

data people_enriched;

  set people_ranked;

  Income_per_Capita = ifn(Household_Size>0, Income_INR/Household_Size, .);

  High_Digital = (Digital_Literacy_Score >= 80);

  Busy_Internet_User = (Internet_Hours_Per_Day >= 4);

  Long_Commute = (Commute_Minutes >= 45);

  Active_Lifestyle = (Fitness_Mins_Per_Week >= 90);

  format High_Digital Busy_Internet_User Long_Commute Active_Lifestyle yesnoF.;

run;

proc print;run;

Output:

ObsPerson_IDNameGenderAgeCityStateEducation_LevelOccupationSectorIncome_INRHousehold_SizeMarital_StatusLanguageSmartphone_UserInternet_Hours_Per_DayCommute_ModeCommute_MinutesFitness_Mins_Per_WeekHas_Health_InsuranceVoter_ID_FlagCredit_ScoreUPI_Transactions_MonthFestival_CelebratedCuisine_PrefVeg_FlagDigital_Literacy_ScoreRural_UrbanDisability_FlagTravel_Trips_YearPollution_Concern_ScoreBlood_GroupIncome_QuartileIncome_per_CapitaHigh_DigitalBusy_Internet_UserLong_CommuteActive_Lifestyle
11Arjun MehtaMale25-34MumbaiMaharashtraGraduateSoftware EngineerIT6L-12L3SingleHindiYes4Metro60120YesYes76035DiwaliNorth IndianNo85Urban047O+2300000.00YesYesYesYes
22Priya IyerFemale25-34ChennaiTamil NaduPostgraduateData AnalystIT6L-12L4MarriedTamilYes3Bike4090YesYes78028PongalSouth IndianYes88Urban036B+2275000.00YesNoNoYes
33Sameer KhanMale18-24HyderabadTelanganaGraduateInside SalesPrivate3L-6L5SingleUrduYes5Bike3560NoYes72022EidHyderabadiNo72Urban025A+090000.00NoYesNoNo
44Neha SharmaFemale35-44DelhiDelhiPostgraduateMarketing ManagerPrivate> 12L3MarriedHindiYes2Car5575YesYes80518HoliNorth IndianNo83Urban058AB+3466666.67YesNoYesNo
55Rohan DasMale45-54KolkataWest BengalGraduateSchool TeacherPublic6L-12L4MarriedBengaliYes2Bus5080YesYes76820Durga PujaBengaliYes78Urban017O-2162500.00NoNoYesNo
66Ananya RoyFemale18-24KolkataWest BengalUndergraduateStudentNA<= 1L5SingleBengaliYes6Metro30100NoNo012Durga PujaContinentalNo70Urban016A-00.00NoYesNoYes
77Amit PatilMale25-34PuneMaharashtraDiplomaMechanicPrivate3L-6L6MarriedMarathiYes2Bike4540NoYes70015Ganesh ChaturthiMaharashtrianYes68Semi-Urban015O+063333.33NoNoYesNo
88Sana ParveenFemale25-34PatnaBiharGraduateNurseHealthcare3L-6L5SingleHindiYes3Auto35120YesYes73020EidNorth IndianNo77Semi-Urban016B+196000.00NoNoNoYes
99Ritesh VermaMale45-54LucknowUttar PradeshGraduateShop OwnerInformal3L-6L6MarriedHindiYes1Car2530NoYes69040DiwaliNorth IndianNo60Urban016A+191666.67NoNoNoNo
1010Keerthi RFemale25-34VisakhapatnamAndhra PradeshGraduateGraphic DesignerMedia3L-6L4SingleTeluguYes5Bike3090NoNo74024UgadiSouth IndianNo82Urban037B-1130000.00YesYesNoYes
1111Gurpreet SinghMale25-34AmritsarPunjabGraduateLogistics SupervisorLogistics3L-6L5MarriedPunjabiYes2Car4060NoYes71518GurpurabPunjabiNo66Urban026O+1120000.00NoNoNoNo
1212Sonali KulkarniFemale35-44NagpurMaharashtraPostgraduateHR LeadPrivate6L-12L3MarriedMarathiYes2Car35120YesYes79026DiwaliMaharashtrianNo84Urban028AB-3400000.00YesNoNoYes
1313Faizan AliMale25-34JaipurRajasthanGraduateHotel Front OfficeHospitality3L-6L4SingleHindiYes4Bus2545NoYes70520DiwaliRajasthaniNo64Urban015A+090000.00NoYesNoNo
1414Kavya NairFemale35-44KochiKeralaPostgraduatePhysiotherapistHealthcare6L-12L3MarriedMalayalamYes2Car30180YesYes77532OnamSouth IndianYes86Urban026O+2300000.00YesNoNoYes
1515Manoj KumarMale45-54GurugramHaryanaGraduateProject ManagerIT> 12L4MarriedHindiYes2Car6060YesYes82038DiwaliNorth IndianNo90Urban049B+3450000.00YesNoYesNo
1616Anusha SFemale18-24MysuruKarnatakaUndergraduateStudentNA<= 1L5SingleKannadaYes6Bus20110NoNo014DasaraSouth IndianYes74Urban016A-00.00NoYesNoYes
1717Deepak YadavMale25-34IndoreMadhya PradeshGraduateField SalesPrivate3L-6L6MarriedHindiYes5Bike5050NoYes71025DiwaliNorth IndianNo69Urban016O+070000.00NoYesYesNo
1818Trisha DeyFemale25-34SilcharAssamGraduateContent WriterMedia3L-6L4SingleAssameseYes4Auto2070NoYes73522BihuNorth EastNo80Semi-Urban026B+1115000.00YesYesNoNo
1919Vikram RaoMale35-44BengaluruKarnatakaPostgraduateData ScientistIT> 12L3MarriedKannadaYes3Metro50150YesYes83545UgadiSouth IndianNo92Urban039O+3733333.33YesNoYesYes
2020Sapna JainFemale25-34BhopalMadhya PradeshGraduateAccountantPrivate3L-6L5SingleHindiYes3Bus3060NoYes72520DiwaliNorth IndianYes76Semi-Urban017A+1100000.00NoNoNoNo
2121Harish ChandraMale55-64VaranasiUttar PradeshSecondaryPriestReligious1L-3L5MarriedHindiNo0Walk1015NoYes6508DiwaliNorth IndianYes50Urban006O+048000.00NoNoNoNo
2222Rekha GuptaFemale35-44KanpurUttar PradeshGraduateBank OfficerPublic6L-12L4MarriedHindiYes2Car3090YesYes79030DiwaliNorth IndianYes88Urban028AB+2245000.00YesNoNoYes
2323Aakash JainMale25-34AhmedabadGujaratGraduateEntrepreneurStartup6L-12L4SingleGujaratiYes5Car3570NoYes78060NavratriGujaratiNo85Urban048B+3300000.00YesYesNoNo
2424Meera PillaiFemale45-54ThiruvananthapuramKeralaPostgraduateSchool PrincipalPublic> 12L3MarriedMalayalamYes1Car25100YesYes81520OnamSouth IndianYes90Urban028A+3416666.67YesNoNoYes
2525Rajeev RanjanMale25-34RanchiJharkhandGraduatePolice Sub-InspectorPublic6L-12L4MarriedHindiYes2Bike2060YesYes74025ChhathNorth IndianNo72Urban017O+2175000.00NoNoNoNo
2626Nisha BFemale25-34CoimbatoreTamil NaduGraduateQuality AnalystManufacturing3L-6L4SingleTamilYes3Bike2580NoYes73522PongalSouth IndianYes82Urban027B-1130000.00YesNoNoNo
2727Arvind SinhaMale35-44BhubaneswarOdishaGraduateGovernment ClerkPublic3L-6L5MarriedOdiaYes2Bike1545YesYes72018Raja ParbaOdiaYes65Urban017A+1116000.00NoNoNoNo
2828Shruti JoshiFemale35-44SuratGujaratPostgraduateFashion BuyerPrivate> 12L3MarriedGujaratiYes3Car35120NoYes80035NavratriGujaratiNo88Urban038O-3433333.33YesNoNoYes
2929Noor ZoyaFemale18-24AligarhUttar PradeshUndergraduateStudentNA<= 1L6SingleUrduYes6Bus3080NoNo010EidNorth IndianNo72Semi-Urban016B+00.00NoYesNoNo
3030Anil KumarMale25-34ChandigarhChandigarhGraduateCivil EngineerConstruction6L-12L3SingleHindiYes2Car30100NoYes77025HoliNorth IndianNo81Urban028A-2283333.33YesNoNoYes
3131Devika RaoFemale25-34MangaloreKarnatakaGraduateProduct ManagerIT> 12L3MarriedKannadaYes3Car35100YesYes82540UgadiSouth IndianNo89Urban038AB+3533333.33YesNoNoYes
3232Shivam PatelMale25-34VadodaraGujaratGraduateMechanical EngineerManufacturing6L-12L4SingleGujaratiYes2Bike2570NoYes76020NavratriGujaratiYes80Urban027O+2195000.00YesNoNoNo


Purpose: Quick stats on derived KPIs. 

proc means data=people_enriched mean maxdec=2;

  var Income_per_Capita;

  class State;

  title "Income per Capita by State — Mean";

run;

title;

Output:

Income per Capita by State — Mean

The MEANS Procedure

Analysis Variable : Income_per_Capita
StateN ObsMean
Andhra Pradesh1130000.00
Assam1115000.00
Bihar196000.00
Chandigarh1283333.33
Delhi1466666.67
Gujarat3309444.44
Haryana1450000.00
Jharkhand1175000.00
Karnataka3422222.22
Kerala2358333.33
Madhya Pradesh285000.00
Maharashtra3254444.44
Odisha1116000.00
Punjab1120000.00
Rajasthan190000.00
Tamil Nadu2202500.00
Telangana190000.00
Uttar Pradesh496166.67
West Bengal281250.00

8) PROC TRANSPOSE

Purpose: Reshape summary from long to wide for dashboard ingestion. 

proc summary data=people_enriched nway;

  class State;

  var Income_INR Credit_Score Digital_Literacy_Score;

  output out=state_means (drop=_type_ _freq_)

    mean(Income_INR)=Avg_Income

    mean(Credit_Score)=Avg_Credit

    mean(Digital_Literacy_Score)=Avg_DigiLit;

run;

proc print;run;

Output:

ObsStateAvg_IncomeAvg_CreditAvg_DigiLit
1Andhra Pradesh3L-6L740.00082.0000
2Assam3L-6L735.00080.0000
3Bihar3L-6L730.00077.0000
4Chandigarh6L-12L770.00081.0000
5Delhi> 12L805.00083.0000
6Gujarat6L-12L780.00084.3333
7Haryana> 12L820.00090.0000
8Jharkhand6L-12L740.00072.0000
9Karnataka> 12L553.33385.0000
10Kerala6L-12L795.00088.0000
11Madhya Pradesh3L-6L717.50072.5000
12Maharashtra6L-12L750.00079.0000
13Odisha3L-6L720.00065.0000
14Punjab3L-6L715.00066.0000
15Rajasthan3L-6L705.00064.0000
16Tamil Nadu6L-12L757.50085.0000
17Telangana3L-6L720.00072.0000
18Uttar Pradesh3L-6L532.50067.5000
19West Bengal3L-6L384.00074.0000

proc transpose data=state_means out=state_means_t prefix=Mean_;

   id State;           /* Each state becomes a column */

   var Avg_Income;    /* Variable you want to transpose */

run;

proc print data=state_means_t; 

run;

Output:

Obs_NAME__LABEL_Mean_Andhra PradeshMean_AssamMean_BiharMean_ChandigarhMean_DelhiMean_GujaratMean_HaryanaMean_JharkhandMean_KarnatakaMean_KeralaMean_Madhya PradeshMean_MaharashtraMean_OdishaMean_PunjabMean_RajasthanMean_Tamil NaduMean_TelanganaMean_Uttar PradeshMean_West Bengal
1Avg_IncomeAnnual Income (INR)3L-6L3L-6L3L-6L6L-12L> 12L6L-12L> 12L6L-12L> 12L6L-12L3L-6L6L-12L3L-6L3L-6L3L-6L6L-12L3L-6L3L-6L3L-6L


9) PROC SQL SHOWCASE

Purpose: Create a city-level aggregate table for BI consumers. 

proc sql;

  create table city_agg as

  select State, City,

         count(*) as N,

         mean(Age) as Avg_Age format=8.1,

         mean(Income_INR) as Avg_Income format=comma12.,

         mean(Credit_Score) as Avg_Credit format=8.1,

         mean(Digital_Literacy_Score) as Avg_DigiLit format=8.1,

         sum(UPI_Transactions_Month) as Total_UPI

  from people_enriched

  group by State, City

  order by State, Avg_Income desc;

quit;

proc print data=city_agg (obs=10);run;

Output:

ObsStateCityNAvg_AgeAvg_IncomeAvg_CreditAvg_DigiLitTotal_UPI
1Andhra PradeshVisakhapatnam126.0520,000740.082.024
2AssamSilchar127.0460,000735.080.022
3BiharPatna128.0480,000730.077.020
4ChandigarhChandigarh128.0850,000770.081.025
5DelhiDelhi139.01,400,000805.083.018
6GujaratSurat136.01,300,000800.088.035
7GujaratAhmedabad125.01,200,000780.085.060
8GujaratVadodara126.0780,000760.080.020
9HaryanaGurugram147.01,800,000820.090.038
10JharkhandRanchi132.0700,000740.072.025


Purpose: Identify top 10 earners with additional flags using subquery. 

proc sql;

  create table top10_earners as

  select *

  from people_enriched

  order by Income_INR desc;

quit;

proc print data=top10_earners (obs=10);run;

Output:

ObsPerson_IDNameGenderAgeCityStateEducation_LevelOccupationSectorIncome_INRHousehold_SizeMarital_StatusLanguageSmartphone_UserInternet_Hours_Per_DayCommute_ModeCommute_MinutesFitness_Mins_Per_WeekHas_Health_InsuranceVoter_ID_FlagCredit_ScoreUPI_Transactions_MonthFestival_CelebratedCuisine_PrefVeg_FlagDigital_Literacy_ScoreRural_UrbanDisability_FlagTravel_Trips_YearPollution_Concern_ScoreBlood_GroupIncome_QuartileIncome_per_CapitaHigh_DigitalBusy_Internet_UserLong_CommuteActive_Lifestyle
119Vikram RaoMale35-44BengaluruKarnatakaPostgraduateData ScientistIT> 12L3MarriedKannadaYes3Metro50150YesYes83545UgadiSouth IndianNo92Urban039O+3733333.33YesNoYesYes
215Manoj KumarMale45-54GurugramHaryanaGraduateProject ManagerIT> 12L4MarriedHindiYes2Car6060YesYes82038DiwaliNorth IndianNo90Urban049B+3450000.00YesNoYesNo
331Devika RaoFemale25-34MangaloreKarnatakaGraduateProduct ManagerIT> 12L3MarriedKannadaYes3Car35100YesYes82540UgadiSouth IndianNo89Urban038AB+3533333.33YesNoNoYes
44Neha SharmaFemale35-44DelhiDelhiPostgraduateMarketing ManagerPrivate> 12L3MarriedHindiYes2Car5575YesYes80518HoliNorth IndianNo83Urban058AB+3466666.67YesNoYesNo
528Shruti JoshiFemale35-44SuratGujaratPostgraduateFashion BuyerPrivate> 12L3MarriedGujaratiYes3Car35120NoYes80035NavratriGujaratiNo88Urban038O-3433333.33YesNoNoYes
624Meera PillaiFemale45-54ThiruvananthapuramKeralaPostgraduateSchool PrincipalPublic> 12L3MarriedMalayalamYes1Car25100YesYes81520OnamSouth IndianYes90Urban028A+3416666.67YesNoNoYes
712Sonali KulkarniFemale35-44NagpurMaharashtraPostgraduateHR LeadPrivate6L-12L3MarriedMarathiYes2Car35120YesYes79026DiwaliMaharashtrianNo84Urban028AB-3400000.00YesNoNoYes
823Aakash JainMale25-34AhmedabadGujaratGraduateEntrepreneurStartup6L-12L4SingleGujaratiYes5Car3570NoYes78060NavratriGujaratiNo85Urban048B+3300000.00YesYesNoNo
92Priya IyerFemale25-34ChennaiTamil NaduPostgraduateData AnalystIT6L-12L4MarriedTamilYes3Bike4090YesYes78028PongalSouth IndianYes88Urban036B+2275000.00YesNoNoYes
1022Rekha GuptaFemale35-44KanpurUttar PradeshGraduateBank OfficerPublic6L-12L4MarriedHindiYes2Car3090YesYes79030DiwaliNorth IndianYes88Urban028AB+2245000.00YesNoNoYes

data top10_earners;

  set top10_earners(obs=10);

run;

proc print;run;

Output:

ObsPerson_IDNameGenderAgeCityStateEducation_LevelOccupationSectorIncome_INRHousehold_SizeMarital_StatusLanguageSmartphone_UserInternet_Hours_Per_DayCommute_ModeCommute_MinutesFitness_Mins_Per_WeekHas_Health_InsuranceVoter_ID_FlagCredit_ScoreUPI_Transactions_MonthFestival_CelebratedCuisine_PrefVeg_FlagDigital_Literacy_ScoreRural_UrbanDisability_FlagTravel_Trips_YearPollution_Concern_ScoreBlood_GroupIncome_QuartileIncome_per_CapitaHigh_DigitalBusy_Internet_UserLong_CommuteActive_Lifestyle
119Vikram RaoMale35-44BengaluruKarnatakaPostgraduateData ScientistIT> 12L3MarriedKannadaYes3Metro50150YesYes83545UgadiSouth IndianNo92Urban039O+3733333.33YesNoYesYes
215Manoj KumarMale45-54GurugramHaryanaGraduateProject ManagerIT> 12L4MarriedHindiYes2Car6060YesYes82038DiwaliNorth IndianNo90Urban049B+3450000.00YesNoYesNo
331Devika RaoFemale25-34MangaloreKarnatakaGraduateProduct ManagerIT> 12L3MarriedKannadaYes3Car35100YesYes82540UgadiSouth IndianNo89Urban038AB+3533333.33YesNoNoYes
44Neha SharmaFemale35-44DelhiDelhiPostgraduateMarketing ManagerPrivate> 12L3MarriedHindiYes2Car5575YesYes80518HoliNorth IndianNo83Urban058AB+3466666.67YesNoYesNo
528Shruti JoshiFemale35-44SuratGujaratPostgraduateFashion BuyerPrivate> 12L3MarriedGujaratiYes3Car35120NoYes80035NavratriGujaratiNo88Urban038O-3433333.33YesNoNoYes
624Meera PillaiFemale45-54ThiruvananthapuramKeralaPostgraduateSchool PrincipalPublic> 12L3MarriedMalayalamYes1Car25100YesYes81520OnamSouth IndianYes90Urban028A+3416666.67YesNoNoYes
712Sonali KulkarniFemale35-44NagpurMaharashtraPostgraduateHR LeadPrivate6L-12L3MarriedMarathiYes2Car35120YesYes79026DiwaliMaharashtrianNo84Urban028AB-3400000.00YesNoNoYes
823Aakash JainMale25-34AhmedabadGujaratGraduateEntrepreneurStartup6L-12L4SingleGujaratiYes5Car3570NoYes78060NavratriGujaratiNo85Urban048B+3300000.00YesYesNoNo
92Priya IyerFemale25-34ChennaiTamil NaduPostgraduateData AnalystIT6L-12L4MarriedTamilYes3Bike4090YesYes78028PongalSouth IndianYes88Urban036B+2275000.00YesNoNoYes
1022Rekha GuptaFemale35-44KanpurUttar PradeshGraduateBank OfficerPublic6L-12L4MarriedHindiYes2Car3090YesYes79030DiwaliNorth IndianYes88Urban028AB+2245000.00YesNoNoYes

proc sql;

  create table top10_earners_flags as

  select t.*, 

         case when t.High_Digital=1 and t.Active_Lifestyle=1 then 'Digital+Active'

              when t.High_Digital=1 then 'Digital'

              when t.Active_Lifestyle=1 then 'Active'

              else 'Other' end as Persona length=16

  from top10_earners t;

quit;

proc print data=top10_earners_flags (obs=10);run;

Output:

ObsPerson_IDNameGenderAgeCityStateEducation_LevelOccupationSectorIncome_INRHousehold_SizeMarital_StatusLanguageSmartphone_UserInternet_Hours_Per_DayCommute_ModeCommute_MinutesFitness_Mins_Per_WeekHas_Health_InsuranceVoter_ID_FlagCredit_ScoreUPI_Transactions_MonthFestival_CelebratedCuisine_PrefVeg_FlagDigital_Literacy_ScoreRural_UrbanDisability_FlagTravel_Trips_YearPollution_Concern_ScoreBlood_GroupIncome_QuartileIncome_per_CapitaHigh_DigitalBusy_Internet_UserLong_CommuteActive_LifestylePersona
119Vikram RaoMale35-44BengaluruKarnatakaPostgraduateData ScientistIT> 12L3MarriedKannadaYes3Metro50150YesYes83545UgadiSouth IndianNo92Urban039O+3733333.33YesNoYesYesDigital+Active
215Manoj KumarMale45-54GurugramHaryanaGraduateProject ManagerIT> 12L4MarriedHindiYes2Car6060YesYes82038DiwaliNorth IndianNo90Urban049B+3450000.00YesNoYesNoDigital
331Devika RaoFemale25-34MangaloreKarnatakaGraduateProduct ManagerIT> 12L3MarriedKannadaYes3Car35100YesYes82540UgadiSouth IndianNo89Urban038AB+3533333.33YesNoNoYesDigital+Active
44Neha SharmaFemale35-44DelhiDelhiPostgraduateMarketing ManagerPrivate> 12L3MarriedHindiYes2Car5575YesYes80518HoliNorth IndianNo83Urban058AB+3466666.67YesNoYesNoDigital
528Shruti JoshiFemale35-44SuratGujaratPostgraduateFashion BuyerPrivate> 12L3MarriedGujaratiYes3Car35120NoYes80035NavratriGujaratiNo88Urban038O-3433333.33YesNoNoYesDigital+Active
624Meera PillaiFemale45-54ThiruvananthapuramKeralaPostgraduateSchool PrincipalPublic> 12L3MarriedMalayalamYes1Car25100YesYes81520OnamSouth IndianYes90Urban028A+3416666.67YesNoNoYesDigital+Active
712Sonali KulkarniFemale35-44NagpurMaharashtraPostgraduateHR LeadPrivate6L-12L3MarriedMarathiYes2Car35120YesYes79026DiwaliMaharashtrianNo84Urban028AB-3400000.00YesNoNoYesDigital+Active
823Aakash JainMale25-34AhmedabadGujaratGraduateEntrepreneurStartup6L-12L4SingleGujaratiYes5Car3570NoYes78060NavratriGujaratiNo85Urban048B+3300000.00YesYesNoNoDigital
92Priya IyerFemale25-34ChennaiTamil NaduPostgraduateData AnalystIT6L-12L4MarriedTamilYes3Bike4090YesYes78028PongalSouth IndianYes88Urban036B+2275000.00YesNoNoYesDigital+Active
1022Rekha GuptaFemale35-44KanpurUttar PradeshGraduateBank OfficerPublic6L-12L4MarriedHindiYes2Car3090YesYes79030DiwaliNorth IndianYes88Urban028AB+2245000.00YesNoNoYesDigital+Active


Purpose: Join aggregates to individuals to provide “context columns”. 

proc sql;

  create table people_with_city_context as

  select p.*, c.N as City_Count, c.Avg_Income as City_Avg_Income,

         c.Avg_DigiLit as City_Avg_DigiLit

  from people_enriched as p

  left join city_agg as c

    on p.State=c.State and p.City=c.City;

quit;

proc print data=people_with_city_context (obs=10);run;

Output:

ObsPerson_IDNameGenderAgeCityStateEducation_LevelOccupationSectorIncome_INRHousehold_SizeMarital_StatusLanguageSmartphone_UserInternet_Hours_Per_DayCommute_ModeCommute_MinutesFitness_Mins_Per_WeekHas_Health_InsuranceVoter_ID_FlagCredit_ScoreUPI_Transactions_MonthFestival_CelebratedCuisine_PrefVeg_FlagDigital_Literacy_ScoreRural_UrbanDisability_FlagTravel_Trips_YearPollution_Concern_ScoreBlood_GroupIncome_QuartileIncome_per_CapitaHigh_DigitalBusy_Internet_UserLong_CommuteActive_LifestyleCity_CountCity_Avg_IncomeCity_Avg_DigiLit
110Keerthi RFemale25-34VisakhapatnamAndhra PradeshGraduateGraphic DesignerMedia3L-6L4SingleTeluguYes5Bike3090NoNo74024UgadiSouth IndianNo82Urban037B-1130000.00YesYesNoYes1520,00082.0
218Trisha DeyFemale25-34SilcharAssamGraduateContent WriterMedia3L-6L4SingleAssameseYes4Auto2070NoYes73522BihuNorth EastNo80Semi-Urban026B+1115000.00YesYesNoNo1460,00080.0
38Sana ParveenFemale25-34PatnaBiharGraduateNurseHealthcare3L-6L5SingleHindiYes3Auto35120YesYes73020EidNorth IndianNo77Semi-Urban016B+196000.00NoNoNoYes1480,00077.0
430Anil KumarMale25-34ChandigarhChandigarhGraduateCivil EngineerConstruction6L-12L3SingleHindiYes2Car30100NoYes77025HoliNorth IndianNo81Urban028A-2283333.33YesNoNoYes1850,00081.0
54Neha SharmaFemale35-44DelhiDelhiPostgraduateMarketing ManagerPrivate> 12L3MarriedHindiYes2Car5575YesYes80518HoliNorth IndianNo83Urban058AB+3466666.67YesNoYesNo11,400,00083.0
623Aakash JainMale25-34AhmedabadGujaratGraduateEntrepreneurStartup6L-12L4SingleGujaratiYes5Car3570NoYes78060NavratriGujaratiNo85Urban048B+3300000.00YesYesNoNo11,200,00085.0
728Shruti JoshiFemale35-44SuratGujaratPostgraduateFashion BuyerPrivate> 12L3MarriedGujaratiYes3Car35120NoYes80035NavratriGujaratiNo88Urban038O-3433333.33YesNoNoYes11,300,00088.0
832Shivam PatelMale25-34VadodaraGujaratGraduateMechanical EngineerManufacturing6L-12L4SingleGujaratiYes2Bike2570NoYes76020NavratriGujaratiYes80Urban027O+2195000.00YesNoNoNo1780,00080.0
915Manoj KumarMale45-54GurugramHaryanaGraduateProject ManagerIT> 12L4MarriedHindiYes2Car6060YesYes82038DiwaliNorth IndianNo90Urban049B+3450000.00YesNoYesNo11,800,00090.0
1025Rajeev RanjanMale25-34RanchiJharkhandGraduatePolice Sub-InspectorPublic6L-12L4MarriedHindiYes2Bike2060YesYes74025ChhathNorth IndianNo72Urban017O+2175000.00NoNoNoNo1700,00072.0


10) GRAPHING (PROC SGPLOT)  

Purpose: Visualize relationships for storytelling (income vs digital). 

ods graphics on;

proc sgplot data=people_enriched;

  reg x=Digital_Literacy_Score y=Income_INR;

  scatter x=Digital_Literacy_Score y=Income_INR / group=Sector;

  yaxis label="Annual Income (INR)";

  xaxis label="Digital Literacy Score";

  title "Income vs Digital Literacy by Sector";

run;

ods graphics off;

Output:



Purpose: Show distribution of commute times segmented by Rural/Urban. 

ods graphics on;

proc sgplot data=people_enriched;

  vbox Commute_Minutes / category=Rural_Urban;

  title "Commute Minutes by Rural/Urban Category";

run;

ods graphics off;

Output:



11) REUSABLE MACROS

Purpose: %make_formats — Reapply formats/labels after transformations. 

%macro make_formats;

  proc datasets lib=work nolist;

    modify people_enriched;

      format Gender $genderF. Marital_Status $marF. Rural_Urban $ruF.

             Smartphone_User yesnoF. Has_Health_Insurance yesnoF.

             Voter_ID_Flag yesnoF. Veg_Flag yesnoF.;

  quit;

%mend;


Purpose: %summary_by(state or sector) — Instant grouped KPI table. 

%macro summary_by(classvar);

  proc summary data=people_enriched nway;

    class &classvar.;

    var Income_INR Credit_Score Digital_Literacy_Score UPI_Transactions_Month;

    output out=summary_&classvar.

      n()=N

      mean(Income_INR)=Avg_Income

      mean(Credit_Score)=Avg_Credit

      mean(Digital_Literacy_Score)=Avg_DigiLit

      mean(UPI_Transactions_Month)=Avg_UPI;

    title "Auto Summary by &classvar.";

  run;

  title;

%mend;


Purpose: %add_person — Append one more record (for demonstrations). 

%macro add_person(

  Person_ID, Name, Gender, Age, City, State, Education_Level, Occupation, Sector,

  Income_INR, Household_Size, Marital_Status, Language, Smartphone_User,

  Internet_Hours_Per_Day, Commute_Mode, Commute_Minutes, Fitness_Mins_Per_Week,

  Has_Health_Insurance, Voter_ID_Flag, Credit_Score, UPI_Transactions_Month,

  Festival_Celebrated, Cuisine_Pref, Veg_Flag, Digital_Literacy_Score, Rural_Urban,

  Disability_Flag, Travel_Trips_Year, Pollution_Concern_Score, Blood_Group);

data one_more;

    length Name $28 City $20 State $20 Education_Level $20 Occupation $24 Sector $16

           Marital_Status $1 Language $18 Commute_Mode $14 Festival_Celebrated $18

           Cuisine_Pref $16 Rural_Urban $10 Blood_Group $3 Gender $1;

    Person_ID=&Person_ID.;

    Name="&Name."; Gender="&Gender."; Age=&Age.;

    City="&City."; State="&State."; Education_Level="&Education_Level.";

    Occupation="&Occupation."; Sector="&Sector."; Income_INR=&Income_INR.;

    Household_Size=&Household_Size.; Marital_Status="&Marital_Status.";

    Language="&Language."; Smartphone_User=&Smartphone_User.;

    Internet_Hours_Per_Day=&Internet_Hours_Per_Day.;

    Commute_Mode="&Commute_Mode."; Commute_Minutes=&Commute_Minutes.;

    Fitness_Mins_Per_Week=&Fitness_Mins_Per_Week.;

    Has_Health_Insurance=&Has_Health_Insurance.; Voter_ID_Flag=&Voter_ID_Flag.;

    Credit_Score=&Credit_Score.; UPI_Transactions_Month=&UPI_Transactions_Month.;

    Festival_Celebrated="&Festival_Celebrated.";

    Cuisine_Pref="&Cuisine_Pref."; Veg_Flag=&Veg_Flag.;

    Digital_Literacy_Score=&Digital_Literacy_Score.; Rural_Urban="&Rural_Urban.";

    Disability_Flag=&Disability_Flag.; Travel_Trips_Year=&Travel_Trips_Year.;

    Pollution_Concern_Score=&Pollution_Concern_Score.; Blood_Group="&Blood_Group.";

    format Gender $genderF. Marital_Status $marF. Rural_Urban $ruF.

           Smartphone_User yesnoF. Has_Health_Insurance yesnoF.

           Voter_ID_Flag yesnoF. Veg_Flag yesnoF. Age agebandF.

           Income_INR incomeF.;

  run;

 proc print;run;

  proc append base=people_enriched data=one_more force; run;

%mend;


Purpose: %qc_minmax — Quick QC: check value ranges and flag outliers. 

%macro qc_minmax(ds,var,low,high);

  data qc_&var.;

    set &ds.;

    length QC_Flag $40;

    if &var. < &low. then QC_Flag="Below Min";

    else if &var. > &high. then QC_Flag="Above Max";

    else QC_Flag="OK";

  run;


  proc freq data=qc_&var.;

    tables QC_Flag / missing;

    title "QC for &var.: Range [&low., &high.]";

  run;

  title;

%mend;


%summary_by(State);

Output:

Auto Summary by State

ObsState_TYPE__FREQ_NAvg_IncomeAvg_CreditAvg_DigiLitAvg_UPI
1Andhra Pradesh1113L-6L740.00082.000024.0000
2Assam1113L-6L735.00080.000022.0000
3Bihar1113L-6L730.00077.000020.0000
4Chandigarh1116L-12L770.00081.000025.0000
5Delhi111> 12L805.00083.000018.0000
6Gujarat1336L-12L780.00084.333338.3333
7Haryana111> 12L820.00090.000038.0000
8Jharkhand1116L-12L740.00072.000025.0000
9Karnataka133> 12L553.33385.000033.0000
10Kerala1226L-12L795.00088.000026.0000
11Madhya Pradesh1223L-6L717.50072.500022.5000
12Maharashtra1336L-12L750.00079.000025.3333
13Odisha1113L-6L720.00065.000018.0000
14Punjab1113L-6L715.00066.000018.0000
15Rajasthan1113L-6L705.00064.000020.0000
16Tamil Nadu1226L-12L757.50085.000025.0000
17Telangana1113L-6L720.00072.000022.0000
18Uttar Pradesh1443L-6L532.50067.500022.0000
19West Bengal1223L-6L384.00074.000016.0000

%summary_by(Sector);

Output:

Auto Summary by State

ObsState_TYPE__FREQ_NAvg_IncomeAvg_CreditAvg_DigiLitAvg_UPI
1Andhra Pradesh1113L-6L740.00082.000024.0000
2Assam1113L-6L735.00080.000022.0000
3Bihar1113L-6L730.00077.000020.0000
4Chandigarh1116L-12L770.00081.000025.0000
5Delhi111> 12L805.00083.000018.0000
6Gujarat1336L-12L780.00084.333338.3333
7Haryana111> 12L820.00090.000038.0000
8Jharkhand1116L-12L740.00072.000025.0000
9Karnataka133> 12L553.33385.000033.0000
10Kerala1226L-12L795.00088.000026.0000
11Madhya Pradesh1223L-6L717.50072.500022.5000
12Maharashtra1336L-12L750.00079.000025.3333
13Odisha1113L-6L720.00065.000018.0000
14Punjab1113L-6L715.00066.000018.0000
15Rajasthan1113L-6L705.00064.000020.0000
16Tamil Nadu1226L-12L757.50085.000025.0000
17Telangana1113L-6L720.00072.000022.0000
18Uttar Pradesh1443L-6L532.50067.500022.0000
19West Bengal1223L-6L384.00074.000016.0000

%make_formats;

Log:

NOTE: MODIFY was successful for WORK.PEOPLE_ENRICHED.DATA.
NOTE: PROCEDURE DATASETS used (Total process time):
real time 0.00 seconds
user cpu time 0.00 seconds
system cpu time 0.00 seconds
memory 562.15k
OS Memory 23972.00k
Timestamp 09/01/2025 12:54:05 PM
Step Count 299 Switch Count 0
Page Faults 0
Page Reclaims 85
Page Swaps 0
Voluntary Context Switches 0
Involuntary Context Switches 1
Block Input Operations 0
Block Output Operations 272

%add_person(99,Rehan Qureshi,M,34,Noida,Uttar Pradesh,Graduate,UX Designer,IT,

  1050000,3,S,Hindi,1,4,Metro,50,80,1,1,790,30,Diwali,North Indian,0,88,Urban,0,2,8,B+);

Output:

ObsNameCityStateEducation_LevelOccupationSectorMarital_StatusLanguageCommute_ModeFestival_CelebratedCuisine_PrefRural_UrbanBlood_GroupGenderPerson_IDAgeIncome_INRHousehold_SizeSmartphone_UserInternet_Hours_Per_DayCommute_MinutesFitness_Mins_Per_WeekHas_Health_InsuranceVoter_ID_FlagCredit_ScoreUPI_Transactions_MonthVeg_FlagDigital_Literacy_ScoreDisability_FlagTravel_Trips_YearPollution_Concern_Score
1Rehan QureshiNoidaUttar PradeshGraduateUX DesignerITSingleHindiMetroDiwaliNorth IndianUrbanB+Male9925-346L-12L3Yes45080YesYes79030No88028

12) TARGETED QC CHECKS 

Purpose: Use macro QC checker for critical numeric fields. 

%qc_minmax(people_enriched, Credit_Score, 300, 900);

Output:

QC for Credit_Score: Range [300, 900]

The FREQ Procedure

QC_FlagFrequencyPercentCumulative
Frequency
Cumulative
Percent
Below Min38.8238.82
OK3191.1834100.00

%qc_minmax(people_enriched, Digital_Literacy_Score, 0, 100);

Output:

QC for Digital_Literacy_Score: Range [0, 100]

The FREQ Procedure

QC_FlagFrequencyPercentCumulative
Frequency
Cumulative
Percent
OK34100.0034100.00

%qc_minmax(people_enriched, Income_INR, 0, 10000000);

Output:

QC for Income_INR: Range [0, 10000000]

The FREQ Procedure

QC_FlagFrequencyPercentCumulative
Frequency
Cumulative
Percent
OK34100.0034100.00

13) AD-HOC BUSINESS QUERIES 

Purpose: Identify "digital champions" cities for a pilot (SQL filter). 

proc sql;

  create table digital_champion_cities as

  select State, City,

         mean(Digital_Literacy_Score) as Avg_DigiLit format=8.1,

         mean(Income_INR) as Avg_Income format=comma12.,

         sum(UPI_Transactions_Month) as Total_UPI

  from people_enriched

  group by State, City

  having Avg_DigiLit >= 80 and Total_UPI >= 200

  order by Avg_DigiLit desc, Total_UPI desc;

quit;

proc print;run;

Log

NOTE: No observations in data set WORK.DIGITAL_CHAMPION_CITIES.

Purpose: Extract target segment: high-income, high-digital, insured. 

data target_segment;

  set people_enriched;

  if Income_INR >= 1200000 and High_Digital=1 and Has_Health_Insurance=1;

run;

proc print;run;

Output:

ObsPerson_IDNameGenderAgeCityStateEducation_LevelOccupationSectorIncome_INRHousehold_SizeMarital_StatusLanguageSmartphone_UserInternet_Hours_Per_DayCommute_ModeCommute_MinutesFitness_Mins_Per_WeekHas_Health_InsuranceVoter_ID_FlagCredit_ScoreUPI_Transactions_MonthFestival_CelebratedCuisine_PrefVeg_FlagDigital_Literacy_ScoreRural_UrbanDisability_FlagTravel_Trips_YearPollution_Concern_ScoreBlood_GroupIncome_QuartileIncome_per_CapitaHigh_DigitalBusy_Internet_UserLong_CommuteActive_Lifestyle
14Neha SharmaFemale35-44DelhiDelhiPostgraduateMarketing ManagerPrivate> 12L3MarriedHindiYes2Car5575YesYes80518HoliNorth IndianNo83Urban058AB+3466666.67YesNoYesNo
212Sonali KulkarniFemale35-44NagpurMaharashtraPostgraduateHR LeadPrivate6L-12L3MarriedMarathiYes2Car35120YesYes79026DiwaliMaharashtrianNo84Urban028AB-3400000.00YesNoNoYes
315Manoj KumarMale45-54GurugramHaryanaGraduateProject ManagerIT> 12L4MarriedHindiYes2Car6060YesYes82038DiwaliNorth IndianNo90Urban049B+3450000.00YesNoYesNo
419Vikram RaoMale35-44BengaluruKarnatakaPostgraduateData ScientistIT> 12L3MarriedKannadaYes3Metro50150YesYes83545UgadiSouth IndianNo92Urban039O+3733333.33YesNoYesYes
524Meera PillaiFemale45-54ThiruvananthapuramKeralaPostgraduateSchool PrincipalPublic> 12L3MarriedMalayalamYes1Car25100YesYes81520OnamSouth IndianYes90Urban028A+3416666.67YesNoNoYes
631Devika RaoFemale25-34MangaloreKarnatakaGraduateProduct ManagerIT> 12L3MarriedKannadaYes3Car35100YesYes82540UgadiSouth IndianNo89Urban038AB+3533333.33YesNoNoYes


Purpose: Side-by-side compare of segment vs overall means. 

proc means data=people_enriched noprint;

  var Income_INR Credit_Score Digital_Literacy_Score UPI_Transactions_Month;

  output out=overall_means mean=;

run;

proc print;run;

Output:

Obs_TYPE__FREQ_Income_INRCredit_ScoreDigital_Literacy_ScoreUPI_Transactions_Month
10346L-12L690.23578.294125.5

proc means data=target_segment noprint;

  var Income_INR Credit_Score Digital_Literacy_Score UPI_Transactions_Month;

  output out=segment_means mean=;

run;

proc print;run;

Output:

Obs_TYPE__FREQ_Income_INRCredit_ScoreDigital_Literacy_ScoreUPI_Transactions_Month
106> 12L8158831.1667

Purpose: Merge comparator rows to feed a simple delta calc. 

data compare_means;

  merge overall_means(rename=(Income_INR=All_Income Credit_Score=All_Credit

                              Digital_Literacy_Score=All_Digi UPI_Transactions_Month=All_UPI))

        segment_means(rename=(Income_INR=Seg_Income Credit_Score=Seg_Credit

                              Digital_Literacy_Score=Seg_Digi UPI_Transactions_Month=Seg_UPI));

run;


proc print data=compare_means label noobs;

  label All_Income="All Income" Seg_Income="Segment Income"

        All_Credit="All Credit" Seg_Credit="Segment Credit"

        All_Digi="All Digital Lit" Seg_Digi="Segment Digital Lit"

        All_UPI="All UPI" Seg_UPI="Segment UPI";

  title "Segment vs Overall Averages";

run;

title;

Output:

Segment vs Overall Averages

_TYPE__FREQ_All IncomeAll CreditAll Digital LitAll UPISegment IncomeSegment CreditSegment Digital LitSegment UPI
066L-12L690.23578.294125.5> 12L8158831.1667









To Visit My Previous E-Commerce Dataset:Click Here
To Visit My Previous Length,Input,Retain Statements:Click Here
To Visit My Previous Urban Traffic Dataset:Click Here
To Visit My Previous Home Energy Consumption Dataset:Click Here



Follow Us On : 


 


--- FOLLOW OUR BLOG FOR MORE INFORMATION.

--->PLEASE DO COMMENTS AND SHARE OUR BLOG.


No comments:

Post a Comment