Saturday, 5 July 2025

231.ANALYZING INDIAN LANGUAGES USING PROC FORMAT | PROC PRINT | PROC SORT | PROC MEANS | PROC FREQ | PROC SQL | PROC SUMMARY | PROC TABULATE | PROC UNIVARIATE | PROC REPORT | PROC TRANSPOSE | MACROS

ANALYZING INDIAN LANGUAGES USING PROC FORMAT | PROC PRINT | PROC SORT | PROC MEANS | PROC FREQ | PROC SQL | PROC SUMMARY | PROC TABULATE | PROC UNIVARIATE | PROC REPORT | PROC TRANSPOSE | MACROS

 /*Creating A Dataset Of Languages In India*/

1. Global OPTIONS and custom FORMATS

/* Session-wide settings used by every step in this example. */
options nocenter fullstimer yearcutoff=1900
        pagesize=60 linesize=120
        missing='.'      /* character printed for missing numeric values */
        dkricond=error;  /* raise an error when DROP=/KEEP=/RENAME= names a
                            variable that is absent from an input data set */


/*
 * Display formats used throughout the analysis.
 *   $famfmt. : language-family code -> family name
 *   riskfmt. : endangerment score (1-4) -> risk label
 * Labels use plain ASCII hyphens so they print correctly regardless of the
 * session encoding.
 */
proc format;
   value $famfmt
      'IA'  = 'Indo-Aryan'
      'DR'  = 'Dravidian'
      'AST' = 'Austro-Asiatic'
      'ST'  = 'Sino-Tibetan'
      'ISO' = 'Isolate/Other';

   value riskfmt
      1 = 'Safe'
      2 = 'Vulnerable'
      3 = 'Definitely Endangered'
      4 = 'Severely Endangered';
run;


2. DATA step

/*
 * Build WORK.LANG_INDIA from in-stream, pipe-delimited records.
 * One record per language, eleven fields per record, in INPUT order.
 * A '.' in the Recognition_Year field is read as a numeric missing value.
 * The data lines are kept contiguous (no blank records) so every INPUT
 * execution reads exactly one language.
 */
data lang_india;
   infile datalines dsd dlm='|';
   input Language :$18.
         Family   :$5.
         Lang_ID
         Speakers_Millions
         Official_States
         Dialects
         Script_Count
         Literacy_Rate
         Recognition_Year
         Digital_Index
         Endanger_Score
         ;
   /* Show family names and risk labels instead of the stored codes. */
   format Family $famfmt. Endanger_Score riskfmt.;
datalines;
Hindi|IA|101|568|11|23|2|74|1950|88|1
Bengali|IA|102|97|2|18|2|71|1950|84|1
Marathi|IA|103|83|1|15|1|76|1950|72|1
Telugu|DR|104|82|2|21|2|70|1950|75|1
Tamil|DR|105|75|2|22|2|78|1950|80|1
Gujarati|IA|106|56|1|10|1|79|1950|65|1
Urdu|IA|107|50|2|13|2|69|1950|67|2
Kannada|DR|108|45|1|20|1|76|1950|68|1
Odia|IA|109|37|1|14|1|73|1950|60|1
Malayalam|DR|110|35|1|16|2|97|1950|77|1
Punjabi|IA|111|33|2|9|2|75|1992|70|1
Maithili|IA|112|30|1|12|1|65|2003|40|2
Santali|AST|113|8|2|7|2|55|2003|35|3
Kashmiri|IA|114|7|1|6|2|62|1950|38|3
Nepali|IA|115|6|1|5|1|68|1950|50|2
Konkani|IA|116|4|1|4|2|81|1992|55|2
Dogri|IA|117|3|1|3|2|61|2003|30|3
Sindhi|IA|118|3|0|3|1|75|1967|25|4
Bodo|ST|119|2|1|2|1|60|2003|20|2
Manipuri|ST|120|1.8|1|2|2|70|1992|30|2
Assamese|IA|121|15|1|11|2|72|1950|58|1
Tulu|DR|122|2|0|3|2|69|.|25|3
Bhili|IA|123|9|0|4|1|45|.|15|3
Gondi|DR|124|3|0|6|2|50|.|10|4
Mundari|AST|125|2|0|4|1|40|.|12|3
Khasi|AST|126|1.4|0|2|2|68|.|18|2
Lepcha|ST|127|0.05|0|2|1|55|.|5|4
Mizo|ST|128|0.83|1|2|1|91|.|40|1
Bhutia|ST|129|0.07|0|1|1|53|.|4|3
Nishi|ST|130|0.33|0|2|1|35|.|6|3
;
run;

/* Name the data set explicitly instead of relying on _LAST_. */
proc print data=lang_india;
run;

Output:

Obs Language Family Lang_ID Speakers_Millions Official_States Dialects Script_Count Literacy_Rate Recognition_Year Digital_Index Endanger_Score
1 Hindi Indo-Aryan 101 568.00 11 23 2 74 1950 88 Safe
2 Bengali Indo-Aryan 102 97.00 2 18 2 71 1950 84 Safe
3 Marathi Indo-Aryan 103 83.00 1 15 1 76 1950 72 Safe
4 Telugu Dravidian 104 82.00 2 21 2 70 1950 75 Safe
5 Tamil Dravidian 105 75.00 2 22 2 78 1950 80 Safe
6 Gujarati Indo-Aryan 106 56.00 1 10 1 79 1950 65 Safe
7 Urdu Indo-Aryan 107 50.00 2 13 2 69 1950 67 Vulnerable
8 Kannada Dravidian 108 45.00 1 20 1 76 1950 68 Safe
9 Odia Indo-Aryan 109 37.00 1 14 1 73 1950 60 Safe
10 Malayalam Dravidian 110 35.00 1 16 2 97 1950 77 Safe
11 Punjabi Indo-Aryan 111 33.00 2 9 2 75 1992 70 Safe
12 Maithili Indo-Aryan 112 30.00 1 12 1 65 2003 40 Vulnerable
13 Santali Austro-Asiatic 113 8.00 2 7 2 55 2003 35 Definitely Endangered
14 Kashmiri Indo-Aryan 114 7.00 1 6 2 62 1950 38 Definitely Endangered
15 Nepali Indo-Aryan 115 6.00 1 5 1 68 1950 50 Vulnerable
16 Konkani Indo-Aryan 116 4.00 1 4 2 81 1992 55 Vulnerable
17 Dogri Indo-Aryan 117 3.00 1 3 2 61 2003 30 Definitely Endangered
18 Sindhi Indo-Aryan 118 3.00 0 3 1 75 1967 25 Severely Endangered
19 Bodo Sino-Tibetan 119 2.00 1 2 1 60 2003 20 Vulnerable
20 Manipuri Sino-Tibetan 120 1.80 1 2 2 70 1992 30 Vulnerable
21 Assamese Indo-Aryan 121 15.00 1 11 2 72 1950 58 Safe
22 Tulu Dravidian 122 2.00 0 3 2 69 . 25 Definitely Endangered
23 Bhili Indo-Aryan 123 9.00 0 4 1 45 . 15 Definitely Endangered
24 Gondi Dravidian 124 3.00 0 6 2 50 . 10 Severely Endangered
25 Mundari Austro-Asiatic 125 2.00 0 4 1 40 . 12 Definitely Endangered
26 Khasi Austro-Asiatic 126 1.40 0 2 2 68 . 18 Vulnerable
27 Lepcha Sino-Tibetan 127 0.05 0 2 1 55 . 5 Severely Endangered
28 Mizo Sino-Tibetan 128 0.83 1 2 1 91 . 40 Safe
29 Bhutia Sino-Tibetan 129 0.07 0 1 1 53 . 4 Definitely Endangered
30 Nishi Sino-Tibetan 130 0.33 0 2 1 35 . 6 Definitely Endangered


3. First look—PROC PRINT and PROC SORT

/* Rank the languages from most to fewest native speakers.
   The ranked copy goes to LANG_SORTED; LANG_INDIA keeps its original order. */
proc sort data=lang_india
          out=lang_sorted;
   by descending Speakers_Millions;
run;

/* Print only the first ten rows of the ranked table. */
title "TOP 10 LANGUAGES BY NATIVE SPEAKER POPULATION";
proc print data=lang_sorted(obs=10) noobs label;
   label Speakers_Millions = "Millions of Native Speakers";
   var Language Speakers_Millions Official_States Script_Count;
run;
title;   /* clear the title so it does not carry over */

Output:

TOP 10 LANGUAGES BY NATIVE SPEAKER POPULATION

Language | Millions of Native Speakers | Official_States | Script_Count
Hindi 568 11 2
Bengali 97 2 2
Marathi 83 1 1
Telugu 82 2 2
Tamil 75 2 2
Gujarati 56 1 1
Urdu 50 2 2
Kannada 45 1 1
Odia 37 1 1
Malayalam 35 1 2


4. Macro #1—Reusable descriptives 

/*
 * MEANSTAT - N, mean, minimum and maximum (two decimals) for the requested
 * analysis variable(s) of WORK.LANG_INDIA.
 *   var= : variable name(s) passed straight to the VAR statement.
 */
%macro meanstat(var=);
   /* "NOTE:" (ASCII colon, no hyphen) is the keyword that makes %PUT write
      the line to the log as a note. */
   %put NOTE: Descriptive statistics for &var ;

   proc means data=lang_india n mean min max maxdec=2;
      var &var ;
   run;
%mend meanstat;

%meanstat(var=Speakers_Millions)

Output:

The MEANS Procedure

Analysis Variable : Speakers_Millions
N Mean Minimum Maximum
30 41.98 0.05 568.00


/* Descriptives for Dialects; SAS variable names are case-insensitive, so
   "DialectS" resolves to the Dialects column. */
%meanstat(var=DialectS)         

Output:

The MEANS Procedure

Analysis Variable : Dialects
N Mean Minimum Maximum
30 8.73 1.00 23.00


/* Descriptives (N, mean, min, max) for Digital_Index. */
%meanstat(var=Digital_Index)

Output:

The MEANS Procedure

Analysis Variable : Digital_Index
N Mean Minimum Maximum
30 44.07 4.00 88.00

5. Macro #2—Family filter & dynamic SQL

/*
 * FAMVIEW - summarise one language family.
 *   code= : family code as stored in LANG_INDIA.Family (the data uses
 *           IA, DR, AST and ST). Upper-cased before use, so code=ia and
 *           code=IA behave the same.
 * Creates the view WORK.FAM_<code> (all rows of that family) and the table
 * WORK.FAM_SUM_<code> (sums of speakers, official states and dialects),
 * then prints the sums.
 */
%macro famview(code=);
   %local fam;
   %let fam = %upcase(&code);   /* stored codes are upper case */

   proc sql;
      create view fam_&fam as
      select *
      from   lang_india
      where  Family = "&fam";
   quit;

   title "SUMMARY FOR FAMILY &fam (Auto-generated)";

   /* AUTONAME yields <variable>_Sum column names in the output table. */
   proc summary data=fam_&fam nway;
      var Speakers_Millions Official_States Dialects;
      output out=fam_sum_&fam (drop=_type_ _freq_)
             sum= / autoname;
   run;

   proc print data=fam_sum_&fam noobs label;
   run;

   title;   /* reset so the family title does not leak into later output */
%mend famview;


/* Totals for family code IA (formatted as Indo-Aryan by $famfmt.). */
%famview(code=IA)

Output:

SUMMARY FOR FAMILY IA (Auto-generated)

Speakers_Millions_Sum Official_States_Sum Dialects_Sum
1001 26 150


/* Totals for family code DR (formatted as Dravidian by $famfmt.). */
%famview(code=DR)

Output:

SUMMARY FOR FAMILY DR (Auto-generated)

Speakers_Millions_Sum Official_States_Sum Dialects_Sum
242 6 88


6. Descriptive PROC MEANS, PROC FREQ, and PROC UNIVARIATE

/* Speaker-count distribution within each language family:
   mean, median, quartiles and range, shown to one decimal place. */
proc means data=lang_india
           mean median q1 q3 min max
           maxdec=1;
   class Family;
   var   Speakers_Millions;
run;

Output:

The MEANS Procedure

Analysis Variable : Speakers_Millions
Family N Obs Mean Median Lower Quartile Upper Quartile Minimum Maximum
Austro-Asiatic 3 3.8 2.0 1.4 8.0 1.4 8.0
Dravidian 6 40.3 40.0 3.0 75.0 2.0 82.0
Indo-Aryan 15 66.7 30.0 6.0 56.0 3.0 568.0
Sino-Tibetan 6 0.8 0.6 0.1 1.8 0.1 2.0


/* Counts only (no percentages):
   1) languages per risk level, 2) family-by-risk cross-tabulation. */
proc freq data=lang_india;
   tables Endanger_Score
          Family * Endanger_Score
          / norow nocol nopercent;
run;

Output:

The FREQ Procedure

Endanger_Score Frequency Cumulative Frequency
Safe 12 12
Vulnerable 7 19
Definitely Endangered 8 27
Severely Endangered 3 30


Table of Family by Endanger_Score (cell values are frequencies)
Family Safe Vulnerable Definitely Endangered Severely Endangered Total
Austro-Asiatic 0 1 2 0 3
Dravidian 4 0 1 1 6
Indo-Aryan 7 4 3 1 15
Sino-Tibetan 1 2 2 1 6
Total 12 7 8 3 30


/* Full distributional profile of Digital_Index: moments, normality tests
   (NORMAL), plots (PLOT), and a histogram with 20-point bins carrying an
   inset of summary statistics in the top-right corner. */
proc univariate data=lang_india normal plot;
   var Digital_Index;
   histogram Digital_Index / midpoints=0 to 100 by 20;
   inset n mean std skewness kurtosis / position=ne;
run;

Output:

The UNIVARIATE Procedure
Variable: Digital_Index

Moments
N 30 Sum Weights 30
Mean 44.0666667 Sum Observations 1322
Std Deviation 26.6509149 Variance 710.271264
Skewness 0.02787991 Kurtosis -1.3801751
Uncorrected SS 78854 Corrected SS 20597.8667
Coeff Variation 60.4786268 Std Error Mean 4.86576909


Basic Statistical Measures
Location Variability
Mean 44.06667 Std Deviation 26.65091
Median 40.00000 Variance 710.27126
Mode 25.00000 Range 84.00000
    Interquartile Range 48.00000

Note: The mode displayed is the smallest of 3 modes with a count of 2.


Tests for Location: Mu0=0
Test Statistic p Value
Student's t t 9.056465 Pr > |t| <.0001
Sign M 15 Pr >= |M| <.0001
Signed Rank S 232.5 Pr >= |S| <.0001


Tests for Normality
Test Statistic p Value
Shapiro-Wilk W 0.936574 Pr < W 0.0736
Kolmogorov-Smirnov D 0.117243 Pr > D >0.1500
Cramer-von Mises W-Sq 0.094867 Pr > W-Sq 0.1290
Anderson-Darling A-Sq 0.595495 Pr > A-Sq 0.1137


Quantiles (Definition 5)
Level Quantile
100% Max 88.0
99% 88.0
95% 84.0
90% 78.5
75% Q3 68.0
50% Median 40.0
25% Q1 20.0
10% 8.0
5% 5.0
1% 4.0
0% Min 4.0


Extreme Observations
Lowest Highest
Value Obs Value Obs
4 29 75 4
5 27 77 10
6 30 80 5
10 24 84 2
12 25 88 1

7. PROC SQL jewels—derived metrics & in‑line functions

/* LIT_MASS: estimated literate speakers (millions) per language,
   i.e. speakers x literacy rate, largest first. */
proc sql;
   create table lit_mass as
      select Language,
             Speakers_Millions * (Literacy_Rate/100)
                as Literate_Millions format=8.1,
             Endanger_Score label='Risk'
        from lang_india
       order by Literate_Millions desc;
quit;

/* LIT_MASS is the most recently created table, named here explicitly. */
proc print data=lit_mass;
run;

Output:

Obs Language Literate_Millions Endanger_Score
1 Hindi 420.3 Safe
2 Bengali 68.9 Safe
3 Marathi 63.1 Safe
4 Tamil 58.5 Safe
5 Telugu 57.4 Safe
6 Gujarati 44.2 Safe
7 Urdu 34.5 Vulnerable
8 Kannada 34.2 Safe
9 Malayalam 34.0 Safe
10 Odia 27.0 Safe
11 Punjabi 24.8 Safe
12 Maithili 19.5 Vulnerable
13 Assamese 10.8 Safe
14 Santali 4.4 Definitely Endangered
15 Kashmiri 4.3 Definitely Endangered
16 Nepali 4.1 Vulnerable
17 Bhili 4.1 Definitely Endangered
18 Konkani 3.2 Vulnerable
19 Sindhi 2.3 Severely Endangered
20 Dogri 1.8 Definitely Endangered
21 Gondi 1.5 Severely Endangered
22 Tulu 1.4 Definitely Endangered
23 Manipuri 1.3 Vulnerable
24 Bodo 1.2 Vulnerable
25 Khasi 1.0 Vulnerable
26 Mundari 0.8 Definitely Endangered
27 Mizo 0.8 Safe
28 Nishi 0.1 Definitely Endangered
29 Bhutia 0.0 Definitely Endangered
30 Lepcha 0.0 Severely Endangered


8. Wide‑to‑long pivot—PROC TRANSPOSE for longitudinal modelling

/* SPEAKERS_HIST: adds three illustrative census-year columns, each a fixed
   fraction of the current speaker count (0.45, 0.82 and 0.95). */
data speakers_hist;
   set lang_india;

   array census{3} Speakers1951 Speakers2001 Speakers2011;
   array share{3} _temporary_ (0.45 0.82 0.95);

   do _k = 1 to dim(census);
      census{_k} = Speakers_Millions * share{_k};
   end;

   drop _k;   /* loop counter is not part of the output */
run;

proc print data=speakers_hist;
run;

Output:

Obs Language Family Lang_ID Speakers_Millions Official_States Dialects Script_Count Literacy_Rate Recognition_Year Digital_Index Endanger_Score Speakers1951 Speakers2001 Speakers2011
1 Hindi Indo-Aryan 101 568.00 11 23 2 74 1950 88 Safe 255.600 465.760 539.600
2 Bengali Indo-Aryan 102 97.00 2 18 2 71 1950 84 Safe 43.650 79.540 92.150
3 Marathi Indo-Aryan 103 83.00 1 15 1 76 1950 72 Safe 37.350 68.060 78.850
4 Telugu Dravidian 104 82.00 2 21 2 70 1950 75 Safe 36.900 67.240 77.900
5 Tamil Dravidian 105 75.00 2 22 2 78 1950 80 Safe 33.750 61.500 71.250
6 Gujarati Indo-Aryan 106 56.00 1 10 1 79 1950 65 Safe 25.200 45.920 53.200
7 Urdu Indo-Aryan 107 50.00 2 13 2 69 1950 67 Vulnerable 22.500 41.000 47.500
8 Kannada Dravidian 108 45.00 1 20 1 76 1950 68 Safe 20.250 36.900 42.750
9 Odia Indo-Aryan 109 37.00 1 14 1 73 1950 60 Safe 16.650 30.340 35.150
10 Malayalam Dravidian 110 35.00 1 16 2 97 1950 77 Safe 15.750 28.700 33.250
11 Punjabi Indo-Aryan 111 33.00 2 9 2 75 1992 70 Safe 14.850 27.060 31.350
12 Maithili Indo-Aryan 112 30.00 1 12 1 65 2003 40 Vulnerable 13.500 24.600 28.500
13 Santali Austro-Asiatic 113 8.00 2 7 2 55 2003 35 Definitely Endangered 3.600 6.560 7.600
14 Kashmiri Indo-Aryan 114 7.00 1 6 2 62 1950 38 Definitely Endangered 3.150 5.740 6.650
15 Nepali Indo-Aryan 115 6.00 1 5 1 68 1950 50 Vulnerable 2.700 4.920 5.700
16 Konkani Indo-Aryan 116 4.00 1 4 2 81 1992 55 Vulnerable 1.800 3.280 3.800
17 Dogri Indo-Aryan 117 3.00 1 3 2 61 2003 30 Definitely Endangered 1.350 2.460 2.850
18 Sindhi Indo-Aryan 118 3.00 0 3 1 75 1967 25 Severely Endangered 1.350 2.460 2.850
19 Bodo Sino-Tibetan 119 2.00 1 2 1 60 2003 20 Vulnerable 0.900 1.640 1.900
20 Manipuri Sino-Tibetan 120 1.80 1 2 2 70 1992 30 Vulnerable 0.810 1.476 1.710
21 Assamese Indo-Aryan 121 15.00 1 11 2 72 1950 58 Safe 6.750 12.300 14.250
22 Tulu Dravidian 122 2.00 0 3 2 69 . 25 Definitely Endangered 0.900 1.640 1.900
23 Bhili Indo-Aryan 123 9.00 0 4 1 45 . 15 Definitely Endangered 4.050 7.380 8.550
24 Gondi Dravidian 124 3.00 0 6 2 50 . 10 Severely Endangered 1.350 2.460 2.850
25 Mundari Austro-Asiatic 125 2.00 0 4 1 40 . 12 Definitely Endangered 0.900 1.640 1.900
26 Khasi Austro-Asiatic 126 1.40 0 2 2 68 . 18 Vulnerable 0.630 1.148 1.330
27 Lepcha Sino-Tibetan 127 0.05 0 2 1 55 . 5 Severely Endangered 0.023 0.041 0.048
28 Mizo Sino-Tibetan 128 0.83 1 2 1 91 . 40 Safe 0.374 0.681 0.789
29 Bhutia Sino-Tibetan 129 0.07 0 1 1 53 . 4 Definitely Endangered 0.032 0.057 0.067
30 Nishi Sino-Tibetan 130 0.33 0 2 1 35 . 6 Definitely Endangered 0.149 0.271 0.314


/* Reshape the three census columns into rows: one row per language and
   census variable. Census_Year holds the source column name; because there
   is no ID statement, the value column is named Count_1. */
proc transpose data=speakers_hist
               out=speakers_long
               prefix=Count_
               name=Census_Year;
   by  Lang_ID Language Family;
   var Speakers1951 Speakers2001 Speakers2011;
run;

proc print data=speakers_long;
run;

Output:

Obs Lang_ID Language Family Census_Year Count_1
1 101 Hindi Indo-Aryan Speakers1951 255.600
2 101 Hindi Indo-Aryan Speakers2001 465.760
3 101 Hindi Indo-Aryan Speakers2011 539.600
4 102 Bengali Indo-Aryan Speakers1951 43.650
5 102 Bengali Indo-Aryan Speakers2001 79.540
6 102 Bengali Indo-Aryan Speakers2011 92.150
7 103 Marathi Indo-Aryan Speakers1951 37.350
8 103 Marathi Indo-Aryan Speakers2001 68.060
9 103 Marathi Indo-Aryan Speakers2011 78.850
10 104 Telugu Dravidian Speakers1951 36.900
11 104 Telugu Dravidian Speakers2001 67.240
12 104 Telugu Dravidian Speakers2011 77.900
13 105 Tamil Dravidian Speakers1951 33.750
14 105 Tamil Dravidian Speakers2001 61.500
15 105 Tamil Dravidian Speakers2011 71.250
16 106 Gujarati Indo-Aryan Speakers1951 25.200
17 106 Gujarati Indo-Aryan Speakers2001 45.920
18 106 Gujarati Indo-Aryan Speakers2011 53.200
19 107 Urdu Indo-Aryan Speakers1951 22.500
20 107 Urdu Indo-Aryan Speakers2001 41.000
21 107 Urdu Indo-Aryan Speakers2011 47.500
22 108 Kannada Dravidian Speakers1951 20.250
23 108 Kannada Dravidian Speakers2001 36.900
24 108 Kannada Dravidian Speakers2011 42.750
25 109 Odia Indo-Aryan Speakers1951 16.650
26 109 Odia Indo-Aryan Speakers2001 30.340
27 109 Odia Indo-Aryan Speakers2011 35.150
28 110 Malayalam Dravidian Speakers1951 15.750
29 110 Malayalam Dravidian Speakers2001 28.700
30 110 Malayalam Dravidian Speakers2011 33.250
31 111 Punjabi Indo-Aryan Speakers1951 14.850
32 111 Punjabi Indo-Aryan Speakers2001 27.060
33 111 Punjabi Indo-Aryan Speakers2011 31.350
34 112 Maithili Indo-Aryan Speakers1951 13.500
35 112 Maithili Indo-Aryan Speakers2001 24.600
36 112 Maithili Indo-Aryan Speakers2011 28.500
37 113 Santali Austro-Asiatic Speakers1951 3.600
38 113 Santali Austro-Asiatic Speakers2001 6.560
39 113 Santali Austro-Asiatic Speakers2011 7.600
40 114 Kashmiri Indo-Aryan Speakers1951 3.150
41 114 Kashmiri Indo-Aryan Speakers2001 5.740
42 114 Kashmiri Indo-Aryan Speakers2011 6.650
43 115 Nepali Indo-Aryan Speakers1951 2.700
44 115 Nepali Indo-Aryan Speakers2001 4.920
45 115 Nepali Indo-Aryan Speakers2011 5.700
46 116 Konkani Indo-Aryan Speakers1951 1.800
47 116 Konkani Indo-Aryan Speakers2001 3.280
48 116 Konkani Indo-Aryan Speakers2011 3.800
49 117 Dogri Indo-Aryan Speakers1951 1.350
50 117 Dogri Indo-Aryan Speakers2001 2.460
51 117 Dogri Indo-Aryan Speakers2011 2.850
52 118 Sindhi Indo-Aryan Speakers1951 1.350
53 118 Sindhi Indo-Aryan Speakers2001 2.460
54 118 Sindhi Indo-Aryan Speakers2011 2.850
55 119 Bodo Sino-Tibetan Speakers1951 0.900
56 119 Bodo Sino-Tibetan Speakers2001 1.640
57 119 Bodo Sino-Tibetan Speakers2011 1.900
58 120 Manipuri Sino-Tibetan Speakers1951 0.810
59 120 Manipuri Sino-Tibetan Speakers2001 1.476
60 120 Manipuri Sino-Tibetan Speakers2011 1.710
61 121 Assamese Indo-Aryan Speakers1951 6.750
62 121 Assamese Indo-Aryan Speakers2001 12.300
63 121 Assamese Indo-Aryan Speakers2011 14.250
64 122 Tulu Dravidian Speakers1951 0.900
65 122 Tulu Dravidian Speakers2001 1.640
66 122 Tulu Dravidian Speakers2011 1.900
67 123 Bhili Indo-Aryan Speakers1951 4.050
68 123 Bhili Indo-Aryan Speakers2001 7.380
69 123 Bhili Indo-Aryan Speakers2011 8.550
70 124 Gondi Dravidian Speakers1951 1.350
71 124 Gondi Dravidian Speakers2001 2.460
72 124 Gondi Dravidian Speakers2011 2.850
73 125 Mundari Austro-Asiatic Speakers1951 0.900
74 125 Mundari Austro-Asiatic Speakers2001 1.640
75 125 Mundari Austro-Asiatic Speakers2011 1.900
76 126 Khasi Austro-Asiatic Speakers1951 0.630
77 126 Khasi Austro-Asiatic Speakers2001 1.148
78 126 Khasi Austro-Asiatic Speakers2011 1.330
79 127 Lepcha Sino-Tibetan Speakers1951 0.023
80 127 Lepcha Sino-Tibetan Speakers2001 0.041
81 127 Lepcha Sino-Tibetan Speakers2011 0.048
82 128 Mizo Sino-Tibetan Speakers1951 0.374
83 128 Mizo Sino-Tibetan Speakers2001 0.681
84 128 Mizo Sino-Tibetan Speakers2011 0.789
85 129 Bhutia Sino-Tibetan Speakers1951 0.032
86 129 Bhutia Sino-Tibetan Speakers2001 0.057
87 129 Bhutia Sino-Tibetan Speakers2011 0.067
88 130 Nishi Sino-Tibetan Speakers1951 0.149
89 130 Nishi Sino-Tibetan Speakers2001 0.271
90 130 Nishi Sino-Tibetan Speakers2011 0.314


9. PROC TABULATE & PROC REPORT

/* 9A. Summary table - mean literacy rate and mean digital index per family,
       with families listed in the order they first appear in the data. */
proc tabulate data=lang_india order=data format=8.1;
   class Family;
   var   Literacy_Rate Digital_Index;

   table Family,
         (Literacy_Rate Digital_Index) * Mean;

   label Digital_Index = 'Avg Digital Index'
         Literacy_Rate = 'Avg Literacy %';
run;

Output:

  Avg Literacy % Avg Digital Index
Mean Mean
Family
Indo-Aryan 69.7 54.5
Dravidian 73.3 55.8
Austro-Asiatic 54.3 21.7
Sino-Tibetan 60.7 17.5


/* 9B. Styled report for policy brief */

/* 9B. Styled report for policy brief.
   Lists every language alphabetically and shades the Risk cell of those
   whose endangerment score is 3 or higher. */

/* ASCII ">=" is used because the non-ASCII sign printed as "=" in the
   rendered title. */
title h=12pt "LANGUAGES AT RISK SCORING >= 3";

proc report data=lang_india nowd split='*';
   column Language Family Speakers_Millions Endanger_Score Literacy_Rate;

   define Language       / order width=16;
   define Family         / format=$famfmt. 'Family';
   /* DISPLAY usage lets the compute block read the column by its plain
      name. Left at the numeric default (ANALYSIS), the bare name would be
      uninitialised there and the highlight would never fire. */
   define Endanger_Score / display format=riskfmt. 'Risk';
   define Literacy_Rate  / analysis mean 'Lit %';

   compute Endanger_Score;
      if Endanger_Score >= 3 then
         call define(_col_, 'style', 'style={background=lightred}');
   endcomp;
run;

title;

Output:

LANGUAGES AT RISK SCORING =3

Language Family Speakers_Millions Risk Lit %
Assamese Indo-Aryan 15 Safe 72
Bengali Indo-Aryan 97 Safe 71
Bhili Indo-Aryan 9 Definitely Endangered 45
Bhutia Sino-Tibetan 0.07 Definitely Endangered 53
Bodo Sino-Tibetan 2 Vulnerable 60
Dogri Indo-Aryan 3 Definitely Endangered 61
Gondi Dravidian 3 Severely Endangered 50
Gujarati Indo-Aryan 56 Safe 79
Hindi Indo-Aryan 568 Safe 74
Kannada Dravidian 45 Safe 76
Kashmiri Indo-Aryan 7 Definitely Endangered 62
Khasi Austro-Asiatic 1.4 Vulnerable 68
Konkani Indo-Aryan 4 Vulnerable 81
Lepcha Sino-Tibetan 0.05 Severely Endangered 55
Maithili Indo-Aryan 30 Vulnerable 65
Malayalam Dravidian 35 Safe 97
Manipuri Sino-Tibetan 1.8 Vulnerable 70
Marathi Indo-Aryan 83 Safe 76
Mizo Sino-Tibetan 0.83 Safe 91
Mundari Austro-Asiatic 2 Definitely Endangered 40
Nepali Indo-Aryan 6 Vulnerable 68
Nishi Sino-Tibetan 0.33 Definitely Endangered 35
Odia Indo-Aryan 37 Safe 73
Punjabi Indo-Aryan 33 Safe 75
Santali Austro-Asiatic 8 Definitely Endangered 55
Sindhi Indo-Aryan 3 Severely Endangered 75
Tamil Dravidian 75 Safe 78
Telugu Dravidian 82 Safe 70
Tulu Dravidian 2 Definitely Endangered 69
Urdu Indo-Aryan 50 Vulnerable 69


10. Performance tip—indexing high‑cardinality numeric key

/* Add a unique simple index on Lang_ID to WORK.LANG_INDIA.
   NOLIST suppresses the library directory listing. */
proc datasets library=work nolist;
   modify lang_india;
      index create Lang_ID / unique;
   run;
quit;

Output: (no printed output; with NOLIST, PROC DATASETS reports the index creation only in the SAS log)

11. Good‑bye block—PROC MEANS with ODS EXCEL

/* Export speaker-count quartiles per risk level to an Excel workbook
   (sheet "RiskStats") in the directory held in &sysuserhome.
   A forward slash is used as the path separator so the same path works on
   both Windows and UNIX hosts. */
ods excel file="&sysuserhome./lang_summary.xlsx" options(sheet_name="RiskStats");

proc means data=lang_india n median q1 q3;
   class Endanger_Score;
   var Speakers_Millions;
run;

/* Closing the destination writes the workbook to disk. */
ods excel close;

Output:
The MEANS Procedure

Analysis Variable : Speakers_Millions
Endanger_Score N Obs N Median Lower Quartile Upper Quartile
Safe 12 12 50.5000000 34.0000000 82.5000000
Vulnerable 7 7 4.0000000 1.8000000 30.0000000
Definitely Endangered 8 8 2.5000000 1.1650000 7.5000000
Severely Endangered 3 3 3.0000000 0.0500000 3.0000000



To Visit My Previous E-Commerce Dataset:Click Here
To Visit My Previous Length,Input,Retain Statements:Click Here
To Visit My Previous Urban Traffic Dataset:Click Here
To Visit My Previous Home Energy Consumption Dataset:Click Here





--->PLEASE FOLLOW OUR BLOG FOR MORE INFORMATION.
--->PLEASE COMMENT ON AND SHARE OUR BLOG.

PLEASE FOLLOW OUR TELEGRAM CHANNEL CLICK HERE

PLEASE FOLLOW OUR FACEBOOK PAGE  CLICK HERE

PLEASE FOLLOW OUR INSTAGRAM PAGE CLICK HERE






No comments:

Post a Comment