Wednesday, 31 December 2025

356.LANGUAGE POPULARITY & DIFFICULTY DATA ANALYSIS USING SAS DATA STEP | PROC SQL | PROC MEANS | PROC RANK | PROC UNIVARIATE | MACROS | DATE FUNCTIONS (MDY-INTNX-INTCK) | APPEND | MERGE | TRANSPOSE

LANGUAGE POPULARITY & DIFFICULTY DATA ANALYSIS USING SAS DATA STEP | PROC SQL | PROC MEANS | PROC RANK | PROC UNIVARIATE | MACROS | DATE FUNCTIONS (MDY-INTNX-INTCK) | APPEND | MERGE | TRANSPOSE 

options nocenter;

1.LANGUAGES MASTER DATASET CREATION

data languages_master;

    length Language_Name $15 Region $15 Introduced_Date $9;

    format Review_Year 4.;

    input Language_Name $ Speakers Region $ Difficulty_Level Countries_Count

        Introduced_Year;

    /* Store historical date safely as character */

    Introduced_Date = cats('01JAN', Introduced_Year);

    /* Review year after 5 years */

    Review_Year = Introduced_Year + 5;

datalines;

English 1500 Global 2 60 1600

Mandarin 1100 Asia 5 3 1200

Hindi 600 Asia 3 4 1500

Spanish 550 Europe 2 21 1400

French 300 Europe 3 29 1300

Arabic 310 MiddleEast 4 25 700

Russian 260 Europe 4 4 900

Portuguese 260 Europe 2 10 1500

Bengali 230 Asia 3 2 1200

Japanese 125 Asia 5 1 800

German 90 Europe 3 6 1100

Korean 80 Asia 4 2 900

Italian 65 Europe 2 4 1400

Turkish 75 Europe 3 2 1100

;

run;

proc print data=languages_master;

run;

OUTPUT:

ObsLanguage_NameRegionIntroduced_DateReview_YearSpeakersDifficulty_LevelCountries_CountIntroduced_Year
1EnglishGlobal01JAN1600160515002601600
2MandarinAsia01JAN120012051100531200
3HindiAsia01JAN15001505600341500
4SpanishEurope01JAN140014055502211400
5FrenchEurope01JAN130013053003291300
6ArabicMiddleEast01JAN700705310425700
7RussianEurope01JAN90090526044900
8PortugueseEurope01JAN150015052602101500
9BengaliAsia01JAN12001205230321200
10JapaneseAsia01JAN80080512551800
11GermanEurope01JAN1100110590361100
12KoreanAsia01JAN9009058042900
13ItalianEurope01JAN1400140565241400
14TurkishEurope01JAN1100110575321100


2.DATES PRACTICE

data languages_dates;

    set languages_master;

    /* Reference year instead of SAS date */

    Reference_Year = 2026;

    /* Correct calculation using years */

    Years_Since_Intro = Reference_Year - Introduced_Year;

run;

proc print data=languages_dates;

run;

OUTPUT:

ObsLanguage_NameRegionIntroduced_DateReview_YearSpeakersDifficulty_LevelCountries_CountIntroduced_YearReference_YearYears_Since_Intro
1EnglishGlobal01JAN16001605150026016002026426
2MandarinAsia01JAN1200120511005312002026826
3HindiAsia01JAN150015056003415002026526
4SpanishEurope01JAN1400140555022114002026626
5FrenchEurope01JAN1300130530032913002026726
6ArabicMiddleEast01JAN70070531042570020261326
7RussianEurope01JAN9009052604490020261126
8PortugueseEurope01JAN1500150526021015002026526
9BengaliAsia01JAN120012052303212002026826
10JapaneseAsia01JAN8008051255180020261226
11GermanEurope01JAN11001105903611002026926
12KoreanAsia01JAN900905804290020261126
13ItalianEurope01JAN14001405652414002026626
14TurkishEurope01JAN11001105753211002026926


3.PROC SQL – DATA QUERY & AGGREGATION

proc sql;

    create table language_summary as

    select Region,

           count(Language_Name) as Total_Languages,

           mean(Speakers) as Avg_Speakers

    from languages_dates

    group by Region;

quit;

proc print data=language_summary;

run;

OUTPUT:

ObsRegionTotal_LanguagesAvg_Speakers
1Asia5427.00
2Europe7228.57
3Global11500.00
4MiddleEast1310.00


4.PROC MEANS – DESCRIPTIVE STATISTICS

proc means data=languages_dates mean min max;

    var Speakers Countries_Count Difficulty_Level;

run;

OUTPUT:

The MEANS Procedure

VariableMeanMinimumMaximum
Speakers
Countries_Count
Difficulty_Level
396.0714286
12.3571429
3.2142857
65.0000000
1.0000000
2.0000000
1500.00
60.0000000
5.0000000

5.PROC RANK – LANGUAGE POPULARITY RANKING

proc rank data=languages_dates out=language_rank descending;

    var Speakers;

    ranks Popularity_Rank;

run;

proc print data=language_rank;

run;

OUTPUT:

ObsLanguage_NameRegionIntroduced_DateReview_YearSpeakersDifficulty_LevelCountries_CountIntroduced_YearReference_YearYears_Since_IntroPopularity_Rank
1EnglishGlobal01JAN160016051500260160020264261.0
2MandarinAsia01JAN12001205110053120020268262.0
3HindiAsia01JAN1500150560034150020265263.0
4SpanishEurope01JAN14001405550221140020266264.0
5FrenchEurope01JAN13001305300329130020267266.0
6ArabicMiddleEast01JAN700705310425700202613265.0
7RussianEurope01JAN90090526044900202611267.5
8PortugueseEurope01JAN15001505260210150020265267.5
9BengaliAsia01JAN1200120523032120020268269.0
10JapaneseAsia01JAN800805125518002026122610.0
11GermanEurope01JAN1100110590361100202692611.0
12KoreanAsia01JAN90090580429002026112612.0
13ItalianEurope01JAN1400140565241400202662614.0
14TurkishEurope01JAN1100110575321100202692613.0


6.MACRO – AUTOMATED DIFFICULTY CLASSIFICATION

%macro difficulty_class;

data language_class;

    set language_rank;

    length Difficulty_Group $12;

    if Difficulty_Level <=2 then Difficulty_Group='Easy';

    else if Difficulty_Level=3 then Difficulty_Group='Medium';

    else Difficulty_Group='Hard';

run;

proc print data=language_class;

run;

%mend;


%difficulty_class;

OUTPUT:

ObsLanguage_NameRegionIntroduced_DateReview_YearSpeakersDifficulty_LevelCountries_CountIntroduced_YearReference_YearYears_Since_IntroPopularity_RankDifficulty_Group
1EnglishGlobal01JAN160016051500260160020264261.0Easy
2MandarinAsia01JAN12001205110053120020268262.0Hard
3HindiAsia01JAN1500150560034150020265263.0Medium
4SpanishEurope01JAN14001405550221140020266264.0Easy
5FrenchEurope01JAN13001305300329130020267266.0Medium
6ArabicMiddleEast01JAN700705310425700202613265.0Hard
7RussianEurope01JAN90090526044900202611267.5Hard
8PortugueseEurope01JAN15001505260210150020265267.5Easy
9BengaliAsia01JAN1200120523032120020268269.0Medium
10JapaneseAsia01JAN800805125518002026122610.0Hard
11GermanEurope01JAN1100110590361100202692611.0Medium
12KoreanAsia01JAN90090580429002026112612.0Hard
13ItalianEurope01JAN1400140565241400202662614.0Easy
14TurkishEurope01JAN1100110575321100202692613.0Medium


7.PROC UNIVARIATE – DISTRIBUTION ANALYSIS

proc univariate data=language_class;

    var Speakers;

    histogram Speakers;

run;

OUTPUT:

The UNIVARIATE Procedure

Variable: Speakers

Moments
N14Sum Weights14
Mean396.071429Sum Observations5545
Std Deviation424.305671Variance180035.302
Skewness1.81572518Kurtosis2.88471787
Uncorrected SS4536675Corrected SS2340458.93
Coeff Variation107.128573Std Error Mean113.400461
Basic Statistical Measures
LocationVariability
Mean396.0714Std Deviation424.30567
Median260.0000Variance180035
Mode260.0000Range1435
  Interquartile Range460.00000
Tests for Location: Mu0=0
TestStatisticp Value
Student's tt3.492679Pr > |t|0.0040
SignM7Pr >= |M|0.0001
Signed RankS52.5Pr >= |S|0.0001
Quantiles (Definition 5)
LevelQuantile
100% Max1500
99%1500
95%1500
90%1100
75% Q3550
50% Median260
25% Q190
10%75
5%65
1%65
0% Min65
Extreme Observations
LowestHighest
ValueObsValueObs
65133106
75145504
80126003
901111002
1251015001

The UNIVARIATE Procedure

Histogram for Speakers

8.PROC APPEND – ADD NEW LANGUAGE RECORDS

data new_languages;

    length Language_Name $15 Region $15 Introduced_Date $9;

    format Introduced_Year 4.;

    input Language_Name $ Speakers Region $ Difficulty_Level Countries_Count

        Introduced_Date $;

    /* Extract year safely */

    Introduced_Year = input(substr(Introduced_Date, 6, 4), 4.);

     Review_Year = Introduced_Year + 5;

datalines;

Swahili 80 Africa 3 5 01JAN1500

Thai    70 Asia   4 1 01JAN1300

;

run;

proc print data=new_languages;

run;

OUTPUT:

ObsLanguage_NameRegionIntroduced_DateIntroduced_YearSpeakersDifficulty_LevelCountries_CountReview_Year
1SwahiliAfrica01JAN1500150080351505
2ThaiAsia01JAN1300130070411305


proc append base=languages_master 

            data=new_languages;

run;

proc print data=new_languages;

run;

OUTPUT:

ObsLanguage_NameRegionIntroduced_DateIntroduced_YearSpeakersDifficulty_LevelCountries_CountReview_Year
1SwahiliAfrica01JAN1500150080351505
2ThaiAsia01JAN1300130070411305


9.PROC TRANSPOSE – DATA RESTRUCTURING

proc transpose data=languages_master out=transposed_languages;

    by Region NotSorted;

    var Speakers Countries_Count;

run;

proc print data=transposed_languages;

run;

OUTPUT:

ObsRegion_NAME_COL1COL2
1GlobalSpeakers1500.
2GlobalCountries_Count60.
3AsiaSpeakers1100600
4AsiaCountries_Count34
5EuropeSpeakers550300
6EuropeCountries_Count2129
7MiddleEastSpeakers310.
8MiddleEastCountries_Count25.
9EuropeSpeakers260260
10EuropeCountries_Count410
11AsiaSpeakers230125
12AsiaCountries_Count21
13EuropeSpeakers90.
14EuropeCountries_Count6.
15AsiaSpeakers80.
16AsiaCountries_Count2.
17EuropeSpeakers6575
18EuropeCountries_Count42
19AfricaSpeakers80.
20AfricaCountries_Count5.
21AsiaSpeakers70.
22AsiaCountries_Count1.
23AfricaSpeakers80.
24AfricaCountries_Count5.
25AsiaSpeakers70.
26AsiaCountries_Count1.
27AfricaSpeakers80.
28AfricaCountries_Count5.
29AsiaSpeakers70.
30AsiaCountries_Count1.


10.PROC MERGE – COMBINING DATASETS

proc sort data=languages_master; by Language_Name; run;

proc print data=languages_master;

run;

OUTPUT:

ObsLanguage_NameRegionIntroduced_DateReview_YearSpeakersDifficulty_LevelCountries_CountIntroduced_Year
1ArabicMiddleEast01JAN700705310425700
2BengaliAsia01JAN12001205230321200
3EnglishGlobal01JAN1600160515002601600
4FrenchEurope01JAN130013053003291300
5GermanEurope01JAN1100110590361100
6HindiAsia01JAN15001505600341500
7ItalianEurope01JAN1400140565241400
8JapaneseAsia01JAN80080512551800
9KoreanAsia01JAN9009058042900
10MandarinAsia01JAN120012051100531200
11PortugueseEurope01JAN150015052602101500
12RussianEurope01JAN90090526044900
13SpanishEurope01JAN140014055502211400
14SwahiliAfrica01JAN1500.80351500
15SwahiliAfrica01JAN1500.80351500
16SwahiliAfrica01JAN1500150580351500
17ThaiAsia01JAN1300.70411300
18ThaiAsia01JAN1300.70411300
19ThaiAsia01JAN1300130570411300
20TurkishEurope01JAN1100110575321100


proc sort data=language_rank; by Language_Name; run;

proc print data=language_rank;

run;

OUTPUT:

ObsLanguage_NameRegionIntroduced_DateReview_YearSpeakersDifficulty_LevelCountries_CountIntroduced_YearReference_YearYears_Since_IntroPopularity_Rank
1ArabicMiddleEast01JAN700705310425700202613265.0
2BengaliAsia01JAN1200120523032120020268269.0
3EnglishGlobal01JAN160016051500260160020264261.0
4FrenchEurope01JAN13001305300329130020267266.0
5GermanEurope01JAN1100110590361100202692611.0
6HindiAsia01JAN1500150560034150020265263.0
7ItalianEurope01JAN1400140565241400202662614.0
8JapaneseAsia01JAN800805125518002026122610.0
9KoreanAsia01JAN90090580429002026112612.0
10MandarinAsia01JAN12001205110053120020268262.0
11PortugueseEurope01JAN15001505260210150020265267.5
12RussianEurope01JAN90090526044900202611267.5
13SpanishEurope01JAN14001405550221140020266264.0
14TurkishEurope01JAN1100110575321100202692613.0


data language_final;

    merge languages_master(in=a)

          language_rank(in=b);

    by Language_Name;

    if a;

run;

proc print data=language_final;

run;

OUTPUT:

ObsLanguage_NameRegionIntroduced_DateReview_YearSpeakersDifficulty_LevelCountries_CountIntroduced_YearReference_YearYears_Since_IntroPopularity_Rank
1ArabicMiddleEast01JAN700705310425700202613265.0
2BengaliAsia01JAN1200120523032120020268269.0
3EnglishGlobal01JAN160016051500260160020264261.0
4FrenchEurope01JAN13001305300329130020267266.0
5GermanEurope01JAN1100110590361100202692611.0
6HindiAsia01JAN1500150560034150020265263.0
7ItalianEurope01JAN1400140565241400202662614.0
8JapaneseAsia01JAN800805125518002026122610.0
9KoreanAsia01JAN90090580429002026112612.0
10MandarinAsia01JAN12001205110053120020268262.0
11PortugueseEurope01JAN15001505260210150020265267.5
12RussianEurope01JAN90090526044900202611267.5
13SpanishEurope01JAN14001405550221140020266264.0
14SwahiliAfrica01JAN1500.80351500...
15SwahiliAfrica01JAN1500.80351500...
16SwahiliAfrica01JAN1500150580351500...
17ThaiAsia01JAN1300.70411300...
18ThaiAsia01JAN1300.70411300...
19ThaiAsia01JAN1300130570411300...
20TurkishEurope01JAN1100110575321100202692613.0





To Visit My Previous Online Streaming Flatform Dataset:Click Here
To Visit My Previous Statistical Evaluation Of Clinical Trials:Click Here
To Visit My Previous Unlocking Retail Insights Dataset:Click Here
To Visit My Previous Sas Interview Questions-1:Click Here  



Follow Us On : 


 


--->FOLLOW OUR BLOG FOR MORE INFORMATION.

--->PLEASE DO COMMENTS AND SHARE OUR BLOG.