356.Can SAS analytics reveal which programming languages are popular but surprisingly difficult to master?

Can SAS analytics reveal which programming languages are popular but surprisingly difficult to master?

options nocenter;

1.LANGUAGES MASTER DATASET CREATION

data languages_master;

    length Language_Name $15 Region $15 Introduced_Date $9;

    format Review_Year 4.;

    input Language_Name $ Speakers Region $ Difficulty_Level Countries_Count

        Introduced_Year;

    /* Store historical date safely as character */

    Introduced_Date = cats('01JAN', Introduced_Year);

    /* Review year after 5 years */

    Review_Year = Introduced_Year + 5;

datalines;

English 1500 Global 2 60 1600

Mandarin 1100 Asia 5 3 1200

Hindi 600 Asia 3 4 1500

Spanish 550 Europe 2 21 1400

French 300 Europe 3 29 1300

Arabic 310 MiddleEast 4 25 700

Russian 260 Europe 4 4 900

Portuguese 260 Europe 2 10 1500

Bengali 230 Asia 3 2 1200

Japanese 125 Asia 5 1 800

German 90 Europe 3 6 1100

Korean 80 Asia 4 2 900

Italian 65 Europe 2 4 1400

Turkish 75 Europe 3 2 1100

;

run;

proc print data=languages_master;

run;

OUTPUT:

ObsLanguage_NameRegionIntroduced_DateReview_YearSpeakersDifficulty_LevelCountries_CountIntroduced_Year
1EnglishGlobal01JAN1600160515002601600
2MandarinAsia01JAN120012051100531200
3HindiAsia01JAN15001505600341500
4SpanishEurope01JAN140014055502211400
5FrenchEurope01JAN130013053003291300
6ArabicMiddleEast01JAN700705310425700
7RussianEurope01JAN90090526044900
8PortugueseEurope01JAN150015052602101500
9BengaliAsia01JAN12001205230321200
10JapaneseAsia01JAN80080512551800
11GermanEurope01JAN1100110590361100
12KoreanAsia01JAN9009058042900
13ItalianEurope01JAN1400140565241400
14TurkishEurope01JAN1100110575321100


2.DATES PRACTICE

data languages_dates;

    set languages_master;

    /* Reference year instead of SAS date */

    Reference_Year = 2026;

    /* Correct calculation using years */

    Years_Since_Intro = Reference_Year - Introduced_Year;

run;

proc print data=languages_dates;

run;

OUTPUT:

ObsLanguage_NameRegionIntroduced_DateReview_YearSpeakersDifficulty_LevelCountries_CountIntroduced_YearReference_YearYears_Since_Intro
1EnglishGlobal01JAN16001605150026016002026426
2MandarinAsia01JAN1200120511005312002026826
3HindiAsia01JAN150015056003415002026526
4SpanishEurope01JAN1400140555022114002026626
5FrenchEurope01JAN1300130530032913002026726
6ArabicMiddleEast01JAN70070531042570020261326
7RussianEurope01JAN9009052604490020261126
8PortugueseEurope01JAN1500150526021015002026526
9BengaliAsia01JAN120012052303212002026826
10JapaneseAsia01JAN8008051255180020261226
11GermanEurope01JAN11001105903611002026926
12KoreanAsia01JAN900905804290020261126
13ItalianEurope01JAN14001405652414002026626
14TurkishEurope01JAN11001105753211002026926


3.PROC SQL – DATA QUERY & AGGREGATION

proc sql;

    create table language_summary as

    select Region,

           count(Language_Name) as Total_Languages,

           mean(Speakers) as Avg_Speakers

    from languages_dates

    group by Region;

quit;

proc print data=language_summary;

run;

OUTPUT:

ObsRegionTotal_LanguagesAvg_Speakers
1Asia5427.00
2Europe7228.57
3Global11500.00
4MiddleEast1310.00


4.PROC MEANS – DESCRIPTIVE STATISTICS

proc means data=languages_dates mean min max;

    var Speakers Countries_Count Difficulty_Level;

run;

OUTPUT:

The MEANS Procedure

VariableMeanMinimumMaximum
Speakers
Countries_Count
Difficulty_Level
396.0714286
12.3571429
3.2142857
65.0000000
1.0000000
2.0000000
1500.00
60.0000000
5.0000000

5.PROC RANK – LANGUAGE POPULARITY RANKING

proc rank data=languages_dates out=language_rank descending;

    var Speakers;

    ranks Popularity_Rank;

run;

proc print data=language_rank;

run;

OUTPUT:

ObsLanguage_NameRegionIntroduced_DateReview_YearSpeakersDifficulty_LevelCountries_CountIntroduced_YearReference_YearYears_Since_IntroPopularity_Rank
1EnglishGlobal01JAN160016051500260160020264261.0
2MandarinAsia01JAN12001205110053120020268262.0
3HindiAsia01JAN1500150560034150020265263.0
4SpanishEurope01JAN14001405550221140020266264.0
5FrenchEurope01JAN13001305300329130020267266.0
6ArabicMiddleEast01JAN700705310425700202613265.0
7RussianEurope01JAN90090526044900202611267.5
8PortugueseEurope01JAN15001505260210150020265267.5
9BengaliAsia01JAN1200120523032120020268269.0
10JapaneseAsia01JAN800805125518002026122610.0
11GermanEurope01JAN1100110590361100202692611.0
12KoreanAsia01JAN90090580429002026112612.0
13ItalianEurope01JAN1400140565241400202662614.0
14TurkishEurope01JAN1100110575321100202692613.0


6.MACRO – AUTOMATED DIFFICULTY CLASSIFICATION

%macro difficulty_class;

data language_class;

    set language_rank;

    length Difficulty_Group $12;

    if Difficulty_Level <=2 then Difficulty_Group='Easy';

    else if Difficulty_Level=3 then Difficulty_Group='Medium';

    else Difficulty_Group='Hard';

run;

proc print data=language_class;

run;

%mend;


%difficulty_class;

OUTPUT:

ObsLanguage_NameRegionIntroduced_DateReview_YearSpeakersDifficulty_LevelCountries_CountIntroduced_YearReference_YearYears_Since_IntroPopularity_RankDifficulty_Group
1EnglishGlobal01JAN160016051500260160020264261.0Easy
2MandarinAsia01JAN12001205110053120020268262.0Hard
3HindiAsia01JAN1500150560034150020265263.0Medium
4SpanishEurope01JAN14001405550221140020266264.0Easy
5FrenchEurope01JAN13001305300329130020267266.0Medium
6ArabicMiddleEast01JAN700705310425700202613265.0Hard
7RussianEurope01JAN90090526044900202611267.5Hard
8PortugueseEurope01JAN15001505260210150020265267.5Easy
9BengaliAsia01JAN1200120523032120020268269.0Medium
10JapaneseAsia01JAN800805125518002026122610.0Hard
11GermanEurope01JAN1100110590361100202692611.0Medium
12KoreanAsia01JAN90090580429002026112612.0Hard
13ItalianEurope01JAN1400140565241400202662614.0Easy
14TurkishEurope01JAN1100110575321100202692613.0Medium


7.PROC UNIVARIATE – DISTRIBUTION ANALYSIS

proc univariate data=language_class;

    var Speakers;

    histogram Speakers;

run;

OUTPUT:

The UNIVARIATE Procedure

Variable: Speakers

Moments
N14Sum Weights14
Mean396.071429Sum Observations5545
Std Deviation424.305671Variance180035.302
Skewness1.81572518Kurtosis2.88471787
Uncorrected SS4536675Corrected SS2340458.93
Coeff Variation107.128573Std Error Mean113.400461
Basic Statistical Measures
LocationVariability
Mean396.0714Std Deviation424.30567
Median260.0000Variance180035
Mode260.0000Range1435
  Interquartile Range460.00000
Tests for Location: Mu0=0
TestStatisticp Value
Student's tt3.492679Pr > |t|0.0040
SignM7Pr >= |M|0.0001
Signed RankS52.5Pr >= |S|0.0001
Quantiles (Definition 5)
LevelQuantile
100% Max1500
99%1500
95%1500
90%1100
75% Q3550
50% Median260
25% Q190
10%75
5%65
1%65
0% Min65
Extreme Observations
LowestHighest
ValueObsValueObs
65133106
75145504
80126003
901111002
1251015001

The UNIVARIATE Procedure

Histogram for Speakers

8.PROC APPEND – ADD NEW LANGUAGE RECORDS

data new_languages;

    length Language_Name $15 Region $15 Introduced_Date $9;

    format Introduced_Year 4.;

    input Language_Name $ Speakers Region $ Difficulty_Level Countries_Count

        Introduced_Date $;

    /* Extract year safely */

    Introduced_Year = input(substr(Introduced_Date, 6, 4), 4.);

     Review_Year = Introduced_Year + 5;

datalines;

Swahili 80 Africa 3 5 01JAN1500

Thai    70 Asia   4 1 01JAN1300

;

run;

proc print data=new_languages;

run;

OUTPUT:

ObsLanguage_NameRegionIntroduced_DateIntroduced_YearSpeakersDifficulty_LevelCountries_CountReview_Year
1SwahiliAfrica01JAN1500150080351505
2ThaiAsia01JAN1300130070411305


proc append base=languages_master 

            data=new_languages;

run;

proc print data=new_languages;

run;

OUTPUT:

ObsLanguage_NameRegionIntroduced_DateIntroduced_YearSpeakersDifficulty_LevelCountries_CountReview_Year
1SwahiliAfrica01JAN1500150080351505
2ThaiAsia01JAN1300130070411305


9.PROC TRANSPOSE – DATA RESTRUCTURING

proc transpose data=languages_master out=transposed_languages;

    by Region NotSorted;

    var Speakers Countries_Count;

run;

proc print data=transposed_languages;

run;

OUTPUT:

ObsRegion_NAME_COL1COL2
1GlobalSpeakers1500.
2GlobalCountries_Count60.
3AsiaSpeakers1100600
4AsiaCountries_Count34
5EuropeSpeakers550300
6EuropeCountries_Count2129
7MiddleEastSpeakers310.
8MiddleEastCountries_Count25.
9EuropeSpeakers260260
10EuropeCountries_Count410
11AsiaSpeakers230125
12AsiaCountries_Count21
13EuropeSpeakers90.
14EuropeCountries_Count6.
15AsiaSpeakers80.
16AsiaCountries_Count2.
17EuropeSpeakers6575
18EuropeCountries_Count42
19AfricaSpeakers80.
20AfricaCountries_Count5.
21AsiaSpeakers70.
22AsiaCountries_Count1.
23AfricaSpeakers80.
24AfricaCountries_Count5.
25AsiaSpeakers70.
26AsiaCountries_Count1.
27AfricaSpeakers80.
28AfricaCountries_Count5.
29AsiaSpeakers70.
30AsiaCountries_Count1.


10.PROC MERGE – COMBINING DATASETS

proc sort data=languages_master; by Language_Name; run;

proc print data=languages_master;

run;

OUTPUT:

ObsLanguage_NameRegionIntroduced_DateReview_YearSpeakersDifficulty_LevelCountries_CountIntroduced_Year
1ArabicMiddleEast01JAN700705310425700
2BengaliAsia01JAN12001205230321200
3EnglishGlobal01JAN1600160515002601600
4FrenchEurope01JAN130013053003291300
5GermanEurope01JAN1100110590361100
6HindiAsia01JAN15001505600341500
7ItalianEurope01JAN1400140565241400
8JapaneseAsia01JAN80080512551800
9KoreanAsia01JAN9009058042900
10MandarinAsia01JAN120012051100531200
11PortugueseEurope01JAN150015052602101500
12RussianEurope01JAN90090526044900
13SpanishEurope01JAN140014055502211400
14SwahiliAfrica01JAN1500.80351500
15SwahiliAfrica01JAN1500.80351500
16SwahiliAfrica01JAN1500150580351500
17ThaiAsia01JAN1300.70411300
18ThaiAsia01JAN1300.70411300
19ThaiAsia01JAN1300130570411300
20TurkishEurope01JAN1100110575321100


proc sort data=language_rank; by Language_Name; run;

proc print data=language_rank;

run;

OUTPUT:

ObsLanguage_NameRegionIntroduced_DateReview_YearSpeakersDifficulty_LevelCountries_CountIntroduced_YearReference_YearYears_Since_IntroPopularity_Rank
1ArabicMiddleEast01JAN700705310425700202613265.0
2BengaliAsia01JAN1200120523032120020268269.0
3EnglishGlobal01JAN160016051500260160020264261.0
4FrenchEurope01JAN13001305300329130020267266.0
5GermanEurope01JAN1100110590361100202692611.0
6HindiAsia01JAN1500150560034150020265263.0
7ItalianEurope01JAN1400140565241400202662614.0
8JapaneseAsia01JAN800805125518002026122610.0
9KoreanAsia01JAN90090580429002026112612.0
10MandarinAsia01JAN12001205110053120020268262.0
11PortugueseEurope01JAN15001505260210150020265267.5
12RussianEurope01JAN90090526044900202611267.5
13SpanishEurope01JAN14001405550221140020266264.0
14TurkishEurope01JAN1100110575321100202692613.0


data language_final;

    merge languages_master(in=a)

          language_rank(in=b);

    by Language_Name;

    if a;

run;

proc print data=language_final;

run;

OUTPUT:

ObsLanguage_NameRegionIntroduced_DateReview_YearSpeakersDifficulty_LevelCountries_CountIntroduced_YearReference_YearYears_Since_IntroPopularity_Rank
1ArabicMiddleEast01JAN700705310425700202613265.0
2BengaliAsia01JAN1200120523032120020268269.0
3EnglishGlobal01JAN160016051500260160020264261.0
4FrenchEurope01JAN13001305300329130020267266.0
5GermanEurope01JAN1100110590361100202692611.0
6HindiAsia01JAN1500150560034150020265263.0
7ItalianEurope01JAN1400140565241400202662614.0
8JapaneseAsia01JAN800805125518002026122610.0
9KoreanAsia01JAN90090580429002026112612.0
10MandarinAsia01JAN12001205110053120020268262.0
11PortugueseEurope01JAN15001505260210150020265267.5
12RussianEurope01JAN90090526044900202611267.5
13SpanishEurope01JAN14001405550221140020266264.0
14SwahiliAfrica01JAN1500.80351500...
15SwahiliAfrica01JAN1500.80351500...
16SwahiliAfrica01JAN1500150580351500...
17ThaiAsia01JAN1300.70411300...
18ThaiAsia01JAN1300.70411300...
19ThaiAsia01JAN1300130570411300...
20TurkishEurope01JAN1100110575321100202692613.0





To Visit My Previous Online Streaming Flatform Dataset:Click Here
To Visit My Previous Statistical Evaluation Of Clinical Trials:Click Here
To Visit My Previous Unlocking Retail Insights Dataset:Click Here
To Visit My Previous Sas Interview Questions-1:Click Here  



Follow Us On : 


 


--->FOLLOW OUR BLOG FOR MORE INFORMATION.

--->PLEASE DO COMMENTS AND SHARE OUR BLOG.



Comments

Popular posts from this blog

412.Can We Build And Clean A University Course Analytics & Fraud Detection System In Sas Using Only Macros While Intentionally Creating And Fixing Errors?

420.Can We Detect Errors, Prevent Fraud, And Optimize Biometric Access System Security Using Advanced SAS Programming?

418.Can We Design, Debug, Detect Fraud, And Optimize A Smart Parking System Using Advanced SAS Programming Techniques?