Saturday, 22 November 2025

317.SPORTS PLAYER PERFORMANCE DATA CREATION AND ANALYSIS USING PROC SQL | PROC MEANS | PROC UNIVARIATE | PROC SGPLOT | PROC SGPANEL WITH MACRO-BASED WIN RATE AND DATE DERIVATIONS USING INTCK AND INTNX

SPORTS PLAYER PERFORMANCE DATA CREATION AND ANALYSIS USING PROC SQL | PROC MEANS | PROC UNIVARIATE | PROC SGPLOT | PROC SGPANEL WITH MACRO-BASED WIN RATE AND DATE DERIVATIONS USING INTCK AND INTNX

options nocenter;

STEP 1: CREATE A RAW SPORTS DATASET USING PROC SQL

proc sql;

  create table work.sports_raw as

  select 

      "P001"        as Player_ID length=5,

      "India"       as Country length=20,

      85            as Matches_Played,

      55            as Wins,

      3150          as Ranking_Points,

      "Grand Slam"  as Tournament_Type length=20,

      '15JAN2015'd  as Debut_Date,

      '20OCT2024'd  as Last_Played_Date

  from sashelp.class(obs=1)

  union all

  select 

      "P002", "Australia", 72, 48, 2980, "Grand Slam",

      '10MAR2016'd, '05SEP2024'd

  from sashelp.class(obs=1)

  union all

  select 

      "P003", "Spain", 102, 79, 4020, "Masters",

      '01MAY2013'd, '30NOV2024'd

  from sashelp.class(obs=1)

  union all

  select 

      "P004", "USA", 60, 36, 2500, "International",

      '25AUG2017'd, '18JUL2024'd

  from sashelp.class(obs=1)

  union all

  select 

      "P005", "France", 95, 63, 3300, "Masters",

      '12DEC2014'd, '09SEP2024'd

  from sashelp.class(obs=1)

  union all

  select 

      "P006", "Germany", 54, 29, 2100, "International",

      '03FEB2018'd, '14AUG2024'd

  from sashelp.class(obs=1)

  union all

  select 

      "P007", "Japan", 77, 50, 2900, "Grand Slam",

      '19JUL2016'd, '02NOV2024'd

  from sashelp.class(obs=1)

  union all

  select 

      "P008", "Brazil", 65, 38, 2400, "International",

      '08JAN2017'd, '28OCT2024'd

  from sashelp.class(obs=1)

  union all

  select 

      "P009", "UK", 110, 81, 4200, "Masters",

      '21APR2012'd, '15NOV2024'd

  from sashelp.class(obs=1)

  union all

  select 

      "P010", "Italy", 58, 31, 2050, "International",

      '07JUN2018'd, '01SEP2024'd

  from sashelp.class(obs=1)

  union all

  select 

      "P011", "Russia", 92, 60, 3200, "Grand Slam",

      '29OCT2014'd, '30AUG2024'd

  from sashelp.class(obs=1)

  union all

  select 

      "P012", "Argentina", 70, 44, 2700, "International",

      '11JAN2016'd, '22OCT2024'd

  from sashelp.class(obs=1)

  union all

  select 

      "P013", "Canada", 49, 27, 1950, "International",

      '05SEP2019'd, '05NOV2024'd

  from sashelp.class(obs=1)

  union all

  select 

      "P014", "China", 88, 62, 3400, "Masters",

      '17NOV2015'd, '10OCT2024'd

  from sashelp.class(obs=1)

  ;

quit;

/* Apply date formats */

data work.sports_raw;

  set work.sports_raw;

  format Debut_Date Last_Played_Date date9.;

run;

proc print data=work.sports_raw;

run;

OUTPUT:

ObsPlayer_IDCountryMatches_PlayedWinsRanking_PointsTournament_TypeDebut_DateLast_Played_Date
1P001India85553150Grand Slam15JAN201520OCT2024
2P002Australia72482980Grand Slam10MAR201605SEP2024
3P003Spain102794020Masters01MAY201330NOV2024
4P004USA60362500International25AUG201718JUL2024
5P005France95633300Masters12DEC201409SEP2024
6P006Germany54292100International03FEB201814AUG2024
7P007Japan77502900Grand Slam19JUL201602NOV2024
8P008Brazil65382400International08JAN201728OCT2024
9P009UK110814200Masters21APR201215NOV2024
10P010Italy58312050International07JUN201801SEP2024
11P011Russia92603200Grand Slam29OCT201430AUG2024
12P012Argentina70442700International11JAN201622OCT2024
13P013Canada49271950International05SEP201905NOV2024
14P014China88623400Masters17NOV201510OCT2024


STEP 2: DEFINE A MACRO TO DERIVE WIN RATE & DATE-BASED METRICS

%macro derive_winrate(indata=, outdata=);

data &outdata.;

    set &indata.;

    /* Basic derived metrics */

    if Matches_Played > 0 then Win_Rate = Wins / Matches_Played;

    else Win_Rate = .;

    Losses = Matches_Played - Wins;

    /* Career duration using INTCK (month level) */

    Career_Months = intck('month', Debut_Date, Last_Played_Date, 'c');

    Career_Years  = Career_Months / 12;

    /* Align debut to start of debut season using INTNX */

    Season_Start = intnx('year', Debut_Date, 0, 'b');  /* 'b' = beginning */

    format Win_Rate percent8.2

           Career_Years 8.1

           Season_Start date9.;

  run;

  proc print data=&outdata.;

  run;

%mend derive_winrate;


%derive_winrate(indata=work.sports_raw, outdata=work.sports_final);

OUTPUT:

ObsPlayer_IDCountryMatches_PlayedWinsRanking_PointsTournament_TypeDebut_DateLast_Played_DateWin_RateLossesCareer_MonthsCareer_YearsSeason_Start
1P001India85553150Grand Slam15JAN201520OCT202464.71%301179.801JAN2015
2P002Australia72482980Grand Slam10MAR201605SEP202466.67%241018.401JAN2016
3P003Spain102794020Masters01MAY201330NOV202477.45%2313811.501JAN2013
4P004USA60362500International25AUG201718JUL202460.00%24826.801JAN2017
5P005France95633300Masters12DEC201409SEP202466.32%321169.701JAN2014
6P006Germany54292100International03FEB201814AUG202453.70%25786.501JAN2018
7P007Japan77502900Grand Slam19JUL201602NOV202464.94%27998.301JAN2016
8P008Brazil65382400International08JAN201728OCT202458.46%27937.801JAN2017
9P009UK110814200Masters21APR201215NOV202473.64%2915012.501JAN2012
10P010Italy58312050International07JUN201801SEP202453.45%27746.201JAN2018
11P011Russia92603200Grand Slam29OCT201430AUG202465.22%321189.801JAN2014
12P012Argentina70442700International11JAN201622OCT202462.86%261058.801JAN2016
13P013Canada49271950International05SEP201905NOV202455.10%22625.201JAN2019
14P014China88623400Masters17NOV201510OCT202470.45%261068.801JAN2015


STEP 3: USE PROC SQL FOR SOME BASIC QUERIES & VIEWS

proc sql  outobs=5;

  /* Top 5 players by Win_Rate */

  create table work.top5_winrate as

  select Player_ID, Country, Tournament_Type,

         Matches_Played, Wins, Win_Rate, Ranking_Points

  from work.sports_final

  order by Win_Rate desc, Ranking_Points desc;

proc print data=work.top5_winrate;

run;

OUTPUT:

ObsPlayer_IDCountryTournament_TypeMatches_PlayedWinsWin_RateRanking_Points
1P003SpainMasters1027977.45%4020
2P009UKMasters1108173.64%4200
3P014ChinaMasters886270.45%3400
4P002AustraliaGrand Slam724866.67%2980
5P005FranceMasters956366.32%3300

  /* Average ranking points by Tournament_Type */

  create table work.avg_points_by_type as

  select Tournament_Type,

         count(*)      as Num_Players,

         avg(Ranking_Points) as Avg_Ranking_Points

  from work.sports_final

  group by Tournament_Type;

quit;

proc print data=work.avg_points_by_type;

run;

OUTPUT:

ObsTournament_TypeNum_PlayersAvg_Ranking_Points
1Grand Slam43057.50
2International62283.33
3Masters43730.00


STEP 4: SUMMARY STATISTICS USING PROC MEANS

proc means data=work.sports_final

           n mean std min max maxdec=2;

  class Tournament_Type;

  var Matches_Played Wins Win_Rate Ranking_Points Career_Years;

run;

OUTPUT:

The MEANS Procedure

Tournament_TypeN ObsVariableNMeanStd DevMinimumMaximum
Grand Slam4
Matches_Played
Wins
Win_Rate
Ranking_Points
Career_Years
4
4
4
4
4
81.50
53.25
0.65
3057.50
9.06
8.81
5.38
0.01
141.04
0.85
72.00
48.00
0.65
2900.00
8.25
92.00
60.00
0.67
3200.00
9.83
International6
Matches_Played
Wins
Win_Rate
Ranking_Points
Career_Years
6
6
6
6
6
59.33
34.17
0.57
2283.33
6.86
7.53
6.37
0.04
294.39
1.25
49.00
27.00
0.53
1950.00
5.17
70.00
44.00
0.63
2700.00
8.75
Masters4
Matches_Played
Wins
Win_Rate
Ranking_Points
Career_Years
4
4
4
4
4
98.75
71.25
0.72
3730.00
10.63
9.43
10.14
0.05
446.77
1.67
88.00
62.00
0.66
3300.00
8.83
110.00
81.00
0.77
4200.00
12.50

STEP 5: DISTRIBUTION ANALYSIS USING PROC UNIVARIATE

proc univariate data=work.sports_final;

  var Ranking_Points;

  histogram Ranking_Points / normal;

  inset n mean std min max / position=ne;

run;

OUTPUT:

The UNIVARIATE Procedure

Variable: Ranking_Points

Moments
N14Sum Weights14
Mean2917.85714Sum Observations40850
Std Deviation691.92239Variance478756.593
Skewness0.35762409Kurtosis-0.4989298
Uncorrected SS125418300Corrected SS6223835.71
Coeff Variation23.7133744Std Error Mean184.924037
Basic Statistical Measures
LocationVariability
Mean2917.857Std Deviation691.92239
Median2940.000Variance478757
Mode.Range2250
  Interquartile Range900.00000
Tests for Location: Mu0=0
TestStatisticp Value
Student's tt15.77868Pr > |t|<.0001
SignM7Pr >= |M|0.0001
Signed RankS52.5Pr >= |S|0.0001
Quantiles (Definition 5)
LevelQuantile
100% Max4200
99%4200
95%4200
90%4020
75% Q33300
50% Median2940
25% Q12400
10%2050
5%1950
1%1950
0% Min1950
Extreme Observations
LowestHighest
ValueObsValueObs
195013320011
20501033005
21006340014
2400840203
2500442009

The UNIVARIATE Procedure

Histogram for Ranking_Points

The UNIVARIATE Procedure

Fitted Normal Distribution for Ranking_Points

Parameters for Normal Distribution
ParameterSymbolEstimate
MeanMu2917.857
Std DevSigma691.9224
Goodness-of-Fit Tests for Normal Distribution
TestStatisticp Value
Kolmogorov-SmirnovD0.10010171Pr > D>0.150
Cramer-von MisesW-Sq0.02483572Pr > W-Sq>0.250
Anderson-DarlingA-Sq0.21887937Pr > A-Sq>0.250
Quantiles for Normal Distribution
PercentQuantile
ObservedEstimated
1.01950.001308.20
5.01950.001779.75
10.02050.002031.12
25.02400.002451.16
50.02940.002917.86
75.03300.003384.55
90.04020.003804.59
95.04200.004055.97
99.04200.004527.51

STEP 6: VISUALIZATION USING PROC SGPLOT

/* Bar chart of total wins by Country */

proc sgplot data=work.sports_final;

  vbar Country / response=Wins stat=sum;

  xaxis label="Country";

  yaxis label="Total Wins";

  title "Total Wins by Country";

run;

OUTPUT:

The SGPlot Procedure


/* Scatter plot of Win_Rate vs. Ranking_Points */

proc sgplot data=work.sports_final;

  scatter x=Win_Rate y=Ranking_Points;

  xaxis label="Win Rate";

  yaxis label="Ranking Points";

  title "Relationship Between Win Rate and Ranking Points";

run;

OUTPUT:

The SGPlot Procedure


STEP 7: BOXPLOTS BY TOURNAMENT TYPE USING PROC SGPANEL

proc sgpanel data=work.sports_final;

  panelby Tournament_Type / columns=2;

  vbox Win_Rate;

  colaxis label="Tournament Type";

  rowaxis label="Win Rate";

  title "Distribution of Win Rate by Tournament Type";

run;

OUTPUT:
The SGPanel Procedure
The SGPanel Procedure



To Visit My Previous Different Tablets Market Analysis Dataset:Click Here
To Visit My Previous Self Description Analysis Dataset:Click Here
To Visit My Previous Birds Of India Dataset:Click Here
To Visit My Previous Ganesh Mandaps In India Dataset:Click Here  



Follow Us On : 


 


--- FOLLOW OUR BLOG FOR MORE INFORMATION.

--->PLEASE DO COMMENTS AND SHARE OUR BLOG.




No comments:

Post a Comment