Tuesday, 11 November 2025

306.TENNIS PLAYERS PERFORMANCE STUDY | DATA STEP DERIVATIONS | PROC SQL SUMMARIES | PROC EXPAND TIME-SERIES ANALYSIS | PROC SGPLOT VISUALIZATION | MACRO AUTOMATION

TENNIS PLAYERS PERFORMANCE STUDY | DATA STEP DERIVATIONS | PROC SQL SUMMARIES | PROC EXPAND TIME-SERIES ANALYSIS | PROC SGPLOT VISUALIZATION | MACRO AUTOMATION

1) Create master dataset

options validvarname=any nocenter;

data work.tennis_players;

  infile datalines dlm=',' dsd truncover;

  length PlayerID $4 Name $30 Country $20 Hand $1;

  format DOB ddmmyy10.;

  input PlayerID :$4. Name :$30. Country :$20. DOB :ddmmyy10. Height_cm Hand :$1.;

datalines;

P001,"Rafael Nadal","Spain",03-06-1986,185,R

P002,"Novak Djokovic","Serbia",22-05-1987,188,R

P003,"Roger Federer","Switzerland",08-08-1981,185,R

P004,"Andy Murray","United Kingdom",15-05-1987,190,R

P005,"Serena Williams","USA",26-09-1981,175,R

P006,"Naomi Osaka","Japan",16-10-1997,180,R

P007,"Ashleigh Barty","Australia",24-04-1996,166,R

P008,"Simona Halep","Romania",27-09-1991,168,R

P009,"Dominic Thiem","Austria",03-09-1993,185,R

P010,"Alexander Zverev","Germany",20-04-1997,198,R

P011,"Iga Swiatek","Poland",31-05-2001,175,R

P012,"Carlos Alcaraz","Spain",05-05-2003,185,R

;

run;

proc print data=work.tennis_players noobs; 

run;

OUTPUT:

PlayerIDNameCountryHandDOBHeight_cm
P001Rafael NadalSpainR03/06/1986185
P002Novak DjokovicSerbiaR22/05/1987188
P003Roger FedererSwitzerlandR08/08/1981185
P004Andy MurrayUnited KingdomR15/05/1987190
P005Serena WilliamsUSAR26/09/1981175
P006Naomi OsakaJapanR16/10/1997180
P007Ashleigh BartyAustraliaR24/04/1996166
P008Simona HalepRomaniaR27/09/1991168
P009Dominic ThiemAustriaR03/09/1993185
P010Alexander ZverevGermanyR20/04/1997198
P011Iga SwiatekPolandR31/05/2001175
P012Carlos AlcarazSpainR05/05/2003185


2) DATA step derivations: Age (as of a fixed date), AgeGroup, Height_m

data work.tennis_players_deriv;

  set work.tennis_players;

  /* Reference date is explicit to make results reproducible */

  ref_date = '11NOV2025'd;


  /* Calculate integer age at ref_date accurately */

  years = intck('year', DOB, ref_date);

  /* Adjust if birthday hasn't occurred yet in reference year */

  if month(ref_date) < month(DOB) or (month(ref_date)=month(DOB) and 

  day(ref_date)<day(DOB)) then Age = years - 1;

  else Age = years;

  /* Height in meters */

  Height_m = round(Height_cm / 100, 0.01);

  /*  Age group */

  if Age < 20 then AgeGroup = 'Under20';

  else if 20 <= Age < 25 then AgeGroup = '20-24';

  else if 25 <= Age < 30 then AgeGroup = '25-29';

  else if 30 <= Age < 35 then AgeGroup = '30-34';

  else AgeGroup = '35plus';

  /* Drop helper variable years, keep ref_date for traceability */

  drop years;

run;

proc print data=work.tennis_players_deriv noobs; 

 var PlayerID Name Country DOB Age AgeGroup Height_cm Height_m Hand ref_date;

run;

OUTPUT:

PlayerIDNameCountryDOBAgeAgeGroupHeight_cmHeight_mHandref_date
P001Rafael NadalSpain03/06/19863935plus1851.85R24056
P002Novak DjokovicSerbia22/05/19873835plus1881.88R24056
P003Roger FedererSwitzerland08/08/19814435plus1851.85R24056
P004Andy MurrayUnited Kingdom15/05/19873835plus1901.90R24056
P005Serena WilliamsUSA26/09/19814435plus1751.75R24056
P006Naomi OsakaJapan16/10/19972825-291801.80R24056
P007Ashleigh BartyAustralia24/04/19962925-291661.66R24056
P008Simona HalepRomania27/09/19913430-341681.68R24056
P009Dominic ThiemAustria03/09/19933230-341851.85R24056
P010Alexander ZverevGermany20/04/19972825-291981.98R24056
P011Iga SwiatekPoland31/05/20012420-241751.75R24056
P012Carlos AlcarazSpain05/05/20032220-241851.85R24056


3) PROC SQL summaries: counts, averages, country-level rollups

proc sql;

  create table work.country_summary as

  select Country,

         count(*) as NumPlayers,

         mean(Age) as AvgAge format=6.2,

         min(Age) as Youngest,

         max(Age) as Oldest,

         mean(Height_cm) as AvgHeight_cm format=6.1

  from work.tennis_players_deriv

  group by Country

  order by NumPlayers desc;

quit;

proc print data=work.country_summary noobs;

run;

OUTPUT:

CountryNumPlayersAvgAgeYoungestOldestAvgHeight_cm
Spain230.502239185.0
Romania134.003434168.0
Germany128.002828198.0
Serbia138.003838188.0
USA144.004444175.0
Poland124.002424175.0
United Kingdom138.003838190.0
Australia129.002929166.0
Austria132.003232185.0
Japan128.002828180.0
Switzerland144.004444185.0


4) Create synthetic monthly ranking points time series for 3 players

/* Choose 3 players to demonstrate time-series: P002, P001, P012 */

data work.rankings;

  length PlayerID $4;

  format Month yymmn6.;


  /* Loop through player IDs */

  do PlayerID = 'P002','P001','P012';


    /* Assign a numeric ID for each player */

    select (PlayerID);

      when ('P002') player_num = 1;

      when ('P001') player_num = 2;

      when ('P012') player_num = 3;

      otherwise player_num = 0;

    end;


    /* Generate 12 months of data per player */

    do m = 0 to 11;

      Month = intnx('month', '01JAN2025'd, m, 'begin');


      /* Deterministic seed for reproducible randomness */

      seed = 12345 + player_num*1000 + m;

      call streaminit(seed);


      /* Different base points per player */

      base = 8000 + player_num*250;  


      /* Monthly seasonal and random variation */

      seasonal = 200 * sin(2*3.14159*m/12) + 50*mod(m,3);

      noise = round(rand('NORMAL', 0, 150), 1);


      /* Final player points */

      Points = round(base + seasonal + noise, 1);

      output;

    end;

  end;


  /* Drop intermediate variables */

  drop m seed base seasonal noise player_num;

run;


proc print data=work.rankings(obs=12) noobs;

run;

OUTPUT:

PlayerIDMonthPoints
P0022025018281
P0022025028363
P0022025038752
P0022025048285
P0022025058585
P0022025068330
P0022025078127
P0022025088151
P0022025098147
P0022025107846
P0022025117933
P0022025128413


5) PROC EXPAND: compute 3-month and 6-month moving averages (SAS/ETS required)

proc sort data=work.rankings;

 by PlayerID Month;

run;

proc print data=work.rankings;

run;

OUTPUT:

ObsPlayerIDMonthPoints
1P0012025018586
2P0012025028507
3P0012025038515
4P0012025048618
5P0012025058561
6P0012025068671
7P0012025078601
8P0012025088215
9P0012025098357
10P0012025108174
11P0012025118417
12P0012025128366
13P0022025018281
14P0022025028363
15P0022025038752
16P0022025048285
17P0022025058585
18P0022025068330
19P0022025078127
20P0022025088151
21P0022025098147
22P0022025107846
23P0022025117933
24P0022025128413
25P0122025018512
26P0122025028507
27P0122025039154
28P0122025048888
29P0122025058746
30P0122025069161
31P0122025078466
32P0122025088588
33P0122025098917
34P0122025108585
35P0122025118682
36P0122025128861

proc expand data=work.rankings out=work.rankings_ma method=none;

  by PlayerID;

  id Month;

  convert Points = MA_3 / transformout=(movave 3);

  convert Points = MA_6 / transformout=(movave 6);

run;

proc print data=work.rankings_ma(obs=18) noobs;

run;

OUTPUT:

PlayerIDMonthMA_3MA_6Points
P0012025018586.008586.008586
P0012025028546.508546.508507
P0012025038536.008536.008515
P0012025048546.678556.508618
P0012025058564.678557.408561
P0012025068616.678576.338671
P0012025078611.008578.838601
P0012025088495.678530.178215
P0012025098391.008503.838357
P0012025108248.678429.838174
P0012025118316.008405.838417
P0012025128319.008355.008366
P0022025018281.008281.008281
P0022025028322.008322.008363
P0022025038465.338465.338752
P0022025048466.678420.258285
P0022025058540.678453.208585
P0022025068400.008432.678330


6) Plot time series and moving averages with PROC SGPLOT

proc sgpanel data=work.rankings_ma;

    panelby PlayerID / layout=panel onepanel;

    series x=Month y=Points / markers;

    series x=Month y=MA_3 / lineattrs=(pattern=2) legendlabel='3-Month MA';

    series x=Month y=MA_6 / lineattrs=(pattern=3) legendlabel='6-Month MA';

    rowaxis label="Ranking Points";

    colaxis label="Month";

    title "Ranking Points and Moving Averages by Player";

run;

OUTPUT:

The SGPanel Procedure


7) Macro examples: 

- %derive_core(ds=, out=) : run same derivations on any dataset with DOB, Height_cm

- %player_movingavg(pid=, out=): generate moving average for a single player using PROC EXPAND


%macro derive_core(ds=work.tennis_players, out=);

  data &out.;

    set &ds.;

    ref_date='11NOV2025'd;

    years = intck('year', DOB, ref_date);

    if month(ref_date) < month(DOB) or (month(ref_date)=month(DOB) and day(ref_date)<day(DOB)) 

    then Age = years - 1;

    else Age = years;

    Height_m = round(Height_cm/100,0.01);

    if Age < 20 then AgeGroup='Under20';

    else if 20 <= Age < 25 then AgeGroup='20-24';

    else if 25 <= Age < 30 then AgeGroup='25-29';

    else if 30 <= Age < 35 then AgeGroup='30-34';

    else AgeGroup='35plus';

    drop years;

  run;

 proc print;run;

%mend derive_core;


%derive_core(ds=work.tennis_players, out=work.tennis_players_macro);

OUTPUT:

ObsPlayerIDNameCountryHandDOBHeight_cmref_dateAgeHeight_mAgeGroup
1P001Rafael NadalSpainR03/06/198618524056391.8535plus
2P002Novak DjokovicSerbiaR22/05/198718824056381.8835plus
3P003Roger FedererSwitzerlandR08/08/198118524056441.8535plus
4P004Andy MurrayUnited KingdomR15/05/198719024056381.9035plus
5P005Serena WilliamsUSAR26/09/198117524056441.7535plus
6P006Naomi OsakaJapanR16/10/199718024056281.8025-29
7P007Ashleigh BartyAustraliaR24/04/199616624056291.6625-29
8P008Simona HalepRomaniaR27/09/199116824056341.6830-34
9P009Dominic ThiemAustriaR03/09/199318524056321.8530-34
10P010Alexander ZverevGermanyR20/04/199719824056281.9825-29
11P011Iga SwiatekPolandR31/05/200117524056241.7520-24
12P012Carlos AlcarazSpainR05/05/200318524056221.8520-24


/* Macro to compute moving averages for a single player ID (convenience wrapper) */

%macro player_movingavg(pid=P002, out=work.pid_ma);

  data _tmp;

    set work.rankings;

    where PlayerID="&pid.";

  run;

  proc print;run;

  proc sort data=_tmp; 

  by Month; 

  run;

  proc expand data=_tmp out=&out. method=none;

    id Month;

    convert Points = MA_3 / transformout=(movave 3);

    convert Points = MA_6 / transformout=(movave 6);

  run;

%mend player_movingavg;


%player_movingavg(pid=P001, out=work.P001_ma);

OUTPUT:

ObsPlayerIDMonthPoints
1P0012025018586
2P0012025028507
3P0012025038515
4P0012025048618
5P0012025058561
6P0012025068671
7P0012025078601
8P0012025088215
9P0012025098357
10P0012025108174
11P0012025118417
12P0012025128366

proc print data=work.P001_ma noobs; 

run;

OUTPUT:

MonthMA_3MA_6PlayerIDPoints
2025018586.008586.00P0018586
2025028546.508546.50P0018507
2025038536.008536.00P0018515
2025048546.678556.50P0018618
2025058564.678557.40P0018561
2025068616.678576.33P0018671
2025078611.008578.83P0018601
2025088495.678530.17P0018215
2025098391.008503.83P0018357
2025108248.678429.83P0018174
2025118316.008405.83P0018417
2025128319.008355.00P0018366



To Visit My Previous Online Streaming Flatform Dataset:Click Here
To Visit My Previous Statistical Evaluation Of Clinical Trials:Click Here
To Visit My Previous Unlocking Retail Insights Dataset:Click Here
To Visit My Previous Sas Interview Questions-1:Click Here


Follow Us On : 

 


--- FOLLOW OUR BLOG FOR MORE INFORMATION.

--->PLEASE DO COMMENTS AND SHARE OUR BLOG.

No comments:

Post a Comment