FITNESS DATASET CREATION AND ANALYSIS USING PROC SQL | PROC MEANS | PROC UNIVARIATE | PROC SGPLOT | PROC FREQ | PROC FORMAT | PROC RANK | PROC REPORT | PROC CORR (WITH MACROS AND DATE FUNCTIONS LIKE INTCK | INTNX)
options nocenter;
1. Create raw table with explicit dates and sample values
data work.fitness_raw;
format Baseline_Date Measurement_Date Next_Check_Date date9.;
/* Next_Check_Date placeholder - will derive later */
infile datalines dsd dlm=',';
input Person_ID :$8. Steps_Per_Day :comma32. Calories_Burned :comma32. BMI :8.2
Heart_Rate :8. Baseline_Date :date9. Measurement_Date :date9.;
datalines;
P001, 12000, 3200, 22.5, 62, 01JAN2024, 01JUL2025
P002, 3500, 2100, 29.8, 82, 01FEB2024, 10JUL2025
P003, 8000, 2500, 24.3, 72, 15MAR2024, 25JUN2025
P004, 4000, 2300, 27.1, 78, 01APR2024, 05JUL2025
P005, 15000, 3400, 21.0, 58, 20MAY2024, 20JUN2025
P006, 6000, 2400, 26.5, 75, 10JUN2024, 30JUN2025
P007, 10000, 3000, 23.4, 66, 01JUL2024, 01JUL2025
P008, 2000, 1900, 31.2, 88, 15JUL2024, 15JUL2025
P009, 9000, 2800, 25.0, 70, 01AUG2024, 10JUL2025
P010, 11000, 3100, 22.0, 64, 05SEP2024, 05JUL2025
P011, 4500, 2250, 28.4, 79, 10OCT2024, 10JUL2025
P012, 7000, 2600, 24.9, 71, 20NOV2024, 20JUL2025
;
run;
proc print data=work.fitness_raw ;
title "Raw fitness_raw dataset - initial check";
run;
OUTPUT:
| Obs | Baseline_Date | Measurement_Date | Next_Check_Date | Person_ID | Steps_Per_Day | Calories_Burned | BMI | Heart_Rate |
|---|---|---|---|---|---|---|---|---|
| 1 | 01JAN2024 | 01JUL2025 | . | P001 | 12000 | 3200 | 22.5 | 62 |
| 2 | 01FEB2024 | 10JUL2025 | . | P002 | 3500 | 2100 | 29.8 | 82 |
| 3 | 15MAR2024 | 25JUN2025 | . | P003 | 8000 | 2500 | 24.3 | 72 |
| 4 | 01APR2024 | 05JUL2025 | . | P004 | 4000 | 2300 | 27.1 | 78 |
| 5 | 20MAY2024 | 20JUN2025 | . | P005 | 15000 | 3400 | 21.0 | 58 |
| 6 | 10JUN2024 | 30JUN2025 | . | P006 | 6000 | 2400 | 26.5 | 75 |
| 7 | 01JUL2024 | 01JUL2025 | . | P007 | 10000 | 3000 | 23.4 | 66 |
| 8 | 15JUL2024 | 15JUL2025 | . | P008 | 2000 | 1900 | 31.2 | 88 |
| 9 | 01AUG2024 | 10JUL2025 | . | P009 | 9000 | 2800 | 25.0 | 70 |
| 10 | 05SEP2024 | 05JUL2025 | . | P010 | 11000 | 3100 | 22.0 | 64 |
| 11 | 10OCT2024 | 10JUL2025 | . | P011 | 4500 | 2250 | 28.4 | 79 |
| 12 | 20NOV2024 | 20JUL2025 | . | P012 | 7000 | 2600 | 24.9 | 71 |
2. Derive additional date-based and health variables using DATA step
data work.fitness;
set work.fitness_raw;
/* Compute days between Baseline and Measurement (inclusive/exclusive choice: exclusive) */
Duration_Days = intck('day', Baseline_Date, Measurement_Date);
/* Compute Next_Check_Date as 3 months after Measurement (INTNX with 'same' alignment) */
Next_Check_Date = intnx('month', Measurement_Date, 3, 'same');
/* Step score: cap at 20000 for scaling */
Steps_Capped = min(Steps_Per_Day,20000);
Step_Score = (Steps_Capped / 10000) * 30; /* 0-60 scaled to 30 baseline */
/* BMI score: best at 21.75-24.99 -> use gaussian-ish penalty (simple) */
if BMI <= 18.5 then BMI_Score = 10;
else if 18.5 < BMI <= 24.99 then BMI_Score = 30;
else if 25 <= BMI <= 29.99 then BMI_Score = 20;
else BMI_Score = 10; /* obese range */
/* Heart rate score: lower is better (but not extremely low) */
if 50 <= Heart_Rate <= 70 then HR_Score = 30;
else if 71 <= Heart_Rate <= 80 then HR_Score = 20;
else if Heart_Rate < 50 then HR_Score = 15;
else HR_Score = 10;
/* Calories score: approximate expected burn (higher with more activity) */
Cal_Score = min((Calories_Burned / 3000) * 20, 20);
/* Composite score out of 100 */
Composite_Score = Step_Score + BMI_Score + HR_Score + Cal_Score;
/* Keep composite in 0-100 boundary */
if Composite_Score > 100 then Composite_Score = 100;
drop Steps_Capped;
run;
proc print data=work.fitness;
run;
OUTPUT:
| Obs | Baseline_Date | Measurement_Date | Next_Check_Date | Person_ID | Steps_Per_Day | Calories_Burned | BMI | Heart_Rate | Duration_Days | Step_Score | BMI_Score | HR_Score | Cal_Score | Composite_Score |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 01JAN2024 | 01JUL2025 | 01OCT2025 | P001 | 12000 | 3200 | 22.5 | 62 | 547 | 36.0 | 30 | 30 | 20.0000 | 100.000 |
| 2 | 01FEB2024 | 10JUL2025 | 10OCT2025 | P002 | 3500 | 2100 | 29.8 | 82 | 525 | 10.5 | 20 | 10 | 14.0000 | 54.500 |
| 3 | 15MAR2024 | 25JUN2025 | 25SEP2025 | P003 | 8000 | 2500 | 24.3 | 72 | 467 | 24.0 | 30 | 20 | 16.6667 | 90.667 |
| 4 | 01APR2024 | 05JUL2025 | 05OCT2025 | P004 | 4000 | 2300 | 27.1 | 78 | 460 | 12.0 | 20 | 20 | 15.3333 | 67.333 |
| 5 | 20MAY2024 | 20JUN2025 | 20SEP2025 | P005 | 15000 | 3400 | 21.0 | 58 | 396 | 45.0 | 30 | 30 | 20.0000 | 100.000 |
| 6 | 10JUN2024 | 30JUN2025 | 30SEP2025 | P006 | 6000 | 2400 | 26.5 | 75 | 385 | 18.0 | 20 | 20 | 16.0000 | 74.000 |
| 7 | 01JUL2024 | 01JUL2025 | 01OCT2025 | P007 | 10000 | 3000 | 23.4 | 66 | 365 | 30.0 | 30 | 30 | 20.0000 | 100.000 |
| 8 | 15JUL2024 | 15JUL2025 | 15OCT2025 | P008 | 2000 | 1900 | 31.2 | 88 | 365 | 6.0 | 10 | 10 | 12.6667 | 38.667 |
| 9 | 01AUG2024 | 10JUL2025 | 10OCT2025 | P009 | 9000 | 2800 | 25.0 | 70 | 343 | 27.0 | 20 | 30 | 18.6667 | 95.667 |
| 10 | 05SEP2024 | 05JUL2025 | 05OCT2025 | P010 | 11000 | 3100 | 22.0 | 64 | 303 | 33.0 | 30 | 30 | 20.0000 | 100.000 |
| 11 | 10OCT2024 | 10JUL2025 | 10OCT2025 | P011 | 4500 | 2250 | 28.4 | 79 | 273 | 13.5 | 20 | 20 | 15.0000 | 68.500 |
| 12 | 20NOV2024 | 20JUL2025 | 20OCT2025 | P012 | 7000 | 2600 | 24.9 | 71 | 242 | 21.0 | 30 | 20 | 17.3333 | 88.333 |
3. Macro for automatic classification based on Composite_Score and BMI thresholds
%macro classify_health(in=, out=);
/* Creates a new dataset with Fitness_Level variable assigned */
data &out.;
set &in.;
length Fitness_Level $12.;
if Composite_Score >= 80 then Fitness_Level = 'Excellent';
else if Composite_Score >= 60 then Fitness_Level = 'Good';
else if Composite_Score >= 40 then Fitness_Level = 'Average';
else Fitness_Level = 'Poor';
/* Make a short textual note for borderline cases (example of derived variable) */
if (BMI >= 25 and Composite_Score >= 60) then Fitness_Comment = 'High BMI but good activity';
else if (BMI < 18.5 and Composite_Score < 40) then Fitness_Comment = 'Underweight & low score';
else Fitness_Comment = 'No major note';
format Baseline_Date Measurement_Date Next_Check_Date date9.;
run;
proc print data=&out. ;
run;
%mend classify_health;
%classify_health(in=work.fitness, out=work.fitness_final);
OUTPUT:
| Obs | Baseline_Date | Measurement_Date | Next_Check_Date | Person_ID | Steps_Per_Day | Calories_Burned | BMI | Heart_Rate | Duration_Days | Step_Score | BMI_Score | HR_Score | Cal_Score | Composite_Score | Fitness_Level | Fitness_Comment |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 01JAN2024 | 01JUL2025 | 01OCT2025 | P001 | 12000 | 3200 | 22.5 | 62 | 547 | 36.0 | 30 | 30 | 20.0000 | 100.000 | Excellent | No major note |
| 2 | 01FEB2024 | 10JUL2025 | 10OCT2025 | P002 | 3500 | 2100 | 29.8 | 82 | 525 | 10.5 | 20 | 10 | 14.0000 | 54.500 | Average | No major note |
| 3 | 15MAR2024 | 25JUN2025 | 25SEP2025 | P003 | 8000 | 2500 | 24.3 | 72 | 467 | 24.0 | 30 | 20 | 16.6667 | 90.667 | Excellent | No major note |
| 4 | 01APR2024 | 05JUL2025 | 05OCT2025 | P004 | 4000 | 2300 | 27.1 | 78 | 460 | 12.0 | 20 | 20 | 15.3333 | 67.333 | Good | High BMI but good activity |
| 5 | 20MAY2024 | 20JUN2025 | 20SEP2025 | P005 | 15000 | 3400 | 21.0 | 58 | 396 | 45.0 | 30 | 30 | 20.0000 | 100.000 | Excellent | No major note |
| 6 | 10JUN2024 | 30JUN2025 | 30SEP2025 | P006 | 6000 | 2400 | 26.5 | 75 | 385 | 18.0 | 20 | 20 | 16.0000 | 74.000 | Good | High BMI but good activity |
| 7 | 01JUL2024 | 01JUL2025 | 01OCT2025 | P007 | 10000 | 3000 | 23.4 | 66 | 365 | 30.0 | 30 | 30 | 20.0000 | 100.000 | Excellent | No major note |
| 8 | 15JUL2024 | 15JUL2025 | 15OCT2025 | P008 | 2000 | 1900 | 31.2 | 88 | 365 | 6.0 | 10 | 10 | 12.6667 | 38.667 | Poor | No major note |
| 9 | 01AUG2024 | 10JUL2025 | 10OCT2025 | P009 | 9000 | 2800 | 25.0 | 70 | 343 | 27.0 | 20 | 30 | 18.6667 | 95.667 | Excellent | High BMI but good activity |
| 10 | 05SEP2024 | 05JUL2025 | 05OCT2025 | P010 | 11000 | 3100 | 22.0 | 64 | 303 | 33.0 | 30 | 30 | 20.0000 | 100.000 | Excellent | No major note |
| 11 | 10OCT2024 | 10JUL2025 | 10OCT2025 | P011 | 4500 | 2250 | 28.4 | 79 | 273 | 13.5 | 20 | 20 | 15.0000 | 68.500 | Good | High BMI but good activity |
| 12 | 20NOV2024 | 20JUL2025 | 20OCT2025 | P012 | 7000 | 2600 | 24.9 | 71 | 242 | 21.0 | 30 | 20 | 17.3333 | 88.333 | Excellent | No major note |
4. Use PROC SQL to create a summary table and to demonstrate SQL skills
proc sql noprint;
create table work.fitness_summary_sql as
select
count(Person_ID) as N,
mean(Steps_Per_Day) as Mean_Steps format=8.1,
median(Steps_Per_Day) as Median_Steps,
min(Steps_Per_Day) as Min_Steps,
max(Steps_Per_Day) as Max_Steps,
mean(BMI) as Mean_BMI format=8.2,
mean(Heart_Rate) as Mean_HR format=8.2
from work.fitness_final;
quit;
proc print data=work.fitness_summary_sql noobs;
title "Summary statistics (PROC SQL)";
run;
OUTPUT:
| N | Mean_Steps | Median_Steps | Min_Steps | Max_Steps | Mean_BMI | Mean_HR |
|---|---|---|---|---|---|---|
| 12 | 7666.7 | 7500 | 2000 | 15000 | 25.51 | 72.08 |
5. PROC MEANS: numeric summaries by Fitness_Level
proc means data=work.fitness_final n mean median std min max;
class Fitness_Level;
var Steps_Per_Day Calories_Burned BMI Heart_Rate Composite_Score Duration_Days;
title "PROC MEANS by Fitness_Level";
run;
OUTPUT:
The MEANS Procedure
| Fitness_Level | N Obs | Variable | N | Mean | Median | Std Dev | Minimum | Maximum |
|---|---|---|---|---|---|---|---|---|
| Average | 1 | Steps_Per_Day Calories_Burned BMI Heart_Rate Composite_Score Duration_Days | 1 1 1 1 1 1 | 3500.00 2100.00 29.8000000 82.0000000 54.5000000 525.0000000 | 3500.00 2100.00 29.8000000 82.0000000 54.5000000 525.0000000 | . . . . . . | 3500.00 2100.00 29.8000000 82.0000000 54.5000000 525.0000000 | 3500.00 2100.00 29.8000000 82.0000000 54.5000000 525.0000000 |
| Excellent | 7 | Steps_Per_Day Calories_Burned BMI Heart_Rate Composite_Score Duration_Days | 7 7 7 7 7 7 | 10285.71 2942.86 23.3000000 66.1428571 96.3809524 380.4285714 | 10000.00 3000.00 23.4000000 66.0000000 100.0000000 365.0000000 | 2690.37 325.8688021 1.5318834 5.1777914 5.0052882 101.8820513 | 7000.00 2500.00 21.0000000 58.0000000 88.3333333 242.0000000 | 15000.00 3400.00 25.0000000 72.0000000 100.0000000 547.0000000 |
| Good | 3 | Steps_Per_Day Calories_Burned BMI Heart_Rate Composite_Score Duration_Days | 3 3 3 3 3 3 | 4833.33 2316.67 27.3333333 77.3333333 69.9444444 372.6666667 | 4500.00 2300.00 27.1000000 78.0000000 68.5000000 385.0000000 | 1040.83 76.3762616 0.9712535 2.0816660 3.5603267 94.1080939 | 4000.00 2250.00 26.5000000 75.0000000 67.3333333 273.0000000 | 6000.00 2400.00 28.4000000 79.0000000 74.0000000 460.0000000 |
| Poor | 1 | Steps_Per_Day Calories_Burned BMI Heart_Rate Composite_Score Duration_Days | 1 1 1 1 1 1 | 2000.00 1900.00 31.2000000 88.0000000 38.6666667 365.0000000 | 2000.00 1900.00 31.2000000 88.0000000 38.6666667 365.0000000 | . . . . . . | 2000.00 1900.00 31.2000000 88.0000000 38.6666667 365.0000000 | 2000.00 1900.00 31.2000000 88.0000000 38.6666667 365.0000000 |
6. PROC UNIVARIATE: check distribution of Composite_Score and Steps
proc univariate data=work.fitness_final cibasic;
var Composite_Score Steps_Per_Day;
histogram Composite_Score / normal;
inset mean median std / pos = ne;
title "PROC UNIVARIATE: Composite_Score and Steps distribution";
run;
OUTPUT:
The UNIVARIATE Procedure
Variable: Composite_Score
| Moments | |||
|---|---|---|---|
| N | 12 | Sum Weights | 12 |
| Mean | 81.4722222 | Sum Observations | 977.666667 |
| Std Deviation | 20.6486766 | Variance | 426.367845 |
| Skewness | -0.9179529 | Kurtosis | -0.1506143 |
| Uncorrected SS | 84342.7222 | Corrected SS | 4690.0463 |
| Coeff Variation | 25.3444377 | Std Error Mean | 5.9607595 |
| Basic Statistical Measures | |||
|---|---|---|---|
| Location | Variability | ||
| Mean | 81.4722 | Std Deviation | 20.64868 |
| Median | 89.5000 | Variance | 426.36785 |
| Mode | 100.0000 | Range | 61.33333 |
| Interquartile Range | 32.08333 | ||
| Basic Confidence Limits Assuming Normality | |||
|---|---|---|---|
| Parameter | Estimate | 95% Confidence Limits | |
| Mean | 81.47222 | 68.35268 | 94.59177 |
| Std Deviation | 20.64868 | 14.62742 | 35.05893 |
| Variance | 426.36785 | 213.96149 | 1229 |
| Tests for Location: Mu0=0 | ||||
|---|---|---|---|---|
| Test | Statistic | p Value | ||
| Student's t | t | 13.66809 | Pr > |t| | <.0001 |
| Sign | M | 6 | Pr >= |M| | 0.0005 |
| Signed Rank | S | 39 | Pr >= |S| | 0.0005 |
| Quantiles (Definition 5) | |
|---|---|
| Level | Quantile |
| 100% Max | 100.0000 |
| 99% | 100.0000 |
| 95% | 100.0000 |
| 90% | 100.0000 |
| 75% Q3 | 100.0000 |
| 50% Median | 89.5000 |
| 25% Q1 | 67.9167 |
| 10% | 54.5000 |
| 5% | 38.6667 |
| 1% | 38.6667 |
| 0% Min | 38.6667 |
| Extreme Observations | |||
|---|---|---|---|
| Lowest | Highest | ||
| Value | Obs | Value | Obs |
| 38.6667 | 8 | 95.6667 | 9 |
| 54.5000 | 2 | 100.0000 | 1 |
| 67.3333 | 4 | 100.0000 | 5 |
| 68.5000 | 11 | 100.0000 | 7 |
| 74.0000 | 6 | 100.0000 | 10 |
The UNIVARIATE Procedure
The UNIVARIATE Procedure
Fitted Normal Distribution for Composite_Score
| Parameters for Normal Distribution | ||
|---|---|---|
| Parameter | Symbol | Estimate |
| Mean | Mu | 81.47222 |
| Std Dev | Sigma | 20.64868 |
| Goodness-of-Fit Tests for Normal Distribution | ||||
|---|---|---|---|---|
| Test | Statistic | p Value | ||
| Kolmogorov-Smirnov | D | 0.21349384 | Pr > D | 0.133 |
| Cramer-von Mises | W-Sq | 0.10996326 | Pr > W-Sq | 0.076 |
| Anderson-Darling | A-Sq | 0.66797269 | Pr > A-Sq | 0.063 |
| Quantiles for Normal Distribution | ||
|---|---|---|
| Percent | Quantile | |
| Observed | Estimated | |
| 1.0 | 38.6667 | 33.4362 |
| 5.0 | 38.6667 | 47.5082 |
| 10.0 | 54.5000 | 55.0099 |
| 25.0 | 67.9167 | 67.5449 |
| 50.0 | 89.5000 | 81.4722 |
| 75.0 | 100.0000 | 95.3995 |
| 90.0 | 100.0000 | 107.9346 |
| 95.0 | 100.0000 | 115.4363 |
| 99.0 | 100.0000 | 129.5082 |
The UNIVARIATE Procedure
Variable: Steps_Per_Day
| Moments | |||
|---|---|---|---|
| N | 12 | Sum Weights | 12 |
| Mean | 7666.66667 | Sum Observations | 92000 |
| Std Deviation | 3898.32909 | Variance | 15196969.7 |
| Skewness | 0.34547102 | Kurtosis | -0.6063581 |
| Uncorrected SS | 872500000 | Corrected SS | 167166667 |
| Coeff Variation | 50.8477707 | Std Error Mean | 1125.35067 |
| Basic Statistical Measures | |||
|---|---|---|---|
| Location | Variability | ||
| Mean | 7666.667 | Std Deviation | 3898 |
| Median | 7500.000 | Variance | 15196970 |
| Mode | . | Range | 13000 |
| Interquartile Range | 6250 | ||
| Basic Confidence Limits Assuming Normality | |||
|---|---|---|---|
| Parameter | Estimate | 95% Confidence Limits | |
| Mean | 7667 | 5190 | 10144 |
| Std Deviation | 3898 | 2762 | 6619 |
| Variance | 15196970 | 7626199 | 43809669 |
| Tests for Location: Mu0=0 | ||||
|---|---|---|---|---|
| Test | Statistic | p Value | ||
| Student's t | t | 6.812691 | Pr > |t| | <.0001 |
| Sign | M | 6 | Pr >= |M| | 0.0005 |
| Signed Rank | S | 39 | Pr >= |S| | 0.0005 |
| Quantiles (Definition 5) | |
|---|---|
| Level | Quantile |
| 100% Max | 15000 |
| 99% | 15000 |
| 95% | 15000 |
| 90% | 12000 |
| 75% Q3 | 10500 |
| 50% Median | 7500 |
| 25% Q1 | 4250 |
| 10% | 3500 |
| 5% | 2000 |
| 1% | 2000 |
| 0% Min | 2000 |
| Extreme Observations | |||
|---|---|---|---|
| Lowest | Highest | ||
| Value | Obs | Value | Obs |
| 2000 | 8 | 9000 | 9 |
| 3500 | 2 | 10000 | 7 |
| 4000 | 4 | 11000 | 10 |
| 4500 | 11 | 12000 | 1 |
| 6000 | 6 | 15000 | 5 |
7. PROC FREQ for categorical distribution of Fitness_Level
proc freq data=work.fitness_final;
tables Fitness_Level / nocum norow nocol;
title "Distribution of Fitness_Level (PROC FREQ)";
run;
OUTPUT:
The FREQ Procedure
| Fitness_Level | Frequency | Percent |
|---|---|---|
| Average | 1 | 8.33 |
| Excellent | 7 | 58.33 |
| Good | 3 | 25.00 |
| Poor | 1 | 8.33 |
8. PROC FORMAT for nice reporting
proc format;
value scorefmt
low - <40 = 'Low (<40)'
40 - <60 = 'Below Average (40-59)'
60 - <80 = 'Good (60-79)'
80 - high = 'Excellent (80+)';
run;
LOG:
9. PROC RANK: rank individuals by Steps_Per_Day
proc rank data=work.fitness_final out=work.fitness_rank ties=low descending;
var Steps_Per_Day;
ranks Steps_Rank;
run;
proc print data=work.fitness_rank noobs;
var Person_ID Steps_Per_Day Steps_Rank Fitness_Level Composite_Score;
title "Ranked by Steps_Per_Day";
run;
OUTPUT:
| Person_ID | Steps_Per_Day | Steps_Rank | Fitness_Level | Composite_Score |
|---|---|---|---|---|
| P001 | 12000 | 2 | Excellent | 100.000 |
| P002 | 3500 | 11 | Average | 54.500 |
| P003 | 8000 | 6 | Excellent | 90.667 |
| P004 | 4000 | 10 | Good | 67.333 |
| P005 | 15000 | 1 | Excellent | 100.000 |
| P006 | 6000 | 8 | Good | 74.000 |
| P007 | 10000 | 4 | Excellent | 100.000 |
| P008 | 2000 | 12 | Poor | 38.667 |
| P009 | 9000 | 5 | Excellent | 95.667 |
| P010 | 11000 | 3 | Excellent | 100.000 |
| P011 | 4500 | 9 | Good | 68.500 |
| P012 | 7000 | 7 | Excellent | 88.333 |
10. PROC CORR: check correlation between Steps, Calories, BMI, HR, Composite
proc corr data=work.fitness_final pearson nosimple;
var Steps_Per_Day Calories_Burned BMI Heart_Rate Composite_Score;
title "Correlation matrix (PROC CORR)";
run;
OUTPUT:
The CORR Procedure
| 5 Variables: | Steps_Per_Day Calories_Burned BMI Heart_Rate Composite_Score |
|---|
| Pearson Correlation Coefficients, N = 12 Prob > |r| under H0: Rho=0 | |||||
|---|---|---|---|---|---|
| Steps_Per_Day | Calories_Burned | BMI | Heart_Rate | Composite_Score | |
| Steps_Per_Day | 1.00000 | 0.98446 <.0001 | -0.95596 <.0001 | -0.98147 <.0001 | 0.90224 <.0001 |
| Calories_Burned | 0.98446 <.0001 | 1.00000 | -0.96049 <.0001 | -0.98886 <.0001 | 0.91810 <.0001 |
| BMI | -0.95596 <.0001 | -0.96049 <.0001 | 1.00000 | 0.98401 <.0001 | -0.96037 <.0001 |
| Heart_Rate | -0.98147 <.0001 | -0.98886 <.0001 | 0.98401 <.0001 | 1.00000 | -0.94707 <.0001 |
| Composite_Score | 0.90224 <.0001 | 0.91810 <.0001 | -0.96037 <.0001 | -0.94707 <.0001 | 1.00000 |
11. PROC SGPLOT: scatter and series to visualize Steps vs Composite_Score
proc sgplot data=work.fitness_final;
scatter x=Steps_Per_Day y=Composite_Score / markerattrs=(symbol=CircleFilled);
reg x=Steps_Per_Day y=Composite_Score / cli clm; /* regression line with CI */
xaxis label="Steps Per Day";
yaxis label="Composite Score";
title "Steps vs Composite Score (Scatter + Regression)";
run;
OUTPUT:
12. PROC REPORT: polished report with formatted scores
proc report data=work.fitness_final nowd;
columns Person_ID Measurement_Date Steps_Per_Day Calories_Burned BMI Heart_Rate
Composite_Score Fitness_Level;
define Measurement_Date / display format=date9. 'Measurement';
define Composite_Score / format=8.1 'Composite';
title "Detailed fitness report (PROC REPORT)";
run;
title;
OUTPUT:
| Person_ID | Measurement | Steps_Per_Day | Calories_Burned | BMI | Heart_Rate | Composite | Fitness_Level |
|---|---|---|---|---|---|---|---|
| P001 | 01JUL2025 | 12000 | 3200 | 22.5 | 62 | 100.0 | Excellent |
| P002 | 10JUL2025 | 3500 | 2100 | 29.8 | 82 | 54.5 | Average |
| P003 | 25JUN2025 | 8000 | 2500 | 24.3 | 72 | 90.7 | Excellent |
| P004 | 05JUL2025 | 4000 | 2300 | 27.1 | 78 | 67.3 | Good |
| P005 | 20JUN2025 | 15000 | 3400 | 21 | 58 | 100.0 | Excellent |
| P006 | 30JUN2025 | 6000 | 2400 | 26.5 | 75 | 74.0 | Good |
| P007 | 01JUL2025 | 10000 | 3000 | 23.4 | 66 | 100.0 | Excellent |
| P008 | 15JUL2025 | 2000 | 1900 | 31.2 | 88 | 38.7 | Poor |
| P009 | 10JUL2025 | 9000 | 2800 | 25 | 70 | 95.7 | Excellent |
| P010 | 05JUL2025 | 11000 | 3100 | 22 | 64 | 100.0 | Excellent |
| P011 | 10JUL2025 | 4500 | 2250 | 28.4 | 79 | 68.5 | Good |
| P012 | 20JUL2025 | 7000 | 2600 | 24.9 | 71 | 88.3 | Excellent |
13. Example QC: flag outliers in steps (simple rule: <2500 or >20000)
data work.fitness_qc;
set work.fitness_final;
if Steps_Per_Day < 2500 or Steps_Per_Day > 20000 then Steps_Flag = 'Outlier';
else Steps_Flag = 'OK';
run;
proc print data=work.fitness_qc;
run;
OUTPUT:
| Obs | Baseline_Date | Measurement_Date | Next_Check_Date | Person_ID | Steps_Per_Day | Calories_Burned | BMI | Heart_Rate | Duration_Days | Step_Score | BMI_Score | HR_Score | Cal_Score | Composite_Score | Fitness_Level | Fitness_Comment | Steps_Flag |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 01JAN2024 | 01JUL2025 | 01OCT2025 | P001 | 12000 | 3200 | 22.5 | 62 | 547 | 36.0 | 30 | 30 | 20.0000 | 100.000 | Excellent | No major note | OK |
| 2 | 01FEB2024 | 10JUL2025 | 10OCT2025 | P002 | 3500 | 2100 | 29.8 | 82 | 525 | 10.5 | 20 | 10 | 14.0000 | 54.500 | Average | No major note | OK |
| 3 | 15MAR2024 | 25JUN2025 | 25SEP2025 | P003 | 8000 | 2500 | 24.3 | 72 | 467 | 24.0 | 30 | 20 | 16.6667 | 90.667 | Excellent | No major note | OK |
| 4 | 01APR2024 | 05JUL2025 | 05OCT2025 | P004 | 4000 | 2300 | 27.1 | 78 | 460 | 12.0 | 20 | 20 | 15.3333 | 67.333 | Good | High BMI but good activity | OK |
| 5 | 20MAY2024 | 20JUN2025 | 20SEP2025 | P005 | 15000 | 3400 | 21.0 | 58 | 396 | 45.0 | 30 | 30 | 20.0000 | 100.000 | Excellent | No major note | OK |
| 6 | 10JUN2024 | 30JUN2025 | 30SEP2025 | P006 | 6000 | 2400 | 26.5 | 75 | 385 | 18.0 | 20 | 20 | 16.0000 | 74.000 | Good | High BMI but good activity | OK |
| 7 | 01JUL2024 | 01JUL2025 | 01OCT2025 | P007 | 10000 | 3000 | 23.4 | 66 | 365 | 30.0 | 30 | 30 | 20.0000 | 100.000 | Excellent | No major note | OK |
| 8 | 15JUL2024 | 15JUL2025 | 15OCT2025 | P008 | 2000 | 1900 | 31.2 | 88 | 365 | 6.0 | 10 | 10 | 12.6667 | 38.667 | Poor | No major note | Outlier |
| 9 | 01AUG2024 | 10JUL2025 | 10OCT2025 | P009 | 9000 | 2800 | 25.0 | 70 | 343 | 27.0 | 20 | 30 | 18.6667 | 95.667 | Excellent | High BMI but good activity | OK |
| 10 | 05SEP2024 | 05JUL2025 | 05OCT2025 | P010 | 11000 | 3100 | 22.0 | 64 | 303 | 33.0 | 30 | 30 | 20.0000 | 100.000 | Excellent | No major note | OK |
| 11 | 10OCT2024 | 10JUL2025 | 10OCT2025 | P011 | 4500 | 2250 | 28.4 | 79 | 273 | 13.5 | 20 | 20 | 15.0000 | 68.500 | Good | High BMI but good activity | OK |
| 12 | 20NOV2024 | 20JUL2025 | 20OCT2025 | P012 | 7000 | 2600 | 24.9 | 71 | 242 | 21.0 | 30 | 20 | 17.3333 | 88.333 | Excellent | No major note | OK |
14. Average of Steps_Per_Day
proc avg data=work.fitness_final;
var Steps_Per_Day;
run;
OUTPUT:
| Obs | _TYPE_ | _FREQ_ | Avg_Steps |
|---|---|---|---|
| 1 | 0 | 12 | 7666.67 |
/* Note: In practice above there is an Invalid in this code Find it,Correct it and Use it /*
YESTERDAY INVALID CODE ANSWER
9) Utility macro to list top N by a chosen metric
%macro top_n(data=work.startups_cat, var=Growth_Rate, n=5);
proc sort data=&data out=_top descending;
by descending &var;
run;
Descending has to place in by statement in PROC SORT;
title "Top &n Startups by &var";
proc print data=_top (obs=&n);
var Startup_Name Industry &var Country Funding Employees Growth_Rate;
run;
%mend top_n;
%top_n(data=work.startups_cat, var=Growth_Rate, n=5);
OUTPUT:
| Obs | Startup_Name | Industry | Growth_Rate | Country | Funding | Employees | Growth_Rate |
|---|---|---|---|---|---|---|---|
| 1 | NanoHealth Bio | Biotech | 38.90 | Germany | $25,000,000 | 200 | 38.90 |
| 2 | MediSoft Labs | Healthcare IT | 32.10 | Canada | $12,000,000 | 120 | 32.10 |
| 3 | BlockWave Systems | Blockchain | 27.00 | Singapore | $4,600,000 | 35 | 27.00 |
| 4 | AeroScale Innovations | Aerospace | 24.50 | United States | $5,500,000 | 45 | 24.50 |
| 5 | CyberGuard AI | Cybersecurity | 23.20 | Israel | $6,100,000 | 52 | 23.20 |
No comments:
Post a Comment