- Get link
- X
- Other Apps
Try It Yourself And Comment Your Output - II
I DATASET
/* Creating a sample dataset named PATIENTS.
   Every data line carries ten blank-separated fields, in this order:
   ID Name Age Gender SysBP DiasBP Cholesterol Glucose Weight Height
   The INPUT list must match that count exactly: with list input, an extra
   variable makes SAS read the next data line to fill it, which misaligns
   every column and drops observations. */
DATA PATIENTS;
    /* Blank is the default list-input delimiter, so no DLM=/DSD is needed.
       TRUNCOVER stops a short line from borrowing values from the next one. */
    INFILE DATALINES TRUNCOVER;
    INPUT ID $ Name $ Age Gender $ SysBP DiasBP Cholesterol Glucose Weight Height;
    DATALINES;
001 John 45 M 120 80 200 90 80 175
002 Mary 38 F 110 70 180 85 65 160
003 Steve 55 M 130 85 220 100 90 180
004 Anna 29 F 115 75 190 92 55 155
005 Tom 42 M 140 90 250 110 100 185
006 Lisa 50 F 125 85 210 95 70 165
007 Alex 33 M 118 78 175 80 75 170
;
RUN;

/* Listing of the raw data */
PROC PRINT DATA=PATIENTS;
RUN;

/* Variable names, types and lengths */
PROC CONTENTS DATA=PATIENTS;
RUN;

/* Count, missing count, range and mean of every numeric variable */
PROC MEANS DATA=PATIENTS N NMISS MIN MAX MEAN;
RUN;

/* Distribution of Gender */
PROC FREQ DATA=PATIENTS;
    TABLES Gender;
RUN;

/* Replace missing Cholesterol with the column mean.
   MEAN(OF Cholesterol) inside a DATA step only sees the current row, so on a
   missing value it returns missing; the column mean has to come from a
   procedure that reads the whole data set. */
PROC STDIZE DATA=PATIENTS OUT=CLEAN_PATIENTS REPONLY METHOD=MEAN;
    VAR Cholesterol;
RUN;

DATA CLEAN_PATIENTS;
    SET CLEAN_PATIENTS;
    /* Standardizing Gender values to upper case */
    Gender = UPCASE(Gender);
    /* Handling missing values (if any were present) with fixed defaults */
    IF MISSING(Age) THEN Age = 40;
    IF MISSING(Height) THEN Height = 170; /* Assigning default height if missing */
RUN;

PROC PRINT DATA=CLEAN_PATIENTS;
RUN;

/* Detailed distribution statistics for the lab and body measurements */
PROC UNIVARIATE DATA=CLEAN_PATIENTS;
    VAR Cholesterol Glucose Weight Height;
RUN;

DATA FINAL_PATIENTS;
    SET CLEAN_PATIENTS;
    /* Declare the length up front: otherwise the first assignment ('Normal')
       fixes it at 6 and 'Borderline' is stored as 'Border'. */
    LENGTH Chol_Cat $ 10;
    /* Calculating BMI: Weight divided by the square of Height/100 */
    BMI = Weight / ((Height/100) ** 2);
    /* Categorizing Cholesterol Levels.
       A missing value sorts below every number in SAS, so it is tested first
       and left blank instead of being labelled 'Normal'. */
    IF MISSING(Cholesterol) THEN Chol_Cat = ' ';
    ELSE IF Cholesterol < 200 THEN Chol_Cat = 'Normal';
    ELSE IF Cholesterol < 240 THEN Chol_Cat = 'Borderline';
    ELSE Chol_Cat = 'High';
RUN;

PROC PRINT DATA=FINAL_PATIENTS;
RUN;

/* Overall summary of the numeric measures (BP is no longer a variable;
   blood pressure is held in SysBP and DiasBP) */
PROC MEANS DATA=FINAL_PATIENTS;
    VAR Age SysBP DiasBP Cholesterol Glucose BMI;
RUN;
/* Summary statistics of Age, Cholesterol and BMI, computed per Gender level */
proc means data=FINAL_PATIENTS;
    class Gender;
    var Age Cholesterol BMI;
run;

/* One-way frequency table of the cholesterol category */
proc freq data=FINAL_PATIENTS;
    tables Chol_Cat;
run;

/* Correlations among Age, Cholesterol, Glucose and BMI */
proc corr data=FINAL_PATIENTS;
    var Age Cholesterol Glucose BMI;
run;
/* Scatter plot of Cholesterol against Age, with points grouped by Gender */
proc sgplot data=FINAL_PATIENTS;
    scatter x=Age y=Cholesterol / group=Gender;
run;
/* Write FINAL_PATIENTS to a CSV file; REPLACE overwrites an existing file.
   NOTE(review): the path contains "YourName" and looks like a placeholder;
   point it at a real, writable folder before running. */
proc export
    data=FINAL_PATIENTS
    outfile="C:\Users\YourName\Documents\Final_Patients.csv"
    dbms=CSV
    replace;
run;
II DATASET
/* Sample employee data. Rows 6 and 10 repeat rows 1 and 3 (apart from
   EmployeeID) so that the later de-duplication step has something to remove. */
data employees;
    /* A plain "$" in list input gives a character variable of length 8, which
       would store "Marketing" as "Marketin". The colon modifier with $12.
       widens Department while keeping the variable order unchanged. */
    input EmployeeID Name $ Department :$12. Salary HireDate :date9.;
    format HireDate date9.;
    datalines;
1 John Sales 55000 15JAN2015
2 Jane Marketing 62000 22MAR2016
3 Jim Sales 58000 10JUL2017
4 Jack IT 60000 05MAY2018
5 Jill HR 59000 12AUG2019
6 John Sales 55000 15JAN2015
7 Jake IT 61000 20SEP2020
8 Jane Marketing 63000 22MAR2016
9 Julia HR 60000 18DEC2021
10 Jim Sales 58000 10JUL2017
;
run;

/* Listing of the raw employee data */
proc print data=employees;
run;
/* Sort by Name and keep only the first row of each Name group, writing the
   result to employees_clean (the input data set is left untouched).
   NOTE(review): the key is Name alone, so two rows that share a name but
   differ elsewhere (e.g. the two "Jane" rows with different salaries) are
   collapsed to one; confirm that this is the intended duplicate rule. */
PROC SORT DATA=employees OUT=employees_clean NODUPKEY;
    BY Name;
RUN;

/* Listing after de-duplication */
PROC PRINT DATA=employees_clean;
RUN;
/* Deliberately blank out two values so the following steps have missing
   data to report and impute: Salary for employee 5, Department for employee 9. */
DATA employees_clean;
    SET employees_clean;
    SELECT (EmployeeID);
        WHEN (5) CALL MISSING(Salary);
        WHEN (9) CALL MISSING(Department);
        OTHERWISE;
    END;
RUN;

/* Listing with the injected missing values */
PROC PRINT DATA=employees_clean;
RUN;
/* Count of non-missing (N) and missing (NMISS) Salary values */
PROC MEANS DATA=employees_clean N NMISS;
    VAR Salary;
RUN;
/* Mean imputation: REPONLY replaces missing Salary values with the mean and
   leaves the non-missing values unstandardized. Result goes to employees_imputed. */
proc stdize data=employees_clean reponly method=mean out=employees_imputed;
    var Salary;
run;

/* Name the data set explicitly instead of relying on the most recently
   created one, so the listing stays correct if steps are reordered or rerun. */
proc print data=employees_imputed;
run;
/* Distribution of the imputed Salary: descriptive statistics, a histogram
   with a fitted normal curve, and an inset showing mean, median and
   standard deviation. */
proc univariate data=employees_imputed;
    var Salary;
    histogram Salary / normal;
    /* Salaries are five-digit values, so two decimals need a width of at
       least 8; 6.2 is too narrow and forces SAS to drop the decimals. */
    inset mean median std / format=10.2;
run;
If You Want Dataset I Output: Click Here
If You Want Dataset II Output : Click Here
- Get link
- X
- Other Apps
Comments
Post a Comment