192.Try It Yourself And Comment Your Output - II

                                     Try It Yourself And Comment Your Output - II

Note: If required, change the library name.

I DATASET 

 /* Creating a sample dataset named PATIENTS */

DATA PATIENTS;

    /* Plain blank-delimited list input. TRUNCOVER makes a short record
       produce missing values instead of silently pulling fields from the
       next record. */
    INFILE DATALINES TRUNCOVER;

    /* Every record carries exactly ten fields, so exactly ten variables are
       read. The former extra BP variable had no matching column and pushed
       every later field one position to the right. */
    INPUT ID $ Name $ Age Gender $ SysBP DiasBP Cholesterol Glucose Weight Height;

    DATALINES;
001 John 45 M 120 80 200 90 80 175
002 Mary 38 F 110 70 180 85 65 160
003 Steve 55 M 130 85 220 100 90 180
004 Anna 29 F 115 75 190 92 55 155
005 Tom 42 M 140 90 250 110 100 185
006 Lisa 50 F 125 85 210 95 70 165
007 Alex 33 M 118 78 175 80 75 170
;

RUN;


 /* List the raw observations */
PROC PRINT DATA=PATIENTS;

RUN;


 /* Show variable names, types and lengths */
PROC CONTENTS DATA=PATIENTS;

RUN;


 /* Count, missing count, range and mean of every numeric variable */
PROC MEANS DATA=PATIENTS N NMISS MIN MAX MEAN;

RUN;


 /* Frequency table of Gender */
PROC FREQ DATA=PATIENTS;

    TABLES Gender;

RUN;


 /* Replace missing Cholesterol with the column mean. REPONLY replaces
    missing values only and leaves non-missing values unchanged. The former
    MEAN(OF Cholesterol) call was a row-wise mean of a single missing value
    and therefore never imputed anything. */
PROC STDIZE DATA=PATIENTS REPONLY METHOD=MEAN OUT=CLEAN_PATIENTS;

    VAR Cholesterol;

RUN;


DATA CLEAN_PATIENTS;

    SET CLEAN_PATIENTS;

    /* Standardizing the case of the Gender values */

    Gender = UPCASE(Gender);

    /* Fixed defaults for missing Age and Height (none are missing in the
       sample data) */

    IF MISSING(Age) THEN Age = 40;

    IF MISSING(Height) THEN Height = 170;

RUN;


PROC PRINT DATA=CLEAN_PATIENTS;

RUN;


 /* Distribution details for the lab and body measurements */
PROC UNIVARIATE DATA=CLEAN_PATIENTS;

    VAR Cholesterol Glucose Weight Height;

RUN;


DATA FINAL_PATIENTS;

    SET CLEAN_PATIENTS;

    /* Calculating BMI; the formula assumes Weight is in kg and Height is
       in cm - TODO confirm units */

    BMI = Weight / ((Height/100) ** 2);

    /* Declare the length up front. Without this the first assignment
       (Normal, 6 characters) fixes the length and Borderline is truncated. */

    LENGTH Chol_Cat $ 10;

    /* Categorizing Cholesterol Levels. A missing value compares lower than
       any number, so it is handled first and left uncategorized. */

    IF MISSING(Cholesterol) THEN Chol_Cat = ' ';

    ELSE IF Cholesterol < 200 THEN Chol_Cat = 'Normal';

    ELSE IF Cholesterol < 240 THEN Chol_Cat = 'Borderline';

    ELSE Chol_Cat = 'High';

RUN;


PROC PRINT DATA=FINAL_PATIENTS;

RUN;


 /* Overall summary statistics; BP is no longer listed because only
    SysBP and DiasBP exist in the data */
PROC MEANS DATA=FINAL_PATIENTS;

    VAR Age SysBP DiasBP Cholesterol Glucose BMI;

RUN;


proc means data=FINAL_PATIENTS;
    /* Default summary statistics of Age, Cholesterol and BMI,
       reported separately for each Gender value */
    var Age Cholesterol BMI;
    class Gender;
run;


proc freq data=FINAL_PATIENTS;
    /* One-way frequency table of the cholesterol category */
    tables Chol_Cat;
run;


proc corr data=FINAL_PATIENTS;
    /* Pairwise correlations among the four numeric measures */
    var Age
        Cholesterol
        Glucose
        BMI;
run;


proc sgplot data=FINAL_PATIENTS;
    /* Cholesterol (vertical) against Age (horizontal), with markers
       grouped by Gender */
    scatter y=Cholesterol x=Age / group=Gender;
run;


proc export
    data=FINAL_PATIENTS
    /* Placeholder Windows path; point it at a folder that exists on the
       machine running SAS */
    outfile="C:\Users\YourName\Documents\Final_Patients.csv"
    dbms=csv
    replace;   /* overwrite the CSV file if it already exists */
run;


II DATASET 


data employees;

    /* Build the sample EMPLOYEES table from in-stream records.
       Department is read with an explicit :$12. informat: the default
       character length of 8 would store Marketing as Marketin.
       HireDate is read with the date9. informat and displayed as a date. */
    input EmployeeID Name $ Department :$12. Salary HireDate :date9.;

    format HireDate date9.;

    datalines;
1 John Sales 55000 15JAN2015
2 Jane Marketing 62000 22MAR2016
3 Jim Sales 58000 10JUL2017
4 Jack IT 60000 05MAY2018
5 Jill HR 59000 12AUG2019
6 John Sales 55000 15JAN2015
7 Jake IT 61000 20SEP2020
8 Jane Marketing 63000 22MAR2016
9 Julia HR 60000 18DEC2021
10 Jim Sales 58000 10JUL2017
;

run;


proc print data=employees;   /* list the raw rows, duplicates included */
run;



proc sort
    data=employees
    out=employees_clean
    nodupkey;
    /* Keeps the first row for each Name and writes the result, ordered by
       Name, to employees_clean.
       NOTE(review): Name is the only key, so the second Jane row (Salary
       63000) is dropped even though it is not an exact duplicate - confirm
       this is intended. */
    by Name;
run;

proc print data=employees_clean;   /* rows left after de-duplication */
run;


data employees_clean;
    /* Rewrites employees_clean in place, blanking two values so the later
       steps have missing data to work with */
    set employees_clean;

    select (EmployeeID);
        when (5) call missing(Salary);       /* numeric missing */
        when (9) call missing(Department);   /* character blank */
        otherwise;                           /* all other rows unchanged */
    end;
run;

proc print data=employees_clean;   /* rows after the missing values were injected */
run;


proc means
    data=employees_clean
    n nmiss;
    /* Non-missing and missing counts for Salary */
    var Salary;
run;


proc stdize
    data=employees_clean
    out=employees_imputed
    method=mean
    reponly;   /* replace missing values only; leave the rest unchanged */
    /* Missing Salary values are filled with the mean of Salary */
    var Salary;
run;

proc print data=employees_imputed;   /* name the dataset instead of relying on the implicit _LAST_ */
run;


proc univariate data=employees_imputed;

    /* Distribution of Salary after imputation */
    var Salary;

    /* Histogram with a fitted normal curve overlaid */
    histogram Salary / normal;

    /* Width 10 leaves room for five integer digits, the decimal point and
       two decimals; the former 6.2 was too narrow for salaries near 59000
       and lost the decimal places. */
    inset mean median std / format=10.2;

run;


If You Want Dataset I Output: Click Here

If You Want Dataset II Output: Click Here


PRACTICE AND COMMENT YOUR OUTPUT: 

--->PLEASE FOLLOW OUR BLOG FOR MORE UPDATES.

PLEASE FOLLOW OUR TELEGRAM CHANNEL CLICK HERE

PLEASE FOLLOW OUR FACEBOOK PAGE  CLICK HERE

PLEASE FOLLOW OUR INSTAGRAM PAGE CLICK HERE

Comments