128.INPUT | FORMAT STATEMENT | PROC SQL | DISTINCT | PROC SORT | NODUP | NODUPKEY | PROC SQL GROUP BY | HAVING CLAUSE

INPUT | FORMAT STATEMENT | PROC SQL | DISTINCT | PROC SORT | NODUP | NODUPKEY | PROC SQL GROUP BY | HAVING CLAUSE 


/*Write a PROC SQL query to identify duplicate records.*/

/*Remove duplicates using PROC SORT with NODUPKEY and NODUP.*/


DATA PATIENT_VISITS;

 INPUT USUBJID VISIT$ VISITDT:YYMMDD10.;

 FORMAT VISITDT DATE9.;

 CARDS;

101 BASELINE 2024-01-01

101 WEEK1 2024-01-08

101 WEEK1 2024-01-08

102 BASELINE 2024-01-02

;

RUN;

PROC PRINT;RUN;


OUTPUT:

Obs USUBJID VISIT VISITDT
1 101 BASELINE 01JAN2024
2 101 WEEK1 08JAN2024
3 101 WEEK1 08JAN2024
4 102 BASELINE 02JAN2024


PROC SQL;

 SELECT DISTINCT *

  FROM PATIENT_VISITS;

QUIT;


OUTPUT:

USUBJID VISIT VISITDT
101 BASELINE 01JAN2024
101 WEEK1 08JAN2024
102 BASELINE 02JAN2024


/*ALTERNATE METHOD FOR DUPLICATES*/


PROC SORT DATA=PATIENT_VISITS OUT=SORTDUPKEY NODUPKEY;

 BY USUBJID VISIT VISITDT ;

RUN;

PROC PRINT;RUN;


OUTPUT:

Obs USUBJID VISIT VISITDT
1 101 BASELINE 01JAN2024
2 101 WEEK1 08JAN2024
3 102 BASELINE 02JAN2024


PROC SORT DATA=PATIENT_VISITS OUT=SORTDUP NODUP;

 BY _ALL_;

RUN;

PROC PRINT;RUN;


OUTPUT:

Obs USUBJID VISIT VISITDT
1 101 BASELINE 01JAN2024
2 101 WEEK1 08JAN2024
3 102 BASELINE 02JAN2024


/*COUNTS OF DUPLICATES*/


PROC SQL;

 SELECT * , COUNT(*) AS DUP_COUNT 

  FROM PATIENT_VISITS

   GROUP BY USUBJID,VISIT,VISITDT

   HAVING COUNT(*) > 1;

QUIT;


OUTPUT:
USUBJID VISIT VISITDT DUP_COUNT
101 WEEK1 08JAN2024 2



PRACTICE AND COMMENT YOUR CODE: 

-->PLEASE FOLLOW OUR BLOG FOR MORE UPDATES.

TO FOLLOW OUR TELEGRAM CHANNEL CLICK HERE


Comments