128.INPUT | FORMAT STATEMENT | PROC SQL | DISTINCT | PROC SORT | NODUP | NODUPKEY | PROC SQL GROUP BY | HAVING CLAUSE
- Get link
- X
- Other Apps
INPUT | FORMAT STATEMENT | PROC SQL | DISTINCT | PROC SORT | NODUP | NODUPKEY | PROC SQL GROUP BY | HAVING CLAUSE
/*Write a PROC SQL query to identify duplicate records.*/
/*Remove duplicates using PROC SORT with NODUPKEY and NODUP.*/
DATA PATIENT_VISITS;
INPUT USUBJID VISIT$ VISITDT:YYMMDD10.;
FORMAT VISITDT DATE9.;
CARDS;
101 BASELINE 2024-01-01
101 WEEK1 2024-01-08
101 WEEK1 2024-01-08
102 BASELINE 2024-01-02
;
RUN;
PROC PRINT;RUN;
OUTPUT:
Obs | USUBJID | VISIT | VISITDT |
---|---|---|---|
1 | 101 | BASELINE | 01JAN2024 |
2 | 101 | WEEK1 | 08JAN2024 |
3 | 101 | WEEK1 | 08JAN2024 |
4 | 102 | BASELINE | 02JAN2024 |
PROC SQL;
SELECT DISTINCT *
FROM PATIENT_VISITS;
QUIT;
OUTPUT:
USUBJID | VISIT | VISITDT |
---|---|---|
101 | BASELINE | 01JAN2024 |
101 | WEEK1 | 08JAN2024 |
102 | BASELINE | 02JAN2024 |
/*ALTERNATE METHOD FOR DUPLICATES*/
PROC SORT DATA=PATIENT_VISITS OUT=SORTDUPKEY NODUPKEY;
BY USUBJID VISIT VISITDT ;
RUN;
PROC PRINT;RUN;
OUTPUT:
Obs | USUBJID | VISIT | VISITDT |
---|---|---|---|
1 | 101 | BASELINE | 01JAN2024 |
2 | 101 | WEEK1 | 08JAN2024 |
3 | 102 | BASELINE | 02JAN2024 |
PROC SORT DATA=PATIENT_VISITS OUT=SORTDUP NODUP;
BY _ALL_;
RUN;
PROC PRINT;RUN;
OUTPUT:
Obs | USUBJID | VISIT | VISITDT |
---|---|---|---|
1 | 101 | BASELINE | 01JAN2024 |
2 | 101 | WEEK1 | 08JAN2024 |
3 | 102 | BASELINE | 02JAN2024 |
/*COUNTS OF DUPLICATES*/
PROC SQL;
SELECT * , COUNT(*) AS DUP_COUNT
FROM PATIENT_VISITS
GROUP BY USUBJID,VISIT,VISITDT
HAVING COUNT(*) > 1;
QUIT;
USUBJID | VISIT | VISITDT | DUP_COUNT |
---|---|---|---|
101 | WEEK1 | 08JAN2024 | 2 |
- Get link
- X
- Other Apps
Comments
Post a Comment