COMBINING THE DATASETS
---> TWO DATASETS WILL BE COMBINED TOGETHER
1.APPENDING::: VERTICALLY COMBINING
2.CONCATENATING:: HORIZONTALLY COMBINING
3.MERGING:: HORIZONTALLY COMBINING
1.APPENDING::(VERTICALLY)-IT WILL ADD THE ALL OBSERVATIONS FROM 2ND DATASET
TO ORIGINAL(1ST) DATASET
--->I.FOR THE SAME VARIABLE INFO IN BOTH DATASETS
DATA AA;
INPUT NAME$ AGE GEND$ CLASS;
CARDS;
KARTHIK 25 M 8
TIRUMALA 26 M 9
HEMA 24 F 10
RAMYA 25 F 11
;
RUN;
PROC PRINT;
RUN;
DATA BB;
INPUT NAME$ AGE GEND$ CLASS;
CARDS;
RAM 24 M 6
RAJU 26 M 5
NIKITHA 24 F 8
SRAVANI 26 F 6
AJAYRAJ 27 M 7
;
RUN;
PROC PRINT;
RUN;
PROC APPEND BASE=AA
DATA=BB;
RUN;
II.WHAT IF THE DATASETS HAVING DIFFERENT VARIABLE INFORMATION
DATA CC;
INPUT NAME$ AGE GEND$ CLASS;
CARDS;
KARTHIK 25 M 8
TIRUMALA 26 M 9
HEMA 24 F 10
RAMYA 25 F 11
;
RUN;
PROC PRINT;
RUN;
DATA DD;
INPUT NAME$ AGE GEND$ SECTION$;
CARDS;
RAM 24 M A
RAJU 26 M B
NIKITHA 24 F A
SRAVANI 26 F C
AJAYRAJ 27 M A
;
RUN;
PROC PRINT;
RUN;
PROC APPEND BASE=CC
DATA=DD;
RUN;
USE "FORCE" OPTION TO COMBINE THE DATASET
PROC APPEND BASE=CC
DATA=DD FORCE;
RUN;
2.CONCATINATING(HORIZONTAL)::
--->IT WILL ADD ALL OBSERVATIONS AND VARIABLES FROM BOTH DATASETS INTO A 'NEW DATASET'
--->IF THE DIFFRENT VARIABLE ALSO THERE IT WILL CONCATINATE THE DATSETS
DATA EE;
INPUT NAME$ AGE GEND$ CLASS;
CARDS;
KARTHIK 25 M 8
TIRUMALA 26 M 9
HEMA 24 F 10
RAMYA 25 F 11
;
RUN;
PROC PRINT;
RUN;
DATA FF;
INPUT NAME$ AGE GEND$ CLASS;
CARDS;
RAM 24 M 6
RAJU 26 M 5
NIKITHA 24 F 8
SRAVANI 26 F 6
AJAYRAJ 27 M 7
;
RUN;
PROC PRINT;
RUN;
DATA CONCATINATE;
SET EE
FF;
RUN;
PROC PRINT;
RUN;
--->WHAT IF THE DATASETS HAVING DIFFERENT VARIABLE INFO
DATA GG;
INPUT NAME$ AGE GEND$ CLASS;
CARDS;
KARTHIK 25 M 8
TIRUMALA 26 M 9
HEMA 24 F 10
RAMYA 25 F 11
;
RUN;
PROC PRINT;
RUN;
DATA HH;
INPUT NAME$ AGE GEND$ SECTION$;
CARDS;
RAM 24 M A
RAJU 26 M B
NIKITHA 24 F A
SRAVANI 26 F C
AJAYRAJ 27 M A
;
RUN;
PROC PRINT;
RUN;
DATA CONCAT;
SET GG
HH;
RUN;
PROC PRINT;
RUN;
--->RENAME: WE CAN CHANGE THE NAME OF THE VARIABLE(RENAMING THE VAR NAME)
DATA CONCAT;
SET GG(RENAME=(AGE=NEWAGE))
HH(RENAME=(GEND=SEX));
RUN;
PROC PRINT;
RUN;
3.MERGING::
--->WE CAN COMBINE TWO OR MORE SAS DATASETS INTO A NEW DATASET
---> BEFORE MERGING THE DATASET YOU NEED TO 'SORT' THE DATA
WITH 'BY' STATEMENT
DATA AA1;
INPUT ID NAME$ AGE GENDER$;
CARDS;
11 AAAA 25 M
44 BBBB 26 F
33 CCCC 27 M
22 DDDD 28 F
77 WWWW 25 M
55 AAAA 24 F
;
RUN;
PROC PRINT;
RUN;
PROC SORT DATA=AA1;
BY ID;
RUN;
DATA BB1;
INPUT ID NAME$ AGE CHAR$;
CARDS;
88 EEEE 23 F
66 FFFF 24 M
77 GGGG 25 M
55 HHHH 27 F
;
RUN;
PROC PRINT;
RUN;
PROC SORT DATA=BB1;
BY ID;
RUN;
DATA CC1;
INPUT ID NAME$ AGE CHAR$;
CARDS;
99 IIII 24 M
12 JJJJ 27 F
10 KKKK 26 F
11 LLLL 24 M
;
PROC PRINT;
RUN;
PROC SORT DATA=CC1;
BY ID;
RUN;
DATA MERGEE;
MERGE AA1
BB1
CC1;
BY ID;
RUN;
PROC PRINT;
RUN;
--->HOW TO SUBSET THE OBSERVATIONS IN MERGING(IN= OPTION)???
--->BY USING IN OPTION
--->WHAT IS MATCH MERGE???
DATA MATCH;
MERGE AA1(IN=XX)
BB1(IN=YY);
BY ID;
AA1=XX;
BB1=YY;
RUN;
PROC PRINT;
RUN;
DATA MATCH2;
MERGE AA1(IN=XX)
BB1(IN=YY);
BY ID;
AA1=XX;
BB1=YY;
IF XX=1;
RUN;
PROC PRINT;
RUN;
DATA MATCH22;
MERGE AA1(IN=XX)
BB1(IN=YY);
BY ID;
AA1=XX;
BB1=YY;
IF YY=1;
RUN;
PROC PRINT;
RUN;
DATA MATCH2;
MERGE AA1(IN=XX)
BB1(IN=YY);
BY ID;
AA1=XX;
BB1=YY;
IF XX=1 AND YY=1;
RUN;
PROC PRINT;
RUN;
*****SORT PROCEDURE*****;
--SORT THE DATA
--DEFAULT 'ASCENDING ORDER' AND IF WANT 'DESCENDING ORDER' THEN ADD 'DESCCENDING' BEFORE THE VARIABLE
DATA SORTT;
INPUT ID NAME$ SEX$;
CARDS;
11 AA M
33 BB M
22 CC F
66 DD M
44 EE F
66 FF M
44 EE F
22 CC M
;
RUN;
PROC PRINT;
RUN;
PROC SORT DATA=SORTT OUT=SORTED;
BY ID ;
RUN;
PROC SORT DATA=SORTT OUT=SORTED;
BY DESCENDING ID;
RUN;
--->WHAT IS THE DIFFERENCE BETWEEN 'NODUP,NODUPKEY,DUPOUT'
--->TO REMOVE THE DUPLICATES FROM THE DATASET.
--->NODUP : IT WILL REMOVE ALL THE DUPLICATES
PROC SORT DATA=SORTED NODUP OUT=DUPLICATE;
BY ID;
RUN;
--->NODUPKEY :IT WILL COMPARE THE 'BY' STATEMENT AND REMOVE THE DUPLICATES
PROC SORT DATA=SORTED NODUPKEY OUT=DUP;
BY ID;
RUN;
--->DUPOUT: IS REMOVED DUPLICATES ARE STORED IN DATASET
PROC SORT DATA=SORTED NODUP DUPOUT=ALLDUP;
BY ID;
RUN;
PROC SORT DATA=SORTED NODUPKEY DUPOUT=ALLDUP2;
BY ID;
RUN;
Comments
Post a Comment