10.COMBINING THE DATASETS

                                       COMBINING THE DATASETS


  ---> TWO DATASETS WILL BE COMBINED TOGETHER


  1.APPENDING::: VERTICALLY COMBINING 


  2.CONCATENATING:: HORIZONTALLY COMBINING 


  3.MERGING:: HORIZONTALLY COMBINING 


1.APPENDING::(VERTICALLY)-IT WILL ADD THE ALL OBSERVATIONS FROM 2ND DATASET 

                TO ORIGINAL(1ST) DATASET


--->I.FOR THE SAME VARIABLE INFO IN BOTH DATASETS


DATA AA;

INPUT NAME$ AGE GEND$ CLASS;

CARDS;

KARTHIK 25  M 8

TIRUMALA 26 M 9

HEMA     24 F 10

RAMYA    25 F 11

;

RUN;

PROC PRINT;

RUN;


DATA BB;

INPUT NAME$ AGE GEND$ CLASS;

CARDS;

RAM     24 M 6

RAJU    26 M 5

NIKITHA 24 F 8

SRAVANI 26 F 6

AJAYRAJ 27 M 7

;

RUN;

PROC PRINT;

RUN;


PROC APPEND BASE=AA

            DATA=BB;

RUN;


II.WHAT IF THE DATASETS HAVING DIFFERENT VARIABLE INFORMATION


DATA CC;

INPUT NAME$ AGE GEND$ CLASS;

CARDS;

KARTHIK 25  M 8

TIRUMALA 26 M 9

HEMA     24 F 10

RAMYA    25 F 11

;

RUN;

PROC PRINT;

RUN;


DATA DD;

INPUT NAME$ AGE GEND$ SECTION$;

CARDS;

RAM     24 M A

RAJU    26 M B

NIKITHA 24 F A

SRAVANI 26 F C

AJAYRAJ 27 M A

;

RUN;

PROC PRINT;

RUN;


PROC APPEND BASE=CC

            DATA=DD;

RUN;


USE "FORCE" OPTION TO COMBINE THE DATASET


PROC APPEND BASE=CC

            DATA=DD FORCE;

RUN;



  2.CONCATINATING(HORIZONTAL)::


--->IT WILL ADD ALL OBSERVATIONS AND VARIABLES FROM BOTH DATASETS INTO A 'NEW DATASET'


--->IF THE DIFFRENT VARIABLE ALSO THERE IT WILL CONCATINATE THE DATSETS



DATA EE;

INPUT NAME$ AGE GEND$ CLASS;

CARDS;

KARTHIK 25  M 8

TIRUMALA 26 M 9

HEMA     24 F 10

RAMYA    25 F 11

;

RUN;

PROC PRINT;

RUN;


DATA FF;

INPUT NAME$ AGE GEND$ CLASS;

CARDS;

RAM     24 M 6

RAJU    26 M 5

NIKITHA 24 F 8

SRAVANI 26 F 6

AJAYRAJ 27 M 7

;

RUN;

PROC PRINT;

RUN;


DATA CONCATINATE;

SET EE

    FF;

RUN;

PROC PRINT;

RUN;


--->WHAT IF THE DATASETS HAVING DIFFERENT VARIABLE INFO


DATA GG;

INPUT NAME$ AGE GEND$ CLASS;

CARDS;

KARTHIK  25  M 8

TIRUMALA 26 M  9

HEMA     24 F 10

RAMYA    25 F 11

;

RUN;

PROC PRINT;

RUN;


DATA HH;

INPUT NAME$ AGE GEND$ SECTION$;

CARDS;

RAM     24 M A

RAJU    26 M B

NIKITHA 24 F A

SRAVANI 26 F C

AJAYRAJ 27 M A

;

RUN;

PROC PRINT;

RUN;


DATA CONCAT;

SET GG

    HH;

RUN;

PROC PRINT;

RUN;


--->RENAME: WE CAN CHANGE THE NAME OF THE VARIABLE(RENAMING THE VAR NAME)


DATA CONCAT;

SET GG(RENAME=(AGE=NEWAGE))

    HH(RENAME=(GEND=SEX));

RUN;

PROC PRINT;

RUN;



3.MERGING::


--->WE CAN COMBINE TWO OR MORE SAS DATASETS INTO A NEW DATASET

---> BEFORE MERGING THE DATASET YOU NEED TO 'SORT' THE DATA 

          WITH 'BY' STATEMENT


DATA AA1;

INPUT ID NAME$ AGE GENDER$;

CARDS;

11 AAAA 25 M

44 BBBB 26 F

33 CCCC 27 M

22 DDDD 28 F

77 WWWW 25 M

55 AAAA 24 F

;

RUN;

PROC PRINT;

RUN;


PROC SORT DATA=AA1;

BY ID;

RUN;



DATA BB1;

INPUT ID NAME$ AGE CHAR$;

CARDS;

88 EEEE 23 F

66 FFFF 24 M

77 GGGG 25 M

55 HHHH 27 F

;

RUN;

PROC PRINT;

RUN;


PROC SORT DATA=BB1;

BY ID;

RUN;


DATA CC1;

INPUT ID NAME$ AGE CHAR$;

CARDS;

99 IIII 24 M

12 JJJJ 27 F

10 KKKK 26 F

11 LLLL 24 M

;

PROC PRINT;

RUN;


PROC SORT DATA=CC1;

BY ID;

RUN;


DATA MERGEE;

MERGE AA1

      BB1

  CC1;

BY ID;

RUN;

PROC PRINT;

RUN;




--->HOW TO SUBSET THE OBSERVATIONS IN MERGING(IN= OPTION)???

--->BY USING IN OPTION

--->WHAT IS MATCH MERGE???


DATA MATCH;

MERGE AA1(IN=XX)

      BB1(IN=YY);

BY ID;

AA1=XX;

BB1=YY;

RUN;

PROC PRINT;

RUN;


DATA MATCH2;

MERGE AA1(IN=XX)

      BB1(IN=YY);

BY ID;

AA1=XX;

BB1=YY;

IF XX=1;

RUN;

PROC PRINT;

RUN;


DATA MATCH22;

MERGE AA1(IN=XX)

      BB1(IN=YY);

BY ID;

AA1=XX;

BB1=YY;

IF YY=1;

RUN;

PROC PRINT;

RUN;



DATA MATCH2;

MERGE AA1(IN=XX)

      BB1(IN=YY);

BY ID;

AA1=XX;

BB1=YY;

IF XX=1 AND YY=1;

RUN;

PROC PRINT;

RUN;



*****SORT PROCEDURE*****;

--SORT THE DATA

--DEFAULT 'ASCENDING ORDER' AND IF WANT 'DESCENDING ORDER' THEN ADD 'DESCCENDING' BEFORE THE VARIABLE


DATA SORTT;

INPUT ID NAME$ SEX$;

CARDS;

11 AA M

33 BB M

22 CC F

66 DD M

44 EE F

66 FF M

44 EE F

22 CC M

;

RUN;

PROC PRINT;

RUN;


PROC SORT DATA=SORTT OUT=SORTED;

BY ID ;

RUN;


PROC SORT DATA=SORTT OUT=SORTED;

BY  DESCENDING ID;

RUN;


--->WHAT IS THE DIFFERENCE BETWEEN 'NODUP,NODUPKEY,DUPOUT'

--->TO REMOVE THE DUPLICATES FROM THE DATASET.

--->NODUP : IT WILL REMOVE ALL THE DUPLICATES


PROC SORT DATA=SORTED NODUP OUT=DUPLICATE;

BY ID;

RUN;


--->NODUPKEY :IT WILL COMPARE THE 'BY' STATEMENT AND REMOVE THE DUPLICATES


PROC SORT DATA=SORTED NODUPKEY OUT=DUP;

BY ID;

RUN;


--->DUPOUT: IS REMOVED DUPLICATES ARE STORED IN DATASET



PROC SORT DATA=SORTED NODUP DUPOUT=ALLDUP;

BY ID;

RUN;


PROC SORT DATA=SORTED NODUPKEY DUPOUT=ALLDUP2;

BY ID;

RUN;


Comments