Thursday, 31 October 2024

18.PROC TRANSPOSE

                                                 PROC TRANSPOSE 


PROC TRANSPOSE:-

USED FOR DATA ROTATION/FLIPPING DATA/RE-STRUCTURING THE DATA

-OBSERVATIONS TO VARIABLES

-VARIABLES TO OBSERVATIONS


BY:: BY USING SORTED DATA WE CAN ALSO TRANSPOSE THE DATA


DATA TRANS;

INPUT NAME$ SUBJECT$ MARKS;

CARDS;

RAJU MATHS 52

SHIVA ENGLISH 56

MANEESHA ZOO 45

RAVLI ENGLISH 55

KUMAR BOTANY 65

REDDY MATHS 63

ANIL ZOO 56

RAM MATHS 54

;

RUN;


PROC SORT DATA=TRANS;

BY NAME;

RUN;


PROC TRANSPOSE DATA=TRANS OUT=TRANSPOSED;

BY NAME;

ID SUBJECT;

VAR MARKS;

RUN;


BY DEFAULT PROC TRANSPOSE WILL TRANSPOSE THE "ALL NUMERIC VARIABLES" IN THE DATASET 


PROC TRANSPOSE DATA=TRANS OUT=NOSTATEMENT PREFIX=STUDENT;

RUN;


SORT PROCEDURE IS NOT REQUIRED IF YOU ARE MENTIONED "NOTSORTED" OPTION IN "BY STATEMENT


PROC TRANSPOSE DATA=TRANS NAME=VNAME OUT=NOTSORTED;

BY NAME NOTSORTED;

ID SUBJECT;

VAR MARKS;

RUN;


PROC TRANSPOSE DATA=SASHELP.CLASS PREFIX=NAME_  SUFFIX=_STUDENT OUT=CLASS;

BY AGE NOTSORTED;

ID NAME;

VAR HEIGHT AGE WEIGHT;

RUN;



PROC TRANSPOSE DATA=SASHELP.CLASS OUT=CLASSTRA;

BY AGE NOTSORTED;

ID NAME;

VAR HEIGHT;

RUN;


--PLEASE FOLLOW THE BLOG TO GET MORE INFO...








Monday, 28 October 2024

17.DO LOOPS

                                                  DO LOOPS 


--TO DO REPETITIVE CALCULATIONS

--AND TO REDUCE THE CODE


DATA REPEAT;

AMOUNT=5000;

RATE=0.50;

YEARLY=AMOUNT*RATE;

QUARTERLY+((QUARTERLY+AMOUNT)+RATE/4);

QUARTERLY+((QUARTERLY+AMOUNT)+RATE/4);

QUARTERLY+((QUARTERLY+AMOUNT)+RATE/4);

QUARTERLY+((QUARTERLY+AMOUNT)+RATE/4);

RUN;

PROC PRINT;

RUN;


DATA REAL;

AMOUNT=5000;

RATE=0.50;

YEARLY=AMOUNT*RATE;

DO I=1 TO 4;

QUARTERLY+((QUARTERLY+AMOUNT)+RATE/4);

OUTPUT;

END;

RUN;

PROC PRINT;

RUN;


DATA REAL;

AMOUNT=5000;

RATE=0.50;

YEARLY=AMOUNT*RATE;

DO I=1 TO 1000;

QUARTERLY+((QUARTERLY+AMOUNT)+RATE/4);

OUTPUT;

END;

RUN;

PROC PRINT;

RUN;


-IF YOU ARE USING "DO" STATEMENT IT WILL BE CLOSED WITH "END" STATEMENT


WITHOUT 'OUTPUT' STATEMENT IT WILL GIVE 'ONLY ONE OBSERVATION' AND 'HIGHEST ITRETIVE VALUE'



DATA AA;

DO I=1 TO 5; *1+2=3 2+2=4 3+2=5 4+2=6 5+2=7 I=6 Y=7 */;

Y=I+2;

END;

RUN;

PROC PRINT;

RUN;



DATA BB;

DO I=1 TO 10; *1*2=2 2*2=4.....10*2=20   11 20;

Y=I*2;

END;

RUN;

PROC PRINT;

RUN;


WITH OUTPUT STATEMENT ALL OBSERVATION AND ALL VARIABLE WILL BE GIVEN


DATA CC;

DO I=1 TO 10;

Y=I*2;

OUTPUT;

END;

RUN;

PROC PRINT;

RUN;


DATA DD(DROP=I);

DO I=1 TO 10;

Y=I+2;

OUTPUT;

END;

RUN;

PROC PRINT NOOBS;

RUN;



DATA EE;

DO I=1 TO 10;

Y=I+2;

OUTPUT;

END;

RUN;

PROC PRINT;

RUN;


--IF DECIMAL VALUES ARE THERE IN DATA USE 'BY' STATEMENT


DATA FF;

DO I=1 TO 5 BY 0.3;

Y=I*2;

END;

RUN;

PROC PRINT;

RUN;



DATA GG;

DO I=1 TO 5 BY 0.2345673;

Y=I*2;

OUTPUT;

END;

RUN;

PROC PRINT;

RUN;


DATA HH(DROP=RATE);

DO YEAR=2001 TO 2005;

BONUS+5000;

BONUS+(RATE*0.052);

END;

RUN;

PROC PRINT;

RUN;


DATA HH(DROP=RATE);

DO YEAR=2001 TO 2005;

BONUS+5000;

BONUS+(RATE*0.052);OUTPUT;

END;

RUN;

PROC PRINT;

RUN;


                   FOLLOW THE BLOG FOR MORE INFORMATION....

Sunday, 27 October 2024

16.SAS FUNCTIONS

                                              SAS FUNCTIONS


TO CHANGE THE CASE OF THE CHARACTER

--UPCASE:: VALUE WILL CAPITAL LETTERS

--LOWCASE:: VALUE WILL SMALL LETTERS

--PROPCASE:: FIRST LETTER WILL BE IN CAPITAL REMAINING ALL SMALL LETTERS

 

DATA CASE;

STR='THis Is SaS dAtA vaLUE';

AAA=UPCASE(STR);

BBB=LOWCASE(STR);

CCC=PROPCASE(STR);

RUN;

PROC PRINT;

RUN;


DATA CLASS;

SET SASHELP.CLASS;

NAME=LOWCASE(NAME);

RUN;

PROC PRINT;

RUN;



--TO EXTRACT THE PART OF THE STRING

 1.SUBSTR(VARIABLE,POSITION,LENGTH)

 2.CHAR


DATA STR;

STRING='RAMAVATHI RASAM INFOSYS HYD';

STR1=SUBSTR(STRING,3,3);

STR2=SUBSTR(STRING,5);

STR3=SUBSTR(STRING,5,7);

RUN;

PROC PRINT;

RUN;


DATA SS;

A='RANGARAJ HITECH CITY';

B=SUBSTR(A,4);

C=SUBSTR(A,7,3);

RUN;

PROC PRINT;

RUN;



DATA DD;

STR='KJHSDDGA ABDJGUY';

STR1=SUBSTR(STR,6,5);

RUN;

PROC PRINT;

RUN;


DATA SASS;

SET SASHELP.CLASS;

KEEP NAME;

NAME=SUBSTR(NAME,2,2); 

NAME=UPCASE(NAME);

RUN;

PROC PRINT;

RUN;



DATA CHR;

STTT='ABCD1223';

RED=CHAR(STTT,3);

RE=CHAR(STTT,6);

RUN;

PROC PRINT;

RUN;



--SCAN: TO EXTRACT STRING IN TO WORDS


--SCAN(VARIABLE,POSITION(LEFT TO RIGHT). 

   IF USING   '-'    -1   -2    (RIGHT TO LEFT)


DATA SCAN;

TRE='SAI PRIYANKA SUCHARITHA SANA';

WRS=SCAN(TRE,3);

RWQ=SCAN(TRE,2);

WWW=SCAN(TRE,-2);

EEE=SCAN(TRE,-4);

RUN;

PROC PRINT;

RUN;


DATA SCA1;

TRE='SAI PRIYANKA***SCHARITHA SANA';

WRS=SCAN(TRE,2,"**");

RWQ=SCAN(TRE,2);

RUN;

PROC PRINT;

RUN;


--TRIM : ITWILL REMOVE TRAILING BLANKS AND CONCATENATE AND GIVE THE VALUES


DATA TRIMMM;

STR="KRISH   ";

STR1="REDDY  ";

AA=TRIM(STR);

BB=TRIM(STR1);

CC=TRIM("K R I S H  ");

DD=TRIM("A  ") || ("BB  ");

RUN;

PROC PRINT;

RUN;


--STRIP : ITWILL REMOVE TRAILING AND LEADING BLANKS


DATA STRR;

STR="   KRISH   ";

STR1="    REDDY  ";

AA=STRIP(STR);

CC=STRIP(STR1);

BB=STRIP("  THIS IS SAS  ");

RUN;

PROC PRINT;

RUN;


FIND

DATA FINDD;

STRR='THIS IS SAS DATA VALUES';

RED=FIND(STRR,"IS");

ERD=FIND(STRR,"VALUES");

EDS=INDEX(STRR,"DATA");

RUN;

PROC PRINT;

RUN;


--FIND: MODIFIERS::IF THE DATA/STRING IN CASE SENSITIVE

DATA FINDD1;

STRR='THIS IS SAS DATA VALUES';

RED=FIND(STRR,"is",'i');

ERD=FIND(STRR,"VALUES");

EDS=FIND(STRR,"data",'i');

RUN;

PROC PRINT;

RUN;


--TRANWRD: REPLACING THE WORD

DATA TRANWDD;

STRE="MR ANIRUDH REDDY";

NME=TRANWRD(STRE,"MR","MISTER");

RUN;

PROC PRINT;RUN;


--TRANSLATE :REPLACING THE LETTER

DATA TRASLT;*(VAR,REPLACING LETTER,POSITION OF REP LETTER);

STRE="MR ANIRUDH REDDY";

NME=TRANSLATE(STRE,"A","D");

RUN;

PROC PRINT;

RUN;


--COMPBL:IT IS REMOVES MULTIPLE BLANKS

DATA CMPB;

STREE='PRIYA        REDDY';

VAR=COMPBL(STREE);

RUN;

PROC PRINT;

RUN;


--CATX: RETURNS A VALUE TO VARIABLE

DATA CNCT;

AA='PRIYA';

BB='REDDY';

CC=AA||BB;

DD=CATX("   ",AA,BB);

RUN;

PROC PRINT;

RUN;



NUMERIC FUNCTIONS: NUMERIC FUNCTIONS ARE CEIL,FLOOR,INT,ROUND.


DATA NUMRIC;

VALUE=4.6;

AA=CEIL(VALUE);

AA1=FLOOR(VALUE);

AA2=INT(VALUE);

AA3=ROUND(VALUE);

RUN;

PROC PRINT;

RUN;



Friday, 25 October 2024

15.RETAIN AND SUM STATEMENT

                           15.RETAIN AND SUM STATEMENT


1.CREATING AN ACCUMULATING(OVERALL) TOTAL VARIABLE


2.ACCUMULATING TOTAL FOR GROUPING THE DATA


    1.RETAIN STATEMENT

    2.SUM STATEMENT



    1.RETAIN(HOLDING THE VALUE) STATEMENT


-->IT WILL RETAIN VALUE OF VARIABLE IN PDV ACROSS THE ITERATION OF DATASET

-->IT WILL INITIALIZE THE RETAIN VALUE TO 'MISSING/ZERO' BEFORE THE FIRST EXECUTION

-->IF THERE IS A MISSING VALUE IN  VARIABLE ,ALL VARIABLE VALUES WILL BE MISSING



DATA SAS;

INPUT ID SALARY;

CARDS;

101 25000

102 26000

103 27000

104 18000

105 28000

106 23000

107 29000

108 30000

109 31000

110 34000

;

RUN;

PROC PRINT;

RUN;


DATA SWA;

SET SAS;

TOTAL=SALARY+TOTAL;

RUN;

PROC PRINT;

RUN;


DATA SWA11;

SET SAS;

TOTAL=0;

TOTAL=SALARY+TOTAL; *25000+0=25000 26000+0=26000;

RUN;

PROC PRINT;

RUN;


DATA S;

SET SASHELP.CLASS;

AGENEW=0;

TOTAL=AGE+AGENEW;

RUN;

PROC PRINT;

RUN;


TO AVOID THIS CONDITION USE 'RETAIN' STATEMENT 


DATA SWA12;

SET SAS;

RETAIN TOTAL 0;

TOTAL=SALARY+TOTAL; *25000+0=25000 25000+26000=51000;

RUN;

PROC PRINT;

RUN;


WHAT IF THE VALUES ARE MISSING IN  THE DATA???

--->ALL VARIABLE VALUES WILL BE MISSING


DATA SAS12;

INPUT ID SALARY;

CARDS;

101 25000

102 26000

103 27000

104   .  

105 28000

106 23000

107 29000

108 30000

109 31000

110 34000

;

RUN;

PROC PRINT;

RUN;


DATA EWSS;

SET SAS12;

RETAIN TOTAL 0;

TOTAL=TOTAL+SALARY;

RUN;

PROC PRINT;

RUN;



WHAT IF THE DATA MISSING AT FIRST OBS??

-->ALL VALUES ARE MISSING


DATA SAS12;

INPUT ID SALARY;

CARDS;

101   .  

102   .  

103 27000

104 28000

105 28000

106 23000

107 29000

108 30000

109 31000

110 34000

;

RUN;

PROC PRINT;

RUN;


DATA EWSS;

SET SAS12;

RETAIN TOTAL 0;

TOTAL=TOTAL+SALARY;

RUN;

PROC PRINT;

RUN;


2.SUM


-->"AUTOMATICALLY IT WILL RETAIN" THE VARIABLE VALUES

-->HERE IT WILL CREATE 'NEW VARIABLE' VALUE ON THE LEFT SIDE OF PLUS(+) SIGN

            *WHERE IF THEN SUM;

                    EX::TOTAL+SALARY

-->INITIALIZE THE VALUE TO ZERO

-->**IT WILL "IGNORES THE MISSING" VALUES AND GIVE TOTAL/SUM OF THE OBS/VALUES



DATA SAS;

INPUT ID SALARY;

CARDS;

101 25000

102 26000

103 27000

104 18000

105 28000

106 23000

107 29000

108 30000

109 31000

110 34000

;

RUN;

PROC PRINT;

RUN;


DATA SUMM;

SET SAS;

TOTAL+SALARY;

RUN;

PROC PRINT;

RUN;


WHAT IF MISSING VALUES ARE IN OBS?

-->IT WILL WRITE THE VALUE BY ADDING THE NEXT SUM


DATA SAS;

INPUT ID SALARY;

CARDS;

101 25000

102 26000

103 .    

104 18000

105 28000

106 23000

107 29000

108 30000

109 31000

110 34000

;

RUN;

PROC PRINT;

RUN;


DATA WQW;

SET SAS;

TOTAL+SALARY;

RUN;

PROC PRINT;

RUN;


DATA SAS122;

INPUT ID SALARY;

CARDS;

101   .  

102   .  

103 27000

104 28000

105 28000

106 23000

107 29000

108 30000

109 31000

110 34000

;

RUN;

PROC PRINT;

RUN;


DATA WQW1;

SET SAS122;

TOTAL+SALARY;

RUN;

PROC PRINT;

RUN;


DATA WE;

INPUT AGE AGEN;

CARDS;

11 2

12 3

13 2

14 4

15 2

;

RUN;

PROC PRINT;

RUN;


-->NEED TOTAL WITH SUM STATEMENT


DATA OIY;

SET WE;

TOTAL+AGE+AGEN;

TOTAL=SUM(OF AGE,AGEN);

RUN;

PROC PRINT;

RUN;


   PLEASE FOLLOW THE BLOG FOR MORE INFORMATION...


Thursday, 24 October 2024

14.PUT,FILE,TABULATE STATEMENTS

                             14.PUT,FILE,TABULATE STATEMENTS


--->WHAT IS THE DIFF B/W INPUT AND PUT  ?

INPUT : IT WRITES THE VARIABLES INFO IN SAS VARIABLES

PUT : IT WRITES VARIABLES FOR EXTERNAL ENVIRONMENT

            

--->WHAT IS THE DIFF B/W INFILE AND FILE ?

INFILE : IT IS USED FOR EXTERNAL DATASETS TO SAS ENVIRONMENT

FILE: IT IS USED PATH WHERE YOU WANT TO STORE THE RAW DATA


TO "READING" THE TEXT FILE INTO SAS::PROC IMPORT IN DATASTEP


DATA SAS;

INFILE "C:\Users\sumed\OneDrive\Desktop\CLASS.TXT";

INPUT NAME$ AGE GEND$ HT WT;

RUN;

PROC PRINT;

RUN;


TO "WRITE" SAS DATA TO EXTERNAL ENVIRONMENT:: PROC EXPORT IN DATASETP

--PUT:GIVE A VARIABLE INFO OF SAS DATASET

--FILE:WHERE WE CAN WRITE THE FILE/EXTERNAL FILE PATH


DATA SAS11;

SET SASHELP.CLASS;

FILE 'C:\Users\sumed\OneDrive\Desktop\CLASS12.txt';

PUT NAME AGE SEX HEIGHT WEIGHT;

RUN;

PROC PRINT;

RUN;


TO GET DIRECT OUTPUT OF TEXT FILE IN SAS ENVIRONMENT IN SEPARATE WINDOW?

--USE 'PROC FSLIST' 


PROC FSLIST FILEREF="C:\Users\OneDrive\Desktop\CLASS.TXT";

RUN;


PROC FSLIST FILEREF='C:\Users\OneDrive\Desktop\CLASS12.txt';

RUN;


PROC FSLIST FILEREF='C:\Users\Desktop\SALE.txt';

RUN;


********DIFF B/W PROC FREQ AND PROC TABULATE***********


-->FREQ:::IT WILL GIVE FREQ TABLES AND N-WAY TABLES


-->TABULATE:::IT WILL USED TO DISPLAY THE 'DESCRIPTIVE STATS' IN 'TABULAR FORMAT'


PROC FREQ DATA=SASHELP.CLASS;

TABLE SEX AGE;

RUN;



PROC TABULATE::::IT WILL USED TO DISPLAY THE 'DESCRIPTIVE STATS' 

                     IN 'TABULAR FORMAT'


PROC TABULATE DATA=SASHELP.CLASS;

CLASS SEX;

VAR AGE;

TABLE AGE SEX;

RUN;


PROC TABULATE DATA=SASHELP.CLASS;

CLASS SEX;

VAR AGE;

TABLE AGE;

RUN;


PROC TABULATE DATA=PROG1.SALES;

CLASS GENDER;

VAR SALARY;

TABLE SALARY GENDER;

RUN;


PROC TABULATE DATA=PROG1.SALES;

CLASS GENDER JOB_TITLE;

VAR SALARY;

TABLE SALARY*(N MEAN STD MIN MAX),GENDER,JOB_TITLE;

RUN;


--->TO GET DESCRIPTIVE STATS 


PROC TABULATE DATA=PROG1.SALES;

CLASS GENDER JOB_TITLE;

VAR SALARY;

TABLE SALARY*(N MEAN STD),GENDER,JOB_TITLE;

RUN;


--->TO GET THE TWO-WAY FREQ OUTPUT


PROC TABULATE DATA=PROG1.SALES;

CLASS GENDER JOB_TITLE;

VAR SALARY;

TABLE SALARY*(COLPCTN MAX ROWPCTN),GENDER,JOB_TITLE/BOX='MEAN OF SALARY';

RUN;


--->CHANGING THE LABELS OF EXISTING VARIABLES


PROC TABULATE DATA=PROG1.SALES;

CLASS GENDER;

VAR SALARY;

TABLE SALARY='EMP SALARY' GENDER='SEX' SALARY*MEAN='MEAN OF SALARY';

RUN;


PROC TABULATE DATA=SASHELP.CLASS;

CLASS SEX WEIGHT ;

VAR HEIGHT AGE;

TABLE HEIGHT*(N MEAN),SEX;

RUN;


           PLEASE FOLLOW THE BLOG AND ENCOURAGE US TO DO MORE...

                                      THANK  YOU FOR VISITING...

Wednesday, 23 October 2024

13.OUTPUT STATEMENT

                                            OUTPUT STATEMENT


CONTROLLING THE OUTPUT::::

--->HOW TO CREATE MULTIPLE OBSERVATIONS FROM 'ONE OBSERVATION'?

---> BY USING 'OUTPUT' STATEMENT         

FOR CALCULATING THE AGES FOR ONLY ONE YEAR


WHAT IF CALCULATING THE AGES FOR 5 YR?


1.FOR CALCULATING THE AGES FOR ONLY ONE YEAR


DATA SAS;

SET SASHELP.CLASS;

YEAR=1;

NEWAGE=AGE+1; *14+1=15 12+1=13 ;  

RUN;

PROC PRINT;

RUN;


2.WHAT IF CALCULATING THE AGES FOR 5 YR?

LAST OBSERVATION VALUE WILL COME IN THE OUTPUT


DATA SAS;

SET SASHELP.CLASS;

YEAR=1;

NEWAGE=AGE+1;

YEAR=2;

NEWAGE=AGE+2;

YEAR=3;

NEWAGE=AGE+3;

YEAR=4;

NEWAGE=AGE+4;

YEAR=5;

NEWAGE=AGE+5;

RUN;

PROC PRINT;

RUN;

----FOR PROPER OUTPUT USE 'OUTPUT' STATEMENT TO THE SYNTAX


DATA SAS;

SET SASHELP.CLASS;

YEAR=1;

NEWAGE=AGE+1;

OUTPUT;

YEAR=2; 

NEWAGE=AGE+2;

OUTPUT;

YEAR=3;

NEWAGE=AGE+3;

OUTPUT;

YEAR=4;

NEWAGE=AGE+4;

OUTPUT;

YEAR=5;

NEWAGE=AGE+5;

OUTPUT;

RUN;

PROC PRINT;

RUN;


DATA GROW;

SET PROG2.GROWTH;

MONTH=1;

TOTALVAL=NUMEMPS*(1+INCREASE); *220*1+0.075;

RUN;

PROC PRINT;

RUN;


DATA GROW;

SET PROG2.GROWTH;

MONTH=1;

TOTALVAL=NUMEMPS*(1+INCREASE);OUTPUT;

MONTH=2;

TOTALVAL=NUMEMPS*(2+INCREASE);OUTPUT;

MONTH=3;

TOTALVAL=NUMEMPS*(3+INCREASE);OUTPUT;

RUN;

PROC PRINT;

RUN;


DATA WEW;

SET SASHELP.CLASS;

IF AGE >14 THEN OUTPUT;

RUN;

PROC PRINT;

RUN;



WRITING MULTIPLE SAS DATASETS FROM(IN) ONE DATASTEP


DATA SUB;

INPUT ID NAME$ AGE SUBJECT$; 

CARDS;

101 ANIL 25 MATHS

102 HEMA 24 ENGLISH

103 KAMAL 26 PHARMA

104 ASHOK 25 MATHS

105 RAMYS 26 MECH

106 XXXXX 24 ENGLISH

107 YYYYY 26 PHARMA

108 ZZZZZ 24 MATHS

109 AAAAA 25 ENGLISH

110 BBBBB 28 MECH

111 CCCCC 26 MATHS

112 DDDDD 28 PHARMA

;

RUN;

PROC PRINT;

RUN;


DATA MATHS;

SET SUB;

IF SUBJECT='MATHS' THEN OUTPUT;

RUN;


DATA ENGLISH;

SET SUB;

IF SUBJECT='ENGLISH' THEN OUTPUT;

RUN;



DATA MATHS 

          ENGLISH 

          PHARMA 

          MECH;

SET SUB;

IF SUBJECT='MATHS' THEN OUTPUT MATHS;

ELSE IF SUBJECT='ENGLISH' THEN OUTPUT ENGLISH;

ELSE IF SUBJECT='PHARMA' THEN OUTPUT PHARMA;

ELSE IF SUBJECT='MECH' THEN OUTPUT MECH;

RUN;

PROC PRINT;

RUN;


NOTE:FOUR WAYS WE CAN USE THE "OUTPUT" STATEMENT


1.CREATING MULTIPLE OBSERVATION FROM ONE OBSERVATION


2.CREATING MULTIPLE DATASETS FROM ONE DATASTEP


3.SUBSETTING THE OBSERVATIONS BY "IF THEN OUTPUT"


4.CREATING THE NEW DATASET IN PROC MEANS


PLEASE FOLLOW THE BLOG AND SHARE IT...

Tuesday, 22 October 2024

12.LABELS AND FORMATS

                                    12. LABELS AND FORMATS


1.LABELS::: IT CHANGE THE APPEARANCE OF VARIABLES

EG::        STRTDT=====START DATE                                GENDER=====SEX


2.FORMATS::: TO "WRITE" THE DATA

3.INFORMATS::: TO "READ" THE DATA


2. SAS FORMATS:- ARE IN CHARACTER $,

                                                 NUMERIC &

                                                 DATE


   CHARACTER:             FIRST_NAME $2.                        

   NUMERIC:                   SALARY 15.2

                                         SALARY 12.2(25185.00) 

                                         COMMA12.2 (25,185.00)  

                                         COMMAX12.2 (25.185,00) 

                                         DOLLER12.2 ($25,185.00) 

                                         EURO12.2  (e25,185.00)

                                         EUROX12.2

                                          

3.DATE FORMATS:-

                                         12-03-2023

                                         DDMMYY6. 8. 10.(120323,12-03-23,12-03-2023)

                                        MMDDYY6. 8. 10.

                                        WORDDATE.   MARCH 12 2023

                                        WEEKDATE. MONDAT MARCH 12 2023

                                        DATE7. 12MAR23

                                        DATE9. 12MAR2023

                                        YEAR. 2023

                                        QTR. 1........4:1 qtr

                                        MONYY. MAR2023


DAY:  1 - 31 DAYS

MONTH: 1 - 12(4MONTHS --1 QTR(4 QTR)) 1234--1  5678--2   9101112--3

YEAR-1996 

WEEKDAY:  MONDAY-SUNDAY

QUARTERS: 1 - 4


DATA WEW;

SET SASHELP.CLASS;

LABEL NAME='STUDENT NAME'

              SEX='GENDER'

     AGE='STUDENT AGE';

RUN;

PROC PRINT DATA=WEW LABEL;

RUN;


PROC PRINT DATA=SASHELP.CLASS LABEL;

LABEL NAME='STUDENT NAME'

     AGE='STUDENT AGE';

RUN;



DATA SAS12;

SET PROG1.SALES;

WHERE COUNTRY='AU' AND JOB_TITLE CONTAINS 'Rep';

KEEP FIRST_NAME LAST_NAME SALARY JOB_TITLE HIRE_DATE;

LABEL JOB_TITLE='SALES EMP TITILE'

              HIRE_DATE='HAIRED DATE EMP'

      SALARY='EMP SALARY';

FORMAT FIRST_NAME $3. SALARY COMMA13.2;

RUN;

PROC PRINT DATA=SAS12 LABEL;

RUN;




DATA SAS1;

SET PROG1.SALES;

WHERE COUNTRY='AU' AND JOB_TITLE CONTAINS 'Rep';

KEEP FIRST_NAME LAST_NAME SALARY JOB_TITLE HIRE_DATE;

LABEL JOB_TITLE='SALES EMP TITILE'

             HIRE_DATE='HAIRED DATE EMP'

     SALARY='EMP SALARY';

FORMAT FIRST_NAME $4. SALARY COMMAX13.2;

RUN;

PROC PRINT DATA=SAS1 LABEL;

RUN;


DATA SAS22;

SET PROG1.SALES;

WHERE COUNTRY='AU' AND JOB_TITLE CONTAINS 'Rep';

KEEP FIRST_NAME LAST_NAME SALARY JOB_TITLE HIRE_DATE;

LABEL JOB_TITLE='SALES EMP TITILE'

              HIRE_DATE='HAIRED DATE EMP'

      SALARY='EMP SALARY';

FORMAT FIRST_NAME $4. SALARY DOLLAR13.2;

RUN;

PROC PRINT DATA=SAS22 LABEL;

RUN;


DATA SAS4;

SET PROG1.SALES;

WHERE COUNTRY='AU' AND JOB_TITLE CONTAINS 'Rep';

KEEP FIRST_NAME LAST_NAME SALARY JOB_TITLE HIRE_DATE;

LABEL JOB_TITLE='SALES EMP TITILE'

             HIRE_DATE='HAIRED DATE EMP'

    SALARY='EMP SALARY';

FORMAT FIRST_NAME $2. SALARY EUROX13.3;

RUN;

PROC PRINT DATA=SAS4 LABEL;

RUN;




DATA SAS5;

SET PROG1.SALES;

WHERE COUNTRY='AU' AND JOB_TITLE CONTAINS 'Rep';

KEEP FIRST_NAME LAST_NAME SALARY JOB_TITLE HIRE_DATE;

LABEL JOB_TITLE='SALES EMP TITILE'

             HIRE_DATE='HAIRED DATE EMP'

     SALARY='EMP SALARY';

FORMAT  HIRE_DATE DDMMYY6.;

RUN;

PROC PRINT DATA=SAS5 LABEL;

RUN;



DATA SAS6;

SET PROG1.SALES;

WHERE COUNTRY='AU' AND JOB_TITLE CONTAINS 'Rep';

KEEP FIRST_NAME LAST_NAME SALARY JOB_TITLE HIRE_DATE;

LABEL JOB_TITLE='SALES EMP TITILE'

             HIRE_DATE='HAIRED DATE EMP'

     SALARY='EMP SALARY';

FORMAT  HIRE_DATE DDMMYY8.;

RUN;

PROC PRINT DATA=SAS6 LABEL;

RUN;



DATA SAS7;

SET PROG1.SALES;

WHERE COUNTRY='AU' AND JOB_TITLE CONTAINS 'Rep';

KEEP FIRST_NAME LAST_NAME SALARY JOB_TITLE HIRE_DATE;

LABEL JOB_TITLE='SALES EMP TITILE'

             HIRE_DATE='HAIRED DATE EMP'

     SALARY='EMP SALARY';

FORMAT FIRST_NAME $4. SALARY DOLLAR13.2 HIRE_DATE DDMMYY10.;

RUN;

PROC PRINT DATA=SAS7 LABEL;

RUN;



DATA SAS8;

SET PROG1.SALES;

WHERE COUNTRY='AU' AND JOB_TITLE CONTAINS 'Rep';

KEEP FIRST_NAME LAST_NAME SALARY JOB_TITLE HIRE_DATE BIRTH_DATE;

LABEL JOB_TITLE='SALES EMP TITILE'

             HIRE_DATE='HAIRED DATE EMP'

    SALARY='EMP SALARY';

FORMAT  BIRTH_DATE HIRE_DATE WORDDATE.;

RUN;

PROC PRINT DATA=SAS8 LABEL;

RUN;



DATA SAS9;

SET PROG1.SALES;

WHERE COUNTRY='AU' AND JOB_TITLE CONTAINS 'Rep';

KEEP FIRST_NAME LAST_NAME SALARY JOB_TITLE HIRE_DATE BIRTH_DATE;

LABEL JOB_TITLE='SALES EMP TITILE'

              HIRE_DATE='HAIRED DATE EMP'

     SALARY='EMP SALARY';

FORMAT SALARY DOLLAR12.3 BIRTH_DATE HIRE_DATE WEEKDATE.;

RUN;

PROC PRINT DATA=SAS9 LABEL;

RUN;


DATA SAS33;

SET PROG1.SALES;

WHERE COUNTRY='AU' AND JOB_TITLE CONTAINS 'Rep';

KEEP FIRST_NAME LAST_NAME SALARY JOB_TITLE HIRE_DATE BIRTH_DATE;

LABEL JOB_TITLE='SALES EMP TITILE'

            HIRE_DATE='HAIRED DATE EMP'

    SALARY='EMP SALARY';

FORMAT BIRTH_DATE HIRE_DATE DATE7.;

RUN;

PROC PRINT DATA=SAS33 LABEL;

RUN;


DATA SAS44;

SET PROG1.SALES;

WHERE COUNTRY='AU' AND JOB_TITLE CONTAINS 'Rep';

KEEP FIRST_NAME LAST_NAME SALARY JOB_TITLE BIRTH_DATE HIRE_DATE;

LABEL JOB_TITLE='SALES EMP TITILE'

             HIRE_DATE='HAIRED DATE EMP'

     SALARY='EMP SALARY';

FORMAT BIRTH_DATE HIRE_DATE DATE9.;

RUN;

PROC PRINT DATA=SAS44 LABEL;

RUN;




DATA SAS44;

SET PROG1.SALES;

WHERE COUNTRY='AU' AND JOB_TITLE CONTAINS 'Rep';

KEEP FIRST_NAME LAST_NAME SALARY JOB_TITLE BIRTH_DATE HIRE_DATE;

LABEL JOB_TITLE='SALES EMP TITILE'

              HIRE_DATE='HAIRED DATE EMP'

     SALARY='EMP SALARY';

FORMAT BIRTH_DATE HIRE_DATE DATE11.;

RUN;

PROC PRINT DATA=SAS44 LABEL;

RUN;




DATA SAS;

SET PROG1.SALES;

WHERE COUNTRY='AU' AND JOB_TITLE CONTAINS 'Rep';

KEEP FIRST_NAME LAST_NAME SALARY JOB_TITLE HIRE_DATE;

LABEL JOB_TITLE='SALES EMP TITILE'

             HIRE_DATE='HAIRED DATE EMP'

     SALARY='EMP SALARY';

FORMAT  HIRE_DATE QTR.;

RUN;

PROC PRINT DATA=SAS LABEL;

RUN;




DATA SAS;

SET PROG1.SALES;

WHERE COUNTRY='AU' AND JOB_TITLE CONTAINS 'Rep';

KEEP FIRST_NAME LAST_NAME SALARY JOB_TITLE HIRE_DATE;

LABEL JOB_TITLE='SALES EMP TITILE'

             HIRE_DATE='HAIRED DATE EMP'

     SALARY='EMP SALARY';

FORMAT SALARY COMMAX10.2 HIRE_DATE DDMMYY10.;

RUN;

PROC PRINT DATA=SAS LABEL;

RUN;


DATA SAS;

SET PROG1.SALES;

WHERE COUNTRY='AU' AND JOB_TITLE CONTAINS 'Rep';

KEEP FIRST_NAME LAST_NAME SALARY JOB_TITLE HIRE_DATE;

LABEL JOB_TITLE='SALES EMP TITILE'

              HIRE_DATE='HAIRED DATE EMP'

      SALARY='EMP SALARY';

FORMAT SALARY EUROX12.5 HIRE_DATE DATE7.;

RUN;

PROC PRINT DATA=SAS LABEL;

RUN;


DATA SAS;

SET PROG1.SALES;

WHERE COUNTRY='AU' AND JOB_TITLE CONTAINS 'Rep';

KEEP FIRST_NAME LAST_NAME SALARY JOB_TITLE HIRE_DATE;

LABEL JOB_TITLE='SALES EMP TITILE'

             HIRE_DATE='HAIRED DATE EMP'

     SALARY='EMP SALARY';

FORMAT HIRE_DATE DATE9.;

RUN;

PROC PRINT DATA=SAS LABEL;

RUN;


DATA SAS;

SET PROG1.SALES;

KEEP FIRST_NAME LAST_NAME SALARY JOB_TITLE HIRE_DATE;

LABEL JOB_TITLE='SALES EMP TITILE'

             HIRE_DATE='HAIRED DATE EMP'

    SALARY='EMP SALARY';

FORMAT  HIRE_DATE WEEKDATE.;

RUN;

PROC PRINT DATA=SAS LABEL;

RUN;


DATA SAS;

SET PROG1.SALES;

KEEP FIRST_NAME LAST_NAME SALARY JOB_TITLE HIRE_DATE;

LABEL JOB_TITLE='SALES EMP TITILE'

             HIRE_DATE='HAIRED DATE EMP'

     SALARY='EMP SALARY';

FORMAT  HIRE_DATE WORDDATE.;

RUN;

PROC PRINT DATA=SAS LABEL;

RUN;


DATA SAS;

SET PROG1.SALES;

KEEP FIRST_NAME LAST_NAME SALARY JOB_TITLE HIRE_DATE;

LABEL JOB_TITLE='SALES EMP TITILE'

             HIRE_DATE='HAIRED DATE EMP'

     SALARY='EMP SALARY';

FORMAT  HIRE_DATE MONYY.;

RUN;

PROC PRINT DATA=SAS LABEL;

RUN;


PROC PRINT DATA=PROG1.SALES LABEL;

LABEL JOB_TITLE='SALE TITLE';

FORMAT BIRTH_DATE WEEKDATE.;

RUN;


PLEASE FOLLOW THE BLOG AND ENCOURAGE TO POST MORE ......


Monday, 21 October 2024

11.GCHART AND GPLOT

                                                     GCHART AND GPLOT

 

1.GCHART: PICTORIAL REPRESENTATION OF DATA

2.GPLOT: IT WILL PLOT THE GRAPHICAL REPRESENTATION OF DATA


1.GCHART: PICTORIAL REPRESENTATION OF DATA


PROC GCHART DATA=SASHELP.CLASS;

VBAR NAME AGE;

RUN;


PROC GCHART DATA=SASHELP.CLASS;

HBAR SEX AGE;

RUN;


PROC GCHART DATA=SASHELP.CLASS;

VBAR3D NAME AGE;

RUN;


PROC GCHART DATA=SASHELP.CLASS;

HBAR3D SEX AGE;

RUN;


PROC GCHART DATA=SASHELP.CLASS;

PIE NAME AGE;

RUN;


PROC GCHART DATA=SASHELP.CLASS;

PIE3D SEX AGE;

RUN;


2.GPLOT: IT WILL PLOT THE GRAPHICAL REPRESENTATION OF DATA


PROC GPLOT DATA=PROG1.SALES;

PLOT SALARY*GENDER/HAXIS=1 TO 12;

FORMAT SALARY DOLLAR12.2;

LABEL SALARY='YEAR SALARY';

SYMBOL V=DOT I=JOIN CV=RED CI=GREEN;  

RUN;



PROC GPLOT DATA=PROG1.BUDGET;

PLOT YR2004*MONTH YR2007*MONTH/OVERLAY HAXIS=1 TO 12

CFRAME="VERY LIGHT GRAY";

FORMAT YR2004 COMMA12.2;

LABEL YR2004='YEAR SALARY';

SYMBOL1 V=TRIANGLE I=JOIN CV=BLUE CI=PURPLE;

SYMBOL2 V=DOT I=JOIN CV=RED CI=YELLOW;

RUN;


PROC  GPLOT DATA=SASHELP.CLASS;

PLOT AGE*SEX;

SYMBOL V=DOT I=RL CI=GREEN;

RUN;





Sunday, 20 October 2024

10.COMBINING THE DATASETS

                                       COMBINING THE DATASETS


  ---> TWO DATASETS WILL BE COMBINED TOGETHER


  1.APPENDING::: VERTICALLY COMBINING 


  2.CONCATENATING:: HORIZONTALLY COMBINING 


  3.MERGING:: HORIZONTALLY COMBINING 


1.APPENDING::(VERTICALLY)-IT WILL ADD THE ALL OBSERVATIONS FROM 2ND DATASET 

                TO ORIGINAL(1ST) DATASET


--->I.FOR THE SAME VARIABLE INFO IN BOTH DATASETS


DATA AA;

INPUT NAME$ AGE GEND$ CLASS;

CARDS;

KARTHIK 25  M 8

TIRUMALA 26 M 9

HEMA     24 F 10

RAMYA    25 F 11

;

RUN;

PROC PRINT;

RUN;


DATA BB;

INPUT NAME$ AGE GEND$ CLASS;

CARDS;

RAM     24 M 6

RAJU    26 M 5

NIKITHA 24 F 8

SRAVANI 26 F 6

AJAYRAJ 27 M 7

;

RUN;

PROC PRINT;

RUN;


PROC APPEND BASE=AA

            DATA=BB;

RUN;


II.WHAT IF THE DATASETS HAVING DIFFERENT VARIABLE INFORMATION


DATA CC;

INPUT NAME$ AGE GEND$ CLASS;

CARDS;

KARTHIK 25  M 8

TIRUMALA 26 M 9

HEMA     24 F 10

RAMYA    25 F 11

;

RUN;

PROC PRINT;

RUN;


DATA DD;

INPUT NAME$ AGE GEND$ SECTION$;

CARDS;

RAM     24 M A

RAJU    26 M B

NIKITHA 24 F A

SRAVANI 26 F C

AJAYRAJ 27 M A

;

RUN;

PROC PRINT;

RUN;


PROC APPEND BASE=CC

            DATA=DD;

RUN;


USE "FORCE" OPTION TO COMBINE THE DATASET


PROC APPEND BASE=CC

            DATA=DD FORCE;

RUN;



  2.CONCATINATING(HORIZONTAL)::


--->IT WILL ADD ALL OBSERVATIONS AND VARIABLES FROM BOTH DATASETS INTO A 'NEW DATASET'


--->IF THE DIFFRENT VARIABLE ALSO THERE IT WILL CONCATINATE THE DATSETS



DATA EE;

INPUT NAME$ AGE GEND$ CLASS;

CARDS;

KARTHIK 25  M 8

TIRUMALA 26 M 9

HEMA     24 F 10

RAMYA    25 F 11

;

RUN;

PROC PRINT;

RUN;


DATA FF;

INPUT NAME$ AGE GEND$ CLASS;

CARDS;

RAM     24 M 6

RAJU    26 M 5

NIKITHA 24 F 8

SRAVANI 26 F 6

AJAYRAJ 27 M 7

;

RUN;

PROC PRINT;

RUN;


DATA CONCATINATE;

SET EE

    FF;

RUN;

PROC PRINT;

RUN;


--->WHAT IF THE DATASETS HAVING DIFFERENT VARIABLE INFO


DATA GG;

INPUT NAME$ AGE GEND$ CLASS;

CARDS;

KARTHIK  25  M 8

TIRUMALA 26 M  9

HEMA     24 F 10

RAMYA    25 F 11

;

RUN;

PROC PRINT;

RUN;


DATA HH;

INPUT NAME$ AGE GEND$ SECTION$;

CARDS;

RAM     24 M A

RAJU    26 M B

NIKITHA 24 F A

SRAVANI 26 F C

AJAYRAJ 27 M A

;

RUN;

PROC PRINT;

RUN;


DATA CONCAT;

SET GG

    HH;

RUN;

PROC PRINT;

RUN;


--->RENAME: WE CAN CHANGE THE NAME OF THE VARIABLE(RENAMING THE VAR NAME)


DATA CONCAT;

SET GG(RENAME=(AGE=NEWAGE))

    HH(RENAME=(GEND=SEX));

RUN;

PROC PRINT;

RUN;



3.MERGING::


--->WE CAN COMBINE TWO OR MORE SAS DATASETS INTO A NEW DATASET

---> BEFORE MERGING THE DATASET YOU NEED TO 'SORT' THE DATA 

          WITH 'BY' STATEMENT


DATA AA1;

INPUT ID NAME$ AGE GENDER$;

CARDS;

11 AAAA 25 M

44 BBBB 26 F

33 CCCC 27 M

22 DDDD 28 F

77 WWWW 25 M

55 AAAA 24 F

;

RUN;

PROC PRINT;

RUN;


PROC SORT DATA=AA1;

BY ID;

RUN;



DATA BB1;

INPUT ID NAME$ AGE CHAR$;

CARDS;

88 EEEE 23 F

66 FFFF 24 M

77 GGGG 25 M

55 HHHH 27 F

;

RUN;

PROC PRINT;

RUN;


PROC SORT DATA=BB1;

BY ID;

RUN;


DATA CC1;

INPUT ID NAME$ AGE CHAR$;

CARDS;

99 IIII 24 M

12 JJJJ 27 F

10 KKKK 26 F

11 LLLL 24 M

;

PROC PRINT;

RUN;


PROC SORT DATA=CC1;

BY ID;

RUN;


DATA MERGEE;

MERGE AA1

      BB1

  CC1;

BY ID;

RUN;

PROC PRINT;

RUN;




--->HOW TO SUBSET THE OBSERVATIONS IN MERGING(IN= OPTION)???

--->BY USING IN OPTION

--->WHAT IS MATCH MERGE???


DATA MATCH;

MERGE AA1(IN=XX)

      BB1(IN=YY);

BY ID;

AA1=XX;

BB1=YY;

RUN;

PROC PRINT;

RUN;


DATA MATCH2;

MERGE AA1(IN=XX)

      BB1(IN=YY);

BY ID;

AA1=XX;

BB1=YY;

IF XX=1;

RUN;

PROC PRINT;

RUN;


DATA MATCH22;

MERGE AA1(IN=XX)

      BB1(IN=YY);

BY ID;

AA1=XX;

BB1=YY;

IF YY=1;

RUN;

PROC PRINT;

RUN;



DATA MATCH2;

MERGE AA1(IN=XX)

      BB1(IN=YY);

BY ID;

AA1=XX;

BB1=YY;

IF XX=1 AND YY=1;

RUN;

PROC PRINT;

RUN;



*****SORT PROCEDURE*****;

--SORT THE DATA

--DEFAULT 'ASCENDING ORDER' AND IF WANT 'DESCENDING ORDER' THEN ADD 'DESCCENDING' BEFORE THE VARIABLE


DATA SORTT;

INPUT ID NAME$ SEX$;

CARDS;

11 AA M

33 BB M

22 CC F

66 DD M

44 EE F

66 FF M

44 EE F

22 CC M

;

RUN;

PROC PRINT;

RUN;


PROC SORT DATA=SORTT OUT=SORTED;

BY ID ;

RUN;


PROC SORT DATA=SORTT OUT=SORTED;

BY  DESCENDING ID;

RUN;


--->WHAT IS THE DIFFERENCE BETWEEN 'NODUP,NODUPKEY,DUPOUT'

--->TO REMOVE THE DUPLICATES FROM THE DATASET.

--->NODUP : IT WILL REMOVE ALL THE DUPLICATES


PROC SORT DATA=SORTED NODUP OUT=DUPLICATE;

BY ID;

RUN;


--->NODUPKEY :IT WILL COMPARE THE 'BY' STATEMENT AND REMOVE THE DUPLICATES


PROC SORT DATA=SORTED NODUPKEY OUT=DUP;

BY ID;

RUN;


--->DUPOUT: IS REMOVED DUPLICATES ARE STORED IN DATASET



PROC SORT DATA=SORTED NODUP DUPOUT=ALLDUP;

BY ID;

RUN;


PROC SORT DATA=SORTED NODUPKEY DUPOUT=ALLDUP2;

BY ID;

RUN;


Saturday, 19 October 2024

9.CREATING NEW VARIABLES

                           CREATING NEW VARIABLES

--->1.ASSIGNMENT STATEMENT (XXXX=)


--->2.CONDITIONAL STATEMENT (HAVE TO A CONDITION FOR VAR);


--->3.SUM STATEMENT(XXX+YYY)


--->1.ASSIGNMENT STATEMENT (XXXX=)


DATA SAS;

SET SASHELP.CLASS;

RUN;

PROC PRINT;

RUN;


DATA SAS;

SET SASHELP.CLASS;

NEWAGE=AGE;

gender=sex;

RUN;

PROC PRINT;

RUN;


--->3.SUM STATEMENT(XXX+YYY)

DATA SAS;

SET SASHELP.CLASS;

NEWAGE=AGE+2;         *11+2=13(NEWAGE);

RUN;

PROC PRINT;

RUN;


DATA SAS;

SET SASHELP.CLASS;

ANIL=AGE-2;

RUN;

PROC PRINT;

RUN;


DATA SAS;

SET SASHELP.CLASS;

ANIL=AGE*2;

RUN;

PROC PRINT;

RUN;


DATA SASAS;

SET PROG1.SALES;

BONUS=500;

TOTAL=SALARY+BONUS;*25000+500=25500;

RUN;

PROC PRINT;

RUN;


--->2.CONDITIONAL STATEMENT (HAVE TO A CONDITION FOR VARIABLES);

THESE ARE THE CONDITIONAL STATEMEMTS:

       1.IF THEN      2.IF THEN ELSE    3.IF THEN ELSE IF


       4.IF THEN DO   5.IF THEN DELETE    6.IF THEN OUTPUT;


1.IF THEN

DATA SAS;

SET PROG1.SALES;

IF COUNTRY='US' THEN BONUS=500;

RUN;

PROC PRINT;

RUN;


DATA SAS1;

SET PROG1.SALES;

IF COUNTRY='AU' THEN BONUS=300;

RUN;

PROC PRINT;

RUN;


2.IF THEN ELSE

DATA SAS2;

SET PROG1.SALES;

IF COUNTRY='US' THEN BONUS=500;

ELSE BONUS=300;

RUN;

PROC PRINT;

RUN;


3.IF THEN ELSE IF(AU(300),US(500),ASIA(800),UK(1000),EUROPE,AFRICA);

DATA SAS;

SET PROG1.SALES;

IF COUNTRY='US' THEN BONUS=500;

ELSE IF COUNTRY='AU' THEN BONUS=300;

ELSE IF COUNTRY='UK' THEN BONUS=580;

RUN;

PROC PRINT;

RUN;


DATA SAS;

SET PROG1.SALES;

IF COUNTRY='US' THEN BONUS=500;

ELSE IF COUNTRY='AU' THEN BONUS=300;

ELSE IF COUNTRY='UK' THEN BONUS=1000;

ELSE IF COUNTRY='IN' THEN BONUS=1200;

RUN;

PROC PRINT;

RUN;


DATA SAS333;

LENGTH ROLE $10;

SET PROG1.SALES;

IF SALARY <= 25000 THEN ROLE='FRESHER';

ELSE IF SALARY < 28000 AND 49999 THEN ROLE='PROGRAMMER';

ELSE IF SALARY > 50000 THEN ROLE='TL';

RUN;

PROC PRINT;

RUN;


4.IF THEN DELETE

--REMOVE THE GIVEN CONDITION


DATA SAS1;

SET PROG1.SALES;

IF COUNTRY='AU' THEN DELETE;

RUN;

PROC PRINT;

RUN;



DATA SAS1;

SET PROG1.SALES;

IF COUNTRY='IN' THEN DELETE;

RUN;

PROC PRINT;

RUN;


5.IF THEN OUTPUT

--GIVE THE OUTPUT OF THE GIVEN CONDITION


DATA SAS1;

SET PROG1.SALES;

IF COUNTRY='IN' THEN OUTPUT;

RUN;

PROC PRINT;

RUN;



DATA SAS121;

SET PROG1.SALES;

IF COUNTRY='UK' THEN OUTPUT;

RUN;

PROC PRINT;

RUN;



6.IF THEN DO STATEMENT:::


   ---IF YOU USING THE 'DO' STATEMENT NEED TO CLOSE THE DO BLOCK BY USING

                     'END' STATEMENT 


   ---WE CAN CREATE THE "MULTIPLE VARIABLES" IN "IF THEN DO" STATEMENT 

      WHICH IS NOT POSSIBLE IN "IF THEN ELSE IF"


DATA SA11S;

SET PROG1.SALES;

IF COUNTRY='US' THEN DO;

     BONUS=500;

    BONUSMNTH='ONCE IN YEAR';

    PACKAGE=4.5; 

END;

IF COUNTRY='AU' THEN DO;

     BONUS=300;

     BONUSMNTH='TWICE IN YEAR';

     PACKAGE=5.2;

END;

IF COUNTRY='IN' THEN DO;

     BONUS=1200;

     BONUSMNTH='THRICE IN YEAR';

     PACKAGE=8;

END;

RUN;

PROC PRINT;

RUN;


DATA SS;

KEEP STATUS DTHSTATUS;

SET SASHELP.HEART;

IF Status='Dead' THEN DTHSTATUS='YES';

ELSE DTHSTATUS='NO';

RUN;

PROC PRINT;

RUN;


NOTE:

        WHERE                                                                               IF


1.IT WILL WORK IN BOTH DATA STEP                1.IT WILL WORK IN ONLY DATASTEP

  AND PROC STEP                                                         NOT WORK IN PROC STEP


2.WE CAN'T CREATE NEW VARIABLE                2.WE CAN CREATE NEW VARIABLE


3.USE TO SUBSET THE OBSERVATIONS              3.USE TO SUBSET THE OBS BY                                                                                                                                      CONDITIONALLY


4.WORK BEFORE 'PDV'                                             4.WORK AFTER THE 'PDV'


PDV:PROGRAM DATA VECTOR



Friday, 18 October 2024

8.GLOBAL OPTIONS

                                CREATING USER(OWN) DEFINED FORMATS


--->BY USING PROC FORMAT:::WE CAN USE USER DEFINED FORMAT

              ****VALUE STATEMENT IS NEED IN USER DEFINED FORMAT

  -->IF CHARACTER---DOLLAR SYMBOL IS USED

  -->IF NUMERIC---NO NEED OF DOLLAR(LETTER/UNDERSCORE) SYMBOL

  -->CANNOT BE MORE THAN 32 CHARACTER


IF CHARACTER---DOLLAR SYMBOL

PROC FORMAT;

   VALUE $RAMU  'AU'='AUSTRALIA'

                                  'US'='UNITED STATES'

                          'OTHER'='MISSING'

        'IN'='INDIA';

RUN;


PROC PRINT DATA=PROG1.SALES;

VAR COUNTRY GENDER;

FORMAT COUNTRY $RAMU.     ;

RUN;


-->IF NUMERIC---NO NEED OF DOLLAR


PROC FORMAT;

VALUE  RANGA   25000-49999='JUNIOR LEVEL'

                                50000-99999='SENIOR LEVEL'

                            100000-250000='MANAGER LEVEL';

RUN;

 

PROC PRINT DATA=PROG1.SALES;

VAR COUNTRY GENDER SALARY;

FORMAT COUNTRY $RAMU. SALARY RANGA.;

RUN;


APPLY THE SAS FORMATS(NOT USER DEFINED)


PROC PRINT DATA=PROG1.SALES;

VAR COUNTRY GENDER SALARY;

FORMAT COUNTRY $1. SALARY DOLLAR13.2;

RUN;



                  GLOBAL STATEMENTS::::



-->THESE STATEMENTS YOU  CAN USE 'ANY WHERE IN SAS ENVIRONMENT'


1.OPTIONS


2.TITLES


3.FOOTNOTES


4.ODS(OUTPUT DELIVERY SYSTEM)::: FILE FORMATS

    

   -->HTML:: HYPER TEXT MARKUP LANGUAGE(OPEN IN WEB BROWSERS-              CHROME,FIREFOX,ME)


   -->PDF:: PORTABLE DOCUMENT FORMAT(OPEN IN ADOBE READER)


   -->RTF:: RICH TEXT FORMAT(OPEN IN MS-WORD)

  

   --> CSVALL:: COMMA SEPARATED VALUE (OPEN IN EXCEL)


1.OPTIONS


--->DATE(DEFAULT):: DISPLAY THE DATE AND TIME

--->NODATE:: DO NOT DISPLAY THE DATE AND TIME

--->NUMBER(DEFAULT):: PRINT THE PAGE NUMBER

--->NONUMBER:: WON'T PRINT THE PAGE NUMBER

--->CENTER(DEFAULT): OUTPUT WILL BE IN CENTER

--->NOCENTER:::OUTPUT WILL ALIGN TO LEFT SIDE

--->PAGE SIZE(PS=):: NUMBER OF LINES(15-32767)

--->LINE SIZE(LS=):: NUMBER OF LINE SIZE(64-256)



OPTIONS NODATE NONUMBER NOCENTER LS=70 PS=18;

PROC MEANS DATA=SASHELP.CLASS;

RUN;


IF U NEED ALL DEFAULT OPTIONS


OPTIONS DATE NUMBER CENTER;

PROC PRINT DATA=SASHELP.CLASS;

RUN;


2.TITLES::


->WILL APPEAR AT THE 'TOP OF THE PAGE'

->DEFAULT TITLE IS 'The SAS System'

->WE CAN GIVE 1-10 TITLES AT A TIME

->AN UN-NUMBERED TITLE IS EQUAL TO TITLE1


TITLE 'ALL STUDENTS DATA';

PROC MEANS DATA=SASHELP.CLASS;

TITLE 'DATA OF THE STUDENTS';

TITLE1 ' STUDENTS DATA';

TITLE2 'ALL  DATA';

TITLE3 'ALL STUDENTS ';

TITLE4 'ALL STUDENT DATA';

TITLE5 ' DATA';

TITLE6 'ALL STU DATA';

TITLE7 'ALL ENTS DATA';

TITLE8 'ALL TS DATA';

TITLE9 'ALL STUDENTS DATA';

TITLE10 'ATS DATA';

RUN;


DATA SAS;

SET SASHELP.CLASS;

TITLE1 ' STUDENTS DATA';

TITLE2 'ALL  DATA';

TITLE3 'ALL STUDENTS ';

TITLE4 'ALL STUDENTS DATA';

TITLE5 ' DATA';

RUN;

PROC PRINT;

RUN;


DATA SAS;

SET SASHELP.CLASS;

TITLE ' STUDENTS DATA';

RUN;

PROC PRINT;

RUN;

TO REMOVE THE TITLE : JUST WRITE TITLE AT THE END .

TITLE;  AND RUN THE PROGRAM


3.FOOTNOTES:::

->WILL APPEAR AT THE 'BOTTOM OF THE PAGE'

->WE CAN GIVE 1-10 FOOTNOTES AT A TIME

->AN UN-NUMBERED FOOTNOTES IS EQUAL TO FOOTNOTES1;


FOOTNOTE 'CLASS DATA OF ALL STUDENTS';

PROC SUMMARY DATA=SASHELP.CLASS PRINT;

FOOTNOTE 'COUNT OF THR OBS FROM SASHELP';

FOOTNOTE1 'COUNT THR OBS FROM SASHELP';

FOOTNOTE2 'COUNT OF FROM SASHELP';

FOOTNOTE3 'COUNT OF  OBS FROM SASHELP';

FOOTNOTE4 'COUNT OF THR OBS FROM SASHELP';

FOOTNOTE5 'COUNT OF THR  SASHELP';

FOOTNOTE6 'COUNT OF THR OBS FROM SASHELP';

FOOTNOTE7 'T OF THR OBS SASHELP';

FOOTNOTE8 'COUNT OF THR OBS M SASHELP';

RUN;


FOOTNOTE 'DDGYFHFYTDYFVTFTD';

PROC SUMMARY DATA=SASHELP.CLASS PRINT;

RUN;

TITLE 'COUNT OF OBSERVATIONS';


DATA SAS12;

SET SASHELP.CLASS;

FOOTNOTE 'COUNT OF THR OBS FROM SASHELP';

FOOTNOTE1 'COUNT THR OBS FROM SASHELP';

FOOTNOTE2 'COUNT OF FROM SASHELP';

FOOTNOTE3 'COUNT OF  OBS FROM SASHELP';

RUN;

PROC PRINT;

RUN;


PROC PRINT DATA=SASHELP.CLASS;

TITLE1 ' STUDENTS DATA';

TITLE2 'ALL  DATA';

TITLE3 'ALL STUDENTS ';

FOOTNOTE1 'COUNT THR OBS FROM SASHELP';

FOOTNOTE2 'COUNT OF FROM SASHELP';

FOOTNOTE3 'COUNT OF  OBS FROM SASHELP';

RUN;

TO DELETE THE FOOTNOTE JUST WRITE FOOTNOTE AND SEMICOLON RUN .

FOOTNOTE; RUN THE PROGRAM


4.ODS(OUTPUT DELIVERY SYSTEM)::: FILE FORMATS

    

   -->HTML:: HYPER TEXT MARKUP LANGUAGE(OPEN IN WEB BROWSERS-CHROME,FIREFOX,ME)


   -->PDF:: PORTABLE DOCUMENT FORMAT(OPEN IN ADOBE READER)


   -->RTF:: RICH TEXT FORMAT(OPEN IN MS-WORD)

     

    -->CSVALL:: COMMA SEPARATED VALUE (OPEN IN EXCEL)


ODS HTML FILE="C:\Users\sumed\OneDrive\Desktop\ODS\CARS.HTML";

PROC MEANS DATA=SASHELP.CARS;

RUN;

ODS HTML CLOSE;



ODS PDF FILE="C:\Users\sumed\OneDrive\Desktop\ODS\CARS12.PDF";

PROC MEANS DATA=SASHELP.CARS;

RUN;

ODS PDF CLOSE;


ODS RTF FILE="C:\Users\sumed\OneDrive\Desktop\ODS\CARS1233.RTF" STYLE=JOURNAL;

PROC MEANS DATA=SASHELP.CARS;

RUN;

ODS RTF CLOSE;



ODS CSVALL FILE="C:\Users\sumed\OneDrive\Desktop\ODS\CARS23.CSV" STYLE=JOURNAL;

PROC MEANS DATA=SASHELP.CARS;

RUN;

ODS CSVALL CLOSE;



ODS RTF FILE="C:\Users\sumed\OneDrive\Desktop\ODS\CLAS123.RTF" STYLE=OCEAN;

DATA SS;

INFILE "C:\Users\sumed\OneDrive\Desktop\SALE1.TXT";

INPUT NAME$ AGE GEND$ HT WT;

RUN;

PROC PRINT;

RUN;

ODS RTF CLOSE;


Thursday, 17 October 2024

7.VALIDATION AND CLEANING TECHNIQUES

           DATA VALIDATION AND DATA CLEANING:::


-->WE CAN CLEAN THE DATA OUTSIDE OF THE SAS ENVIRONMENT LIKE TEXT AND EXCEL FILE


-->EDIT THE RAW DATA BY USING RIGHT CLICK-----> "EDIT MODE"--->BROWSE MODE OPTION


-->BY PROGRAMATICALLY CAN WE CLEAN THE DATA?

YES WE CAN,

--->PROC PRINT:: BOTH CHARACTER  AND NUMERIC

--->PROC FREQ:: BOTH CHARACTER AND NUMERIC

--->PROC MEANS:: NUMERIC

--->PROC UNIVARIATE:: NUMERIC

--->PROC SUMMARY:: NUMERIC

EG1:

PROC PRINT DATA=SASHELP.CLASS;

WHERE NAME IS MISSING;

RUN;

EG2:

PROC PRINT DATA=SASHELP.CLASS NOOBS ;

WHERE SEX IS MISSING;

RUN;


--->PROC MEANS::: IT WILL GIVE THE "SUMMARY STATISTICS FOR ALL NON MISSING 

                        NUMERIC VARIABLES"


SYNTAX::

PROC MEANS DATA=XXXX.YYY;

RUN;

OPTIONS::::

---VAR    ---N  ---MEAN   ---STANDARD DEVIATION(STD)   --MIN   --MAX 

 --NMISS(NO OF MISSING NUMERICALS VALUES)  --OUTPUT OUT=(CREATING NEW DATAASET)


 DEFAULT OUTPUT::::WILL BE

"N MEAN STD MIN MAX" FOR ALL NON MISSING NUMERICAL DATA


1.WITHOUT "VAR" STATEMENT IT WILL STATISTICS FOR ALL NON MISSING NUMERICAL VARIABLES

EG1:

PROC MEANS DATA=SASHELP.CLASS;

RUN;

2.WITH "VAR" STATEMENT IT WILL STATISTICS FOR PARTICULAR NUMERICAL VARIABLES

EG2:

PROC MEANS DATA=SASHELP.CLASS;

VAR AGE HEIGHT;

RUN;

EG3:

PROC MEANS DATA=SASHELP.CLASS;

VAR AGE;

RUN; 

3.TO SUPPRESS/CONTROLLING THE DEFAULT OUTPUT OF MEANS????

     ---USE N MEAN STD MIN MAX AFTER THE DATASET

EG1:

PROC MEANS DATA=SASHELP.CLASS N STD;

RUN;

EG2:

PROC MEANS DATA=SASHELP.CLASS MIN MEAN;

VAR AGE;

RUN;

4.WHAT IF THE DATA HAVING THE MISSING VALUES IN NUMERICAL VARIABLES????

   ---USE THE "NMISS" AFTER THE DATASET

EG1:

PROC MEANS DATA=SASHELP.CLASS NMISS;

RUN;

EG2:

PROC MEANS DATA=SASHELP.CLASS NMISS;

VAR AGE HEIGHT;

RUN;

5.CREATING NEW DATASET IN PROC MEANS??

---USE OUTPUT OUT=

EG1:

PROC MEANS DATA=SASHELP.CLASS;

VAR AGE HEIGHT;

OUTPUT OUT=WORK.SAS1; * DEFAULT VAR WILL BE CREATED (_TPYE_ _FREQ_ _STATUS_);

RUN;


--->PROC SUMMARY:::NUMERIC

1. BY DEFAULT IT WILL GIVE "COUNT OF THE OBSERVATIONS"

2.IF WE USING THE "VAR" STATEMENT IN SUMMARY PROCEDURE "SAME OUTPUT LIKE PROC MEANS"

3."NO PRINT" IS DEFAULT SO HAVE TO GIVE "PRINT" AFTER THE DATASETNAME

SYNTAX:::

EG1:

PROC SUMMARY DATA=XXXX.YYY;

RUN;

--->WITHOUT "PRINT" OPTION NO OUTPUT

EG2:

PROC SUMMARY DATA=SASHELP.CLASS;

RUN;

--->WITH PRINT OPTION IT WILL GIVE A COUNT OF OBS

EG3:

PROC SUMMARY DATA=SASHELP.CLASS PRINT;

RUN;

EG4:

PROC SUMMARY DATA=SASHELP.HEART PRINT;

RUN;


2.IF WE USING THE "VAR" STATEMENT IN SUMMARY PROCEDURE "SAME OUTPUT LIKE MEANS"

EG1:

PROC SUMMARY DATA=SASHELP.CLASS PRINT N MEAN;

VAR AGE HEIGHT;

RUN;


---->PROC UNIVARIATE:::: NUMERIC

IT WILL GIVE EXTREME OBSERVATIONS,QUANTILES,BASIC STATS,TEST FOR LOCATIONS,MOMENTS


SYNTAX::

EG1:

PROC UNIVARIATE DATA=XXXX.YYYY;

VAR NUMERIC;

RUN;


WITHOUT VAR STATEMENT ALL NUMERIC GIVE ALL NUMERIC VARIABLES 

EG2:

PROC UNIVARIATE DATA=SASHELP.CLASS;

RUN;


WITH VAR IT WILL EXTREME OBS GIVE FOR PARTICULAR VAR

EG3:

PROC UNIVARIATE DATA=SASHELP.CLASS;

VAR AGE;

RUN;


--->PROC FREQ

-->FOR CHARACTER 

-->FOR NUMERIC


--->IT WILL GIVE 'FREQUENCY TABLES AND N-WAY FREQ TABLES' FOR THE VARIABLES


SYNTAX:::

EG1:

PROC FREQ DATA=XXXX.XXXX;

TABLE XXX XXXX XXX;

RUN;


DEFAULT OUTPUT::IT WILL GIVE 

1.FREQUENCY  2.PERCENTAGE  3.CUMULATIVE FREQUENCY  4.CUMULATIVE PERCENTAGE


WITH "TABLE" STATEMENT IT WILL GIVE FREQ TABLES FOR PARTICULAR VARIABLES

EG1:

PROC FREQ DATA=SASHELP.CLASS;

TABLE NAME HEIGHT;

RUN;


WITHOUT "TABLE" STATEMENT IT WILL GIVE FREQ TABLES FOR ALL VARIABLES

EG2:

PROC FREQ DATA=SASHELP.CLASS;

RUN;


NLEVELS:::GIVE THE LEVELS/COUNT OF OBS FOR VARIABLES

EG3:

PROC FREQ DATA=SASHELP.CLASS NLEVELS;

TABLE NAME SEX AGE;

RUN;

EG4:

PROC FREQ DATA=SASHELP.CLASS NLEVELS;

TABLE NAME SEX;

RUN;

EG5:

PROC FREQ DATA=PROG1.SALES NLEVELS;

TABLE JOB_TITLE GENDER;

RUN;

EG6:

PROC FREQ DATA=PROG1.NONSALES NLEVELS;

TABLE JOB_TITLE GENDER;

RUN;


SUPPRESS/REMOVE THE DEFAULT OUTPUT:SO WE CAN ADD NOPERCENT NOCUM 

EG7:

PROC FREQ DATA=SASHELP.CLASS;

TABLE NAME HEIGHT/ NOPERCENT;

RUN;


WITH SPACE IN B/W THE TWO VARIABLES CALLED ONE-WAY FREQUENCY

EG8:

PROC FREQ DATA=SASHELP.CLASS;

TABLE NAME HEIGHT;

RUN;


ONE WAY:: SPACE B/W THE TWO VARIABLES

EG9:

PROC FREQ DATA=SASHELP.HEART;

TABLE STATUS SEX;

RUN;


TWO WAY:: ASTRICK *  B/W THE TWO VARIABLES

EG10:

PROC FREQ DATA=SASHELP.HEART;

TABLE STATUS*SEX; *DEFAULT OUTPUT:: FREQ PERCENT ROWPCT COLPCT;

RUN;


THREE WAY:: 2 ASTRICK IN B/W THREE VAR

EG11:

PROC FREQ DATA=SASHELP.HEART;

TABLE STATUS*SEX*AgeAtStart;

RUN;


TWO-WAY FREQ(ASTRICK(*) IN B/W TWO VARIABLES  OUTPUT IS 

--->OUTPUT:: 1.FREQ 2.PERCENT 3.ROW PER 4.COL PER

EG12:

PROC FREQ DATA=SASHELP.CLASS;

TABLE NAME*HEIGHT;

RUN;


THREE-WAY FREQ

EG13:

PROC FREQ DATA=SASHELP.CLASS;

TABLE NAME*HEIGHT*SEX;

RUN;


SUPPRESSING/REMOVING THE  DEFAULT OUTPUT

EG14:

PROC FREQ DATA=SASHELP.CLASS;

TABLE NAME*HEIGHT/NOROW;

RUN;

EG15:

PROC FREQ DATA=SASHELP.CLASS;

TABLE NAME*HEIGHT/NOCOL;

RUN;


Wednesday, 16 October 2024

6.SUBSETTING OBSERVATIONS AND VARIABLES

                SUBSETTING/CONTROLLING OBSERVATIONS AND VARIABLE


FOR OBSERVATIONS:::TO SUBSET OBSERVATION THERE ARE STATEMENTS LIKE 

         1.WHERE

  2.IF THEN 

  3.IF THEN ELSE

  4.FIRST OBS AND OBS

  5.FIRST. AND LAST.      ETC


FOR VARIABLES::::TO SUBSET VARIABLES THERE ARE STATEMENTS LIKE

        1.KEEP:INCLUDING THE VARIABLES

2.DROP:EXCLUDING THE VARIABLES


 1.WHERE:

 -->WE CAN USE CHARACTER AND NUMERIC DATA

 -->WHERE DATA WILL BE 'CASE SENSITIVE'(A a)

 -->FOR CHARACTER OBSERVATION NEED '    '  (QUOTES)

 -->FOR NUMERIC NO NEED TO GIVE ANY QUOTES

 --> WHERE STATEMENT WILL WORK IN 'DATA' STEP AND 'PROC'STEP*/;

EG1:

DATA SAS;

SET SASHELP.CLASS;

WHERE SEX='M';

RUN;

PROC PRINT DATA=SAS;

RUN;

--->WHERE STATEMENT IS CASE SENSITIVE SO IF ANY CASE SENSITIVE CODE IN SYNTAX IT WILL SHOW ERROR.

EG2:

DATA SAS;

SET SASHELP.CLASS;

WHERE SEX='m';

RUN;

PROC PRINT DATA=SAS;

RUN;


WHERE OPERATORS::THERE ARE OPERATORS LIKE

1.COMPARISON OPERATOR:

  --EQUAL                                      EQ            =

  --NOT EQUAL                             NE          ^=

  --GREATER THAN                     GT            >

  --LESSER THAN                        LT             <

  --GREATER THAN EQUAL      GE            >=

  --LESSER THEN  EQUAL          LE           <=

  --  IN OPERATOR


2.ARTHEMATIC OPERATOR:


---   *   MULTIPLICATION

---   /   DIVISION

---   +   ADDITION

---   -   SUBSTRACTION


3.LOGICAL OPERATOR::


--    &  AND

--    |  OR

--   ^  NOT


4.SPECIAL WHERE OPERATOR::


--BETWEEN-AND

--IS NULL 

--IS MISSING

--CONTAINS


1.COMPARISON OPERATOR:

EG1:

DATA SAS1111;

SET SASHELP.CLASS;

WHERE sex="F";

RUN;

EG2:

DATA SAS1;

SET SASHELP.CLASS;

WHERE AGE >14;

RUN;

PROC PRINT;

RUN;

EG3:

DATA SAS2;

SET SASHELP.CLASS;

WHERE AGE <14;

RUN;

PROC PRINT;

RUN;

EG4:

DATA SAS3;

SET SASHELP.CLASS;

WHERE AGE <=14;

RUN;

PROC PRINT;

RUN;

EG5:

DATA SALES;

SET PROG1.SALES;

WHERE COUNTRY IN('AU','IN');

RUN;

PROC PRINT;

RUN;

EG6:

DATA SALES;

SET PROG1.SALES;

WHERE SALARY NE .;

RUN;

PROC PRINT;

RUN;


2.ARTHEMATIC OPERATOR:

EG1:

DATA SALE;

SET PROG1.SALES;

WHERE SALARY / 12 *1.10 >=7500;

RUN;

PROC PRINT;

RUN;

EG2:

DATA SALE2;

SET PROG1.SALES;

WHERE (SALARY / 12) *1.10 >=7500; 

RUN;

PROC PRINT;

RUN;


3.LOGICAL OPERATOR::

EG1:

DATA SALE22;

SET PROG1.SALES;

WHERE GENDER NE 'M' AND SALARY >=50000; 

RUN;

PROC PRINT;

RUN;

EG2:

DATA SALE33;

SET PROG1.SALES;

WHERE GENDER NE 'F' OR SALARY >=50000; 

RUN;

PROC PRINT;

RUN;

EG3:

DATA SALE;

SET PROG1.SALES;

WHERE COUNTRY='US' OR COUNTRY='AU'; 

RUN;

PROC PRINT;

RUN;

EG4:

DATA SALE44;

SET PROG1.SALES;

WHERE COUNTRY NOT IN ('US'); 

RUN;

PROC PRINT;

RUN;


4.SPECIAL WHERE OPERATOR::

EG1:

DATA SALE66;

SET PROG1.SALES;

WHERE SALARY BETWEEN 50000 AND 100000;

RUN;

PROC PRINT;

RUN;

EG2:

DATA SALE55;

SET PROG1.SALES;

WHERE SALARY NOT BETWEEN 50000 AND 100000; 

RUN;

PROC PRINT;

RUN;

EG3:

DATA SALE77;

SET PROG1.SALES;

WHERE SALARY IS NULL; 

RUN;

PROC PRINT;

RUN;

EG4:

DATA SALE775;

SET PROG1.SALES;

WHERE GENDER IS MISSING; 

RUN;

EG5:

DATA SALE123;

SET PROG1.SALES;

WHERE JOB_TITLE CONTAINS 'Manager'; 

RUN;

PROC PRINT;

RUN;

EG6:

DATA SALE90;

SET PROG1.SALES;

WHERE JOB_TITLE CONTAINS 'Officer'; 

RUN;

EG7:

DATA SALE901;

SET PROG1.SALES;

WHERE JOB_TITLE CONTAINS 'Rep'; 

RUN;


FOR VARIABLES::::TO SUBSET THE VARIABLES THERE ARE STATEMENTS LIKE

          1.KEEP:INCLUDING THE VARIABLES

  2.DROP:EXCLUDING THE VARIABLES


HERE KEEP AND DROP WILL WORK IN DATA STEP NOT IN 'PROC STEP'


1.KEEP:INCLUDING THE VARIABLES

EG1:

DATA SAS125;

SET SASHELP.CLASS;

KEEP NAME AGE;

RUN;

EG2:

DATA SAS09;

SET SASHELP.CLASS;

drop NAME AGE;

RUN;

PROC PRINT;

RUN;


2.DROP:EXCLUDING THE VARIABLES

EG1:

DATA SAS301;

SET SASHELP.CARS;

DROP MAKE MODEL MSRP;

RUN;

EG2:

DATA SAS322;

SET SASHELP.CARS;

keep MAKE MODEL MSRP;

RUN;

PROC PRINT;

RUN;

EG3:

DATA WW;

SET SASHELP.CLASS;

KEEP AGE SEX;

WHERE AGE > 13;

RUN;

EG4:

DATA EWE;

KEEP       N R U;

INPUT     N R U I O Y P Y;

CARDS;

1 2 3 4 5 6 7 8

9 8 7 6 5 4 3 2

1 2 3 4 5 6 7 8

9 8 7 6 5 4 3 2

1 2 3 4 5 6 7 8

9 8 7 6 5 4 3 2

1 2 3 4 5 6 7 8

9 8 7 6 5 4 3 2

;

RUN;



********  statments are working in DATASTEP AND PROC STEP;


DATA SS;

SET;

INFILE;

INPUT;

KEEP;

DROP;

WHERE;

RUN;

more......


PROC PRINT;

SET;

INFILE;

INPUT;

KEEP;

DROP;

WHERE;

VAR;

RUN;


PROC PRINT DATA=SASHELP.CLASS;

VAR AGE SEX;

WHERE AGE > 15;

RUN;




PROC PRINT DATA=SASHELP.HEART;

VAR SYSTOLIC DIASTOLIC;

WHERE DIASTOLIC > 89;

RUN;


Tuesday, 15 October 2024

5.IMPORT AND EXPORT OF DATASETS IN SAS

 PROC PRINT::

-->FOR TO PRINT THE ALL OBSERVATIONS AND VARIABLES FROM DATASETS

-->TO GET THE OUTPUT RESULTS OF DATASET


DATA SAS;

SET SASHELP.CLASS;

RUN;

PROC PRINT DATA=SAS;

RUN;


OPTIONS USED IN PROC PRINT:::

--VAR:: INCLUDING THE VARIABLES

--NOOBS:: REMOVE/SUPPRESS THE OBSERVATIONS COUNT NUMBER

--WHERE::TO SUBSET THE OBSERVATIONS

--TITLE::TO ENHACE THE DATASETS

--FOOTNOTES::TO ENHANCE THE DATASETS

--LABELS:;TO ENHANCE THE DATASETS

--FORMATS::TO CONTROLLING THE VALUES IN DATASETS


EG: PROC PRINT DATA=PROG1.COUNTRY;

       RUN;


USING OF 'VAR' STATEMENT::

EG1:

PROC PRINT DATA=SASHELP.CLASS;

VAR AGE NAME;

RUN;

EG2:

PROC PRINT DATA=SASHELP.CLASS;

VAR SEX;

RUN;


FOR SUPPRESSING/ELIMINATING THE OBSERVATIONS NUMBER*/

-->USE 'NOOBS' OPTION

PROC PRINT DATA=SASHELP.CLASS NOOBS;

VAR AGE NAME;

RUN;


PRINTING THE VAR BY USING ONE STARTING LETTER::

WE CAN ALSO USE ONLY ONE LETTER OF VARIABLES IN THE SYNTAX

EG1:

PROC PRINT DATA=SASHELP.CLASS;

VAR W:  ;

RUN;

EG2:

PROC PRINT DATA=SASHELP.CLASS;

VAR N:   ;

RUN;

EG3:

PROC PRINT DATA=SASHELP.CLASS;

VAR S:  ;

RUN;


DATASETS ACCESSING BY IMPORT AND EXPORT METHOD:::


1.PROC IMPORT

2.PROC EXPORT

3.PROC PRINT

4.PROC CONTENTS


4.PROC CONTENTS:::

-->   TO DISPLAY THE 'DESCRIPTOR(DESCRIBE THE SAS DATASET) PORTION' OF THE SAS DATASET

-->DATASET NAME,NUMBER OF OBSERVATIONS AND VARIABLES,LENGTH,FORMATS, INFORMATS,FILE, LOCATION ETC.

EG1:

PROC CONTENTS DATA=SASHELP.CLASS;

RUN;

EG2:

PROC CONTENTS DATA=SASHELP.HEART;

RUN;


NOTE:PROC PRINT AND CONTENTS CAN USE  FOR NEWLY CREATED DATASET/EXISTING DATASET


IMPORT AND EXPORT METHODS:::

WE CAN IMPORT AND EXPORT THE DATA BY TWO METHODS 

1.HARD CODE

2.SETUP WIZARD 


1. IMPORTING DATA::: WE CAN IMPORT THE EXCEL,TEXT AND OTHER FILES INTO                                                         'SAS ENVIRONMENT'

--->>>EXCEL FILE WILL CONVERT INTO 'RAW DATASET'

HARDCODE METHOD:::

EG1:

PROC IMPORT DATAFILE="STORAGE PATH"

             OUT=SQL.CLASS1

DBMS=XLS

REPLACE;

RUN;

EG2:

PROC IMPORT DATAFILE="C:\Users\sumed\OneDrive\Desktop\CLASS11.txt"

            OUT=SQL.BBBB

REPLACE;

RUN;


PROC IMPORT DATAFILE="C:\Users\sumed\OneDrive\Desktop\HEART.xls"

            OUT=WORK.HEART

DBMS=XLS

REPLACE;

RUN;

SETUP WIZARD METHOD:::

FIRST CLICK ON THE FILE OPTION AND SELECT THE IMPORT METHOD..

  1. Select Microsoft Excel on PC Files Server and click Next.
    Import Wizard First Panel
  2. Click Browse in the Connect to Ms Excel dialog box.
    Import Wizard Second Panel
  3. In the Files of type drop-down list, select the file extension of the Excel file:
    Select All Files
  4. Navigate to the location of the Excel file and click Open.
    Selection Menu
  5. The Import Wizard - Select table dialog box gives you a drop-down list of worksheets or tables to read. From the drop-down list, select the table that you want to read.
    Table Selection Menu
    Selecting the Table
  6. Click Options to bring up the Excel Options dialog box. These options are set by default. If you have datetime data in a column of your workbook (such as 01jan2008:08:13:22), deselect the Use DATE.format for a Date/Time column check box. Otherwise, you get only the date. You can also adjust the 1024 setting to capture column text lengths in your spreadsheet that are longer than 1024 characters. The maximum value is 32,767 characters. Click OK to close the Excel Options dialog box. Then, back on the Import Wizard - Select table dialog box, click Next.
    Options Button and Window
  7. Now select the location to store the Excel worksheet in a SAS data set. Choose the SAS libref in which you want to store the SAS data set. If you want the data set to be removed after you exit SAS, select the default WORK library. If you want the data set to remain after you exit SAS, then select another library, such as SASUSER. If the SAS data set does not exist, enter the name in the Member field of the panel. If the SAS data set already exists, choose the member to replace and click Next.
    image label
  8. The next dialog box, Import Wizard - Create SAS statements, enables you to create your own SAS PROC IMPORT code. You can do either of two things: a) Enter the full location of where you want to store the SAS code as shown in the following image:
    image label
9.Click On The Finish..

2.EXPORTING DATA:::: WE ARE EXPORTING THE RAW DATA(SAS ENVIRONMENT) TO OTHER/EXTERNAL ENVIRONMENT

 -->>RAW DATASET WILL BE CONVERTED AS EXCEL FILE

HARDCODE METHOD:::

EG1::

PROC EXPORT DATA=PROG2.DAYSALES

           OUTFILE="C:\Users\sumed\OneDrive\Desktop\DAYSALE.xls"

           DBMS=XLS 

   REPLACE;

RUN;

EG2:

PROC EXPORT DATA=SASHELP.CARS

            OUTFILE="C:\Users\sumed\OneDrive\Desktop\BOT JULY EDTORS\PROG 1\CARS.XLS"

            DBMS=XLS

           REPLACE;

RUN;

SETUP WIZARD METHOD:::

FIRST CLICK ON THE FILE OPTION AND SELECT THE EXPORT METHOD..

  1. Select File ►Export Data. A Windows dialog box will appear.

    SAS dataset selection box

    In this Windows dialog box select your SAS library libref and then select the member of the library, which is the SAS dataset. Always select the SAS library libref first. In this example we selected the SASHELP library libref and then selected a SAS dataset associated with the libref. After this is complete select the Next button.

    Select Export Type dialog box will appear as shown below:

    image label

  2. Do not select Microsoft Excel Workbook. This is for 64-bit Office.
  3. Select Microsoft Excel on PC Files Server, as shown in this image, and click Next.

    Connect to Excel dialog box will appear:

    image label

  4. There you will see a location for entering the Excel filename that you wish to export, please read the entire section here before proceeding.
  5. If the Excel file already exists you can browse to its location using the browse dialog box. The Excel file can be either a XLS, XLSX, or XLSB file.
  6. If the Excel file does not already exist you must type in the full directory path and the Excel filename you want to create. You cannot use the Browse button. You can copy the directory path using Windows Explorer.
  7. Any new Excel file must have a .XLSB extension on it. It cannot have any other extension type. This is a limitation of the Microsoft Excel ODBC driver when creating new Excel files. If you need to have a different extension, open Excel and save a default XLSX file or XLS file that SAS can use.

Using the XLSB extension

After you have entered in the name of the Excel file click on the OK and proceed to the table selection panel.

Table Selection Panel

This panel allows you to enter the name of the worksheet that you wish to add to your Excel table. If the worksheet already exists you can select the down arrow on the right to select the sheet. If the sheet does not already exist or the table does not already exist then simply type the name of the worksheet and click either Next or Finish. If you click the next button you will have the opportunity for the wizard to create a SAS program for you. If you click Finish you will proceed beyond the next panel and you are done. In this case click the Next and you will see the next panel.

Create SAS Statements

8.Click On The Finish..























4.CREATING NEW DATASETS

       CREATING NEW DATASETS

1.DATA AND SET METHOD:: ACCESSING THE EXISTING LIB AND DATASET

2.DATA INPUT AND DATALINES/CARDS METHOD:: FOR CREATING THE OWN DATASET

3.DATA INFILE AND INPUT METHOD:: EXTERNAL FILES INTON SAS DATASET


1.DATA AND SET METHOD:: ACCESSING THE EXISTING LIB AND DATASET

DATA RAMU;

  SET SASHELP.CLASS;

RUN;


DATA RAMU01;

SET WORK.RAMU;

RUN;



/*IF YOU ARE USING ONLY DATA STEP::: STORE IN THE WORK LIB*/

/*IF YOU ARE USING DATA STEP AND PROC PRINT STEP:: STORE IN LIBRARY AND GIVE OUTPUT IN OUTPUT WINDOW*/

/*TO CREATE THE OUTPUT OR PRINTING THE ALL OBSERVATIONS  AND VARIABLES FROM THE DATASET*/

/*----USE "PROC PRINT" */

/**/

/*SYNTAX::*/

/**/

/*PROC PRINT DATA=XXXX(LIB).YYYY(DSN);*/

/*RUN;*/


DATA RAMU01;

SET WORK.RAMU;

RUN;

PROC PRINT DATA=WORK.RAMU01;

RUN;

/**/




/*--TO CONTROL THE VARIABLES IN PROC PRINT*/

/*    **USE "VAR(VARIABLE)" STATEMENT */


/*TO SUPRESS/REMOVE THE OBS NUMBER::: USE "NOOBS" OPTION*/


PROC PRINT DATA=WORK.RAMU01 NOOBS;

VAR NAME AGE;

RUN;


PROC PRINT DATA=ADB.ADSL;

VAR SEX AGE RACE;

RUN;


2.DATA INPUT AND DATALINES/CARDS METHOD:: FOR CREATING THE OWN DATASET


/*      ----INPUT:: GIVE "VARIABLE" INFORMATION*/

/*  ----FOR THE CHARACTER VARIABLE::: USE "DOLLAR($)" SYMBOL:: FOR                                      CHARACTER IF THERE IS NO DATA THEN IT SHOW BLANK SPACE( )*/

/* ----FOR THE NUMERICAL VARIABLE:: NO NEED OF DOLLAR($) SIGN::  IF THERE IS                   NO DATA IT SHOWS DOT(.) SYMBOL */

/*   ---- DATALINES/CARDS::: GIVE THE "OBSERVATION" INFORMATION*/


DATA RAMU02;

INPUT ID NAME$ AGE GENDER$ HEIGHT;

DATALINES;

101 RAGHU 24 M 5

102 PADMA 23 F 5

103 VISHWA 24 M 6

104 RAMYA 24 F 5

;

RUN;

PROC PRINT DATA=WORK.RAMU02;

RUN;



/*WHAT IF NOT USING THE DOLLAR SYMBOL FOR THE CHARACTER VARIABLES?????*/


DATA SAS1;

INPUT ID NAME AGE$ GENDER$ HEIGHT;

DATALINES;

101 RAGHU 24 M 5

102 PADMA 23 F 5

103 VISHWA 24 M 6

104 RAMYA 24 F 5

;

RUN;

PROC PRINT DATA=WORK.SAS1;

RUN;



/*IF THE OBSERVATIONS VALUE HAS MORE LENGTH::: USE "LENGTH" STATEMENT*/


DATA SAS2;

LENGTH NAME$ 35;

INPUT NAME$ AGE GENDER$;

CARDS;

PADMAVATHIIDDCR 24 F

RANGARAOBHUPATHIHYDERBAD 23 M

;

RUN;

PROC PRINT;

RUN;



3.DATA INFILE AND INPUT METHOD:: EXTERNAL FILES---DATASET( 4 SCENARIO'S)


1.DEFAULT BLANK/SPACES:: INFILE AND INPUT

2.COMMA/ANY SPECIAL CHARACTERS:::: DLM=','

3.DATA MISSNG IN B/W THE RECORD:::: DSD

4.DATA MISSING IN B/W AND IN LAST RECORD:::: DSD MISSOVER*/


/*----INFILE::: WILL GIVE THE FILE PATH OF THE TEXT FILE*/


/*----INPUT:: GIVE "VARIABLE" INFORMATION*/*/

/* --FOR THE CHARACTER VARIABLE::: USE "DOLLAR($)" SYMBOL:: BLANK SPACE( )*/

/*--FOR THE NUMERICAL VARIABLE:: NO NEED OF DOLLAR($)::     DOT(.) */;


1.BY DEFAULT THE DATA IS SEPARATED WITH "SPACE/BLANK"


DATA RAGHU;

INFILE "C:\Users\sumed\OneDrive\Desktop\SS.txt";

INPUT NAME$ AGE GENDER$ HEIGHT;

RUN;

PROC PRINT;

RUN;


2.WHAT IF THE DATA IS SEPARATED WITH COMMA OR ANY OTHER SPECIAL CHARACTER(@#$%&*)


**** USE THE DLM(DELIMETER @#$^%&*) " DLM=',' OPTION IN INFILE STATEMENT;


DATA RAGHU01;

INFILE "C:\Users\sumed\OneDrive\Desktop\SS1.txt" DLM=',';

INPUT NAME$ AGE GENDER$ HEIGHT;

RUN;

PROC PRINT;

RUN;


3. WHAT IF THE DATA IS MISSING IN B/W THE RECORD/FILE???

/******USE "DSD" OPTION (DELIMITED SENSITIVE DATA) IN INFILE STATEMENT */

DATA RAGHU02;

INFILE "C:\Users\sumed\OneDrive\Desktop\SS2.txt" DSD;

INPUT NAME$ AGE GENDER$ HEIGHT;

RUN;

PROC PRINT;

RUN;


4. WHAT IF THE DATA IS MISSING IN THE LAST RECORD/LAST OBSERVATIONS??

/**** USE DSD AND MISSOVER*/

/*/*MISSOVER:WORKS ONLY FOR MISSING END VALUES*/*/;


DATA RAGHU3;

INFILE "C:\Users\sumed\OneDrive\Desktop\SS3.txt" DSD MISSOVER;

INPUT NAME$ AGE GENDER$ HEIGHT;

RUN;

PROC PRINT;

RUN;


DATA SAS; 

SET SASHELP.CLASS;

RUN;

PROC PRINT;

RUN;