151.SAS QUESTIONS - 2

                              SAS QUESTIONS - 2


 DATA SALES_DATA;

 INPUT Region $ Product $ Sales;

 DATALINES;

North A 1000

North B 1500

South A 2000

South C 2500

East B 1800

West A 2200

;

RUN;

PROC  PRINT;RUN;


/*Question 1: Given the dataset sales_data below, calculate the total sales for each Region using PROC SQL.*/


/*Hint: Use the GROUP BY clause in PROC SQL to aggregate sales by region.*/

PROC SQL;

 SELECT REGION,SUM(SALES) AS TOTAL

  FROM SALES_DATA

   GROUP BY REGION;

QUIT;



DATA EMPLOYEE_DATA;

 INPUT EmployeeID Name $ Salary;

 DATALINES;

1 John 55000

2 Jane 62000

3 Jim 58000

4 Jack 60000

5 Jill 59000

;

RUN;

PROC PRINT;RUN;


/*Question 2: Create a macro named %TopN that accepts a dataset name, variable name,and a number N, and returns the top N observations sorted by the specified variable in descending order. Test it on the employee_data dataset:*/


/*Hint: Within the macro, use PROC SORT with the OUTOBS= option to limit the number of observations.*/

%MACRO NUM1(DATA=,VAR=,N=);

PROC SORT DATA=&DATA OUT=SORTDATA;

 BY DESCENDING &VAR;

RUN;


DATA TOP&N;

 SET SORTDATA;

 IF _N_ > &N THEN STOP;

RUN;


PROC PRINT DATA=TOP&N;

RUN;

%MEND;


%NUM1(DATA=EMPLOYEE_DATA,VAR=SALARY,N=2)


/*ALTERNATE METHOD*/

%MACRO NUM(DATA,VAR,N,VARI);

 PROC SQL OUTOBS=&N;

  SELECT &VAR

   FROM &DATA

    ORDER BY SALARY DESC;

 QUIT;

%MEND;


%NUM(EMPLOYEE_DATA,*,3)



DATA STUDENT;

 INPUT StudentID Name $ Score;

 DATALINES;

1 Alice 85

2 Bob 92

3 Charlie 88

4 David 95

5 Eva 90

;

RUN;

PROC PRINT;RUN;


/*Question 3: Given the dataset student_scores, write a PROC SQL query to find the second highest score.*/


/*Hint: Use a subquery to identify the maximum score less than the overall maximum.*/


PROC SQL;

SELECT MAX(SCORE) AS MAX

 FROM STUDENT 

  WHERE SCORE < (SELECT MAX(SCORE)

                  FROM STUDENT);

QUIT;



/*ALTERNATE METHOD-1 */

/* Step 1: Sort the data in descending order */

proc sql;

    create table sorted_scores as

    select *

    from student

    order by Score desc;

quit;


/* Step 2: Select the second highest score */

proc sql outobs=1;

    select Score

    from sorted_scores

    where monotonic() = 2;

quit;


data _null_;

    set sorted_scores;

    if _N_ = 2 then do;

        call symput('second_highest_score', Score);

        stop;

    end;

run;


%put The second highest score is &second_highest_score.;


/*ALTERNATE METHOD-2 */


    /* Step 1: Determine the overall maximum score */

proc sql;

    select max(Score) into :max_score

    from studenT;


    /* Step 2: Identify the maximum score less than the overall maximum */

    select max(Score) as SecondHighestScore

    from student

    where Score < &max_score;

quit;



DATA SURVEY;

 INPUT ID Age Height Weight;

    datalines;

1 25 . 70

2 . 68 160

3 30 70 .

4 22 65 130

;

RUN;

PROC PRINT;RUN;


/*Question 4: Create a macro %MissingReport that takes a dataset name as input and generates a report of the number of missing values for each variable.Test it on the survey dataset:*/


/*Hint: Use PROC MEANS with the NMISS option to count missing values.*/


%MACRO MISSINGREPORT(DATA,OPT);

PROC MEANS DATA=&DATA &OPT;

RUN;

%MEND;


%MISSINGREPORT(SURVEY,NMISS)



/*ALTERNATE METHOD-1 */

%MACRO MISS(DATA);

PROC MEANS DATA=&DATA N NMISS;

 VAR _NUMERIC_;

RUN;


PROC FORMAT;

 VALUE $MISS_FMT   " " = "MISSING" 

                  OTHER= "NON MISSING";

RUN;


PROC FREQ DATA=&DATA;

 FORMAT _CHAR_ $MISS_FMT.;

 TABLES _CHAR_ / MISSING MISSPRINT NOCUM NOPERCENT;

RUN;

%MEND;


%MISS(SURVEY)




DATA TRANSACTIONS;

 INPUT TransactionID CustomerID Amount;

 DATALINES;

1 101 50

2 102 75

3 101 100

4 103 60

5 102 80

;

RUN;

PROC PRINT;RUN;


/*Question 5: Given the dataset transactions, write a PROC SQL query to identify customers who have made more than one purchase.*/


/*Hint: Use the HAVING clause with a COUNT function to filter customers with multiple transactions.*/


PROC SQL;

 SELECT CUSTOMERID

  FROM TRANSACTIONS

   GROUP BY CUSTOMERID

    HAVING COUNT(*) > 1;

QUIT;



DATA PROLIST;

 INPUT ProductID $ ProductName $ Price;

 DATALINES;

P001 Widget 25.99

P002 Gizmo 15.49

P003 Gadget 22.75

;

RUN;

PROC PRINT;RUN;


/*Question 6: Create a macro %VarType that accepts a dataset name and a variable name, and prints whether the variable is numeric or character.Test it on the product_list dataset:*/


/*Hint: Use PROC CONTENTS to retrieve variable metadata and the VARTYPE function to determine the variable type.*/


%macro VarType(dataset=, var=);

    %local dsid varnum vartype rc;


    /* Open the dataset */

    %let dsid = %sysfunc(open(&dataset, i));

    %if &dsid = 0 %then %do;

        %put ERROR: Dataset &dataset could not be opened.;

        %return;

    %end;


    /* Get the variable number */

    %let varnum = %sysfunc(varnum(&dsid, &var));

    %if &varnum = 0 %then %do;

        %put ERROR: Variable &var not found in dataset &dataset.;

        %let rc = %sysfunc(close(&dsid));

        %return;

    %end;


    /* Get the variable type */

    %let vartype = %sysfunc(vartype(&dsid, &varnum));

    %if &vartype = C %then %do;

        %put NOTE: Variable &var in dataset &dataset is of type CHARACTER.;

    %end;

    %else %if &vartype = N %then %do;

        %put NOTE: Variable &var in dataset &dataset is of type NUMERIC.;

    %end;

    %else %do;

        %put ERROR: Unable to determine the type of variable &var in dataset &dataset.;

    %end;


    /* Close the dataset */

    %let rc = %sysfunc(close(&dsid));

%mend VarType;


%VarType(dataset=prolist, var=ProductID);

%VarType(dataset=prolist, var=ProductName);

%VarType(dataset=prolist, var=Price);



DATA ATTENDANCE;

 INPUT Class $ Date :date9. Attendance;

 FORMAT DATE DATE9.;

 DATALINES;

Math 01JAN2021 30

Math 02JAN2021 28

Science 01JAN2021 25

Science 02JAN2021 27

History 01JAN2021 20

History 02JAN2021 22

;

RUN;

PROC PRINT;RUN;



/*Question 7: Given the dataset attendance, write a PROC SQL query to calculate the average attendance per Class.*/


/*Hint: Use the AVG function in PROC SQL to compute the average attendance grouped by class.*/


PROC SQL;

 SELECT CLASS,AVG(ATTENDANCE) AS ATTENDANCE

  FROM ATTENDANCE

   GROUP BY CLASS;

QUIT;



DATA ORDERS;

 INPUT OrderID CustomerID ProductID;

 DATALINES;

1 1001 2001

2 1002 2002

3 1001 2003

4 1003 2001

5 1002 2002

;

RUN;

PROC PRINT;RUN;


/*Question 8: Create a macro %DistinctCount that takes a dataset name and a variable name, and returns the number of distinct values for that variable.Test it on the orders dataset:*/


/*Hint: Use the DISTINCT keyword in PROC SQL to count unique values.*/


PROC SQL;

 SELECT COUNT(DISTINCT *)

  FROM ORDERS;

QUIT;


%MACRO DISTINCTCOUNT(DATA,VAR);

%LOCAL DSID VARNUM RC;


/*OPEN THE DATASET*/

%LET DSID = %SYSFUNC(OPEN(&DATA));


/*CHECK IF DATA EXIXTS*/

%IF &DSID %THEN %DO;

/*GET VARIABLE NUMBER*/

%LET VARNUM=%SYSFUNC(VARNUM(&DSID,&VAR));


/*CHECK IF VARIABLE EXISTS*/

%IF &VARNUM %THEN %DO;

/*CLOSE THE DATASET */

%LET RC =%SYSFUNC(CLOSE(&DSID));


/*EXECUTE PROC SQL DISTINCT COUNT*/


PROC SQL;

 SELECT COUNT(DISTINCT &VAR) INTO :DISTNICT_COUNT

 FROM &DATA;

QUIT;


/*DISPLAY THE RESULT*/

%PUT THE NUMBER OF DISTINCT VALUES IN &VAR IS &DISTINCT_COUNT.;

%END;

%ELSE %DO;

%PUT  ERROR:VARIABLE &VAR DOES NOT EXIST IN THE DATASET &DATA.;

%LET RC =%SYSFUNC(CLOSE(&DSID));

%END;

%END;

%ELSE %DO;

 %PUT  ERROR:DATASET &DATA DOES NOT EXIST.;

%END;

%MEND;


%DISTINCTCOUNT(ORDERS,CUSTOMERID)


%DISTINCTCOUNT(ORDERS,PRODUCTID)



DATA INVENTORY;

 INPUT ProductID $ ProductName $ StockLevel;

 DATALINES;

P001 Widget 150

P002 Gizmo 80

P003 Gadget 120

P004 Doohickey 60

;

RUN;

PROC PRINT;RUN;



/*Question 9: Given the dataset inventory, write a PROC SQL query to list products with a stock level below the average stock level.*/


/*Hint: Calculate the average stock level using a subquery and compare each product's stock level against it.*/


PROC SQL;

 SELECT *

  FROM INVENTORY

   WHERE STOCKLEVEL < (SELECT MEAN(STOCKLEVEL) FROM INVENTORY);

QUIT;


DATA DATASET1;

 INPUT ID NAME$;

 DATALINES;

1 John

2 Jane

3 Jim

;

RUN;


DATA DATASET2;

 INPUT ID AGE;

 DATALINES;

1 28

2 32

4 25

;

RUN;


/*Question 10: Create a macro %MergeDatasets that accepts two dataset names and merges them by a common variable ID. Test it on the dataset1 and dataset2 datasets:*/

%MACRO OLD(DATA1,DATA2,OUT);

PROC SORT DATA=&DATA1;

 BY ID;

RUN;


PROC SORT DATA=&DATA2;

 BY ID;

RUN;


DATA NEW;

 MERGE &DATA1 (IN=A)

       &DATA2 (IN=B);

 BY ID;

 IN_DATA1=A;

 IN_DATA2=B;

RUN;

%MEND;

%OLD(DATASET1,DATASET2,NEW);

PROC PRINT DATA=NEW;RUN;


PRACTICE AND COMMENT YOUR CODE: 

-->PLEASE FOLLOW OUR BLOG FOR MORE UPDATES.


TO FOLLOW OUR TELEGRAM CHANNEL CLICK HERE

Comments