SAS QUESTIONS - 2
DATA SALES_DATA;
INPUT Region $ Product $ Sales;
DATALINES;
North A 1000
North B 1500
South A 2000
South C 2500
East B 1800
West A 2200
;
RUN;
PROC PRINT;RUN;
/*Question 1: Given the dataset sales_data below, calculate the total sales for each Region using PROC SQL.*/
/*Hint: Use the GROUP BY clause in PROC SQL to aggregate sales by region.*/
PROC SQL;
SELECT REGION,SUM(SALES) AS TOTAL
FROM SALES_DATA
GROUP BY REGION;
QUIT;
DATA EMPLOYEE_DATA;
INPUT EmployeeID Name $ Salary;
DATALINES;
1 John 55000
2 Jane 62000
3 Jim 58000
4 Jack 60000
5 Jill 59000
;
RUN;
PROC PRINT;RUN;
/*Question 2: Create a macro named %TopN that accepts a dataset name, variable name,and a number N, and returns the top N observations sorted by the specified variable in descending order. Test it on the employee_data dataset:*/
/*Hint: Within the macro, use PROC SORT with the OUTOBS= option to limit the number of observations.*/
%MACRO NUM1(DATA=,VAR=,N=);
PROC SORT DATA=&DATA OUT=SORTDATA;
BY DESCENDING &VAR;
RUN;
DATA TOP&N;
SET SORTDATA;
IF _N_ > &N THEN STOP;
RUN;
PROC PRINT DATA=TOP&N;
RUN;
%MEND;
%NUM1(DATA=EMPLOYEE_DATA,VAR=SALARY,N=2)
/*ALTERNATE METHOD*/
%MACRO NUM(DATA,VAR,N,VARI);
PROC SQL OUTOBS=&N;
SELECT &VAR
FROM &DATA
ORDER BY SALARY DESC;
QUIT;
%MEND;
%NUM(EMPLOYEE_DATA,*,3)
DATA STUDENT;
INPUT StudentID Name $ Score;
DATALINES;
1 Alice 85
2 Bob 92
3 Charlie 88
4 David 95
5 Eva 90
;
RUN;
PROC PRINT;RUN;
/*Question 3: Given the dataset student_scores, write a PROC SQL query to find the second highest score.*/
/*Hint: Use a subquery to identify the maximum score less than the overall maximum.*/
PROC SQL;
SELECT MAX(SCORE) AS MAX
FROM STUDENT
WHERE SCORE < (SELECT MAX(SCORE)
FROM STUDENT);
QUIT;
/*ALTERNATE METHOD-1 */
/* Step 1: Sort the data in descending order */
proc sql;
create table sorted_scores as
select *
from student
order by Score desc;
quit;
/* Step 2: Select the second highest score */
proc sql outobs=1;
select Score
from sorted_scores
where monotonic() = 2;
quit;
data _null_;
set sorted_scores;
if _N_ = 2 then do;
call symput('second_highest_score', Score);
stop;
end;
run;
%put The second highest score is &second_highest_score.;
/*ALTERNATE METHOD-2 */
/* Step 1: Determine the overall maximum score */
proc sql;
select max(Score) into :max_score
from studenT;
/* Step 2: Identify the maximum score less than the overall maximum */
select max(Score) as SecondHighestScore
from student
where Score < &max_score;
quit;
DATA SURVEY;
INPUT ID Age Height Weight;
datalines;
1 25 . 70
2 . 68 160
3 30 70 .
4 22 65 130
;
RUN;
PROC PRINT;RUN;
/*Question 4: Create a macro %MissingReport that takes a dataset name as input and generates a report of the number of missing values for each variable.Test it on the survey dataset:*/
/*Hint: Use PROC MEANS with the NMISS option to count missing values.*/
%MACRO MISSINGREPORT(DATA,OPT);
PROC MEANS DATA=&DATA &OPT;
RUN;
%MEND;
%MISSINGREPORT(SURVEY,NMISS)
/*ALTERNATE METHOD-1 */
%MACRO MISS(DATA);
PROC MEANS DATA=&DATA N NMISS;
VAR _NUMERIC_;
RUN;
PROC FORMAT;
VALUE $MISS_FMT " " = "MISSING"
OTHER= "NON MISSING";
RUN;
PROC FREQ DATA=&DATA;
FORMAT _CHAR_ $MISS_FMT.;
TABLES _CHAR_ / MISSING MISSPRINT NOCUM NOPERCENT;
RUN;
%MEND;
%MISS(SURVEY)
DATA TRANSACTIONS;
INPUT TransactionID CustomerID Amount;
DATALINES;
1 101 50
2 102 75
3 101 100
4 103 60
5 102 80
;
RUN;
PROC PRINT;RUN;
/*Question 5: Given the dataset transactions, write a PROC SQL query to identify customers who have made more than one purchase.*/
/*Hint: Use the HAVING clause with a COUNT function to filter customers with multiple transactions.*/
PROC SQL;
SELECT CUSTOMERID
FROM TRANSACTIONS
GROUP BY CUSTOMERID
HAVING COUNT(*) > 1;
QUIT;
DATA PROLIST;
INPUT ProductID $ ProductName $ Price;
DATALINES;
P001 Widget 25.99
P002 Gizmo 15.49
P003 Gadget 22.75
;
RUN;
PROC PRINT;RUN;
/*Question 6: Create a macro %VarType that accepts a dataset name and a variable name, and prints whether the variable is numeric or character.Test it on the product_list dataset:*/
/*Hint: Use PROC CONTENTS to retrieve variable metadata and the VARTYPE function to determine the variable type.*/
%macro VarType(dataset=, var=);
%local dsid varnum vartype rc;
/* Open the dataset */
%let dsid = %sysfunc(open(&dataset, i));
%if &dsid = 0 %then %do;
%put ERROR: Dataset &dataset could not be opened.;
%return;
%end;
/* Get the variable number */
%let varnum = %sysfunc(varnum(&dsid, &var));
%if &varnum = 0 %then %do;
%put ERROR: Variable &var not found in dataset &dataset.;
%let rc = %sysfunc(close(&dsid));
%return;
%end;
/* Get the variable type */
%let vartype = %sysfunc(vartype(&dsid, &varnum));
%if &vartype = C %then %do;
%put NOTE: Variable &var in dataset &dataset is of type CHARACTER.;
%end;
%else %if &vartype = N %then %do;
%put NOTE: Variable &var in dataset &dataset is of type NUMERIC.;
%end;
%else %do;
%put ERROR: Unable to determine the type of variable &var in dataset &dataset.;
%end;
/* Close the dataset */
%let rc = %sysfunc(close(&dsid));
%mend VarType;
%VarType(dataset=prolist, var=ProductID);
%VarType(dataset=prolist, var=ProductName);
%VarType(dataset=prolist, var=Price);
DATA ATTENDANCE;
INPUT Class $ Date :date9. Attendance;
FORMAT DATE DATE9.;
DATALINES;
Math 01JAN2021 30
Math 02JAN2021 28
Science 01JAN2021 25
Science 02JAN2021 27
History 01JAN2021 20
History 02JAN2021 22
;
RUN;
PROC PRINT;RUN;
/*Question 7: Given the dataset attendance, write a PROC SQL query to calculate the average attendance per Class.*/
/*Hint: Use the AVG function in PROC SQL to compute the average attendance grouped by class.*/
PROC SQL;
SELECT CLASS,AVG(ATTENDANCE) AS ATTENDANCE
FROM ATTENDANCE
GROUP BY CLASS;
QUIT;
DATA ORDERS;
INPUT OrderID CustomerID ProductID;
DATALINES;
1 1001 2001
2 1002 2002
3 1001 2003
4 1003 2001
5 1002 2002
;
RUN;
PROC PRINT;RUN;
/*Question 8: Create a macro %DistinctCount that takes a dataset name and a variable name, and returns the number of distinct values for that variable.Test it on the orders dataset:*/
/*Hint: Use the DISTINCT keyword in PROC SQL to count unique values.*/
PROC SQL;
SELECT COUNT(DISTINCT *)
FROM ORDERS;
QUIT;
%MACRO DISTINCTCOUNT(DATA,VAR);
%LOCAL DSID VARNUM RC;
/*OPEN THE DATASET*/
%LET DSID = %SYSFUNC(OPEN(&DATA));
/*CHECK IF DATA EXIXTS*/
%IF &DSID %THEN %DO;
/*GET VARIABLE NUMBER*/
%LET VARNUM=%SYSFUNC(VARNUM(&DSID,&VAR));
/*CHECK IF VARIABLE EXISTS*/
%IF &VARNUM %THEN %DO;
/*CLOSE THE DATASET */
%LET RC =%SYSFUNC(CLOSE(&DSID));
/*EXECUTE PROC SQL DISTINCT COUNT*/
PROC SQL;
SELECT COUNT(DISTINCT &VAR) INTO :DISTNICT_COUNT
FROM &DATA;
QUIT;
/*DISPLAY THE RESULT*/
%PUT THE NUMBER OF DISTINCT VALUES IN &VAR IS &DISTINCT_COUNT.;
%END;
%ELSE %DO;
%PUT ERROR:VARIABLE &VAR DOES NOT EXIST IN THE DATASET &DATA.;
%LET RC =%SYSFUNC(CLOSE(&DSID));
%END;
%END;
%ELSE %DO;
%PUT ERROR:DATASET &DATA DOES NOT EXIST.;
%END;
%MEND;
%DISTINCTCOUNT(ORDERS,CUSTOMERID)
%DISTINCTCOUNT(ORDERS,PRODUCTID)
DATA INVENTORY;
INPUT ProductID $ ProductName $ StockLevel;
DATALINES;
P001 Widget 150
P002 Gizmo 80
P003 Gadget 120
P004 Doohickey 60
;
RUN;
PROC PRINT;RUN;
/*Question 9: Given the dataset inventory, write a PROC SQL query to list products with a stock level below the average stock level.*/
/*Hint: Calculate the average stock level using a subquery and compare each product's stock level against it.*/
PROC SQL;
SELECT *
FROM INVENTORY
WHERE STOCKLEVEL < (SELECT MEAN(STOCKLEVEL) FROM INVENTORY);
QUIT;
DATA DATASET1;
INPUT ID NAME$;
DATALINES;
1 John
2 Jane
3 Jim
;
RUN;
DATA DATASET2;
INPUT ID AGE;
DATALINES;
1 28
2 32
4 25
;
RUN;
/*Question 10: Create a macro %MergeDatasets that accepts two dataset names and merges them by a common variable ID. Test it on the dataset1 and dataset2 datasets:*/
%MACRO OLD(DATA1,DATA2,OUT);
PROC SORT DATA=&DATA1;
BY ID;
RUN;
PROC SORT DATA=&DATA2;
BY ID;
RUN;
DATA NEW;
MERGE &DATA1 (IN=A)
&DATA2 (IN=B);
BY ID;
IN_DATA1=A;
IN_DATA2=B;
RUN;
%MEND;
%OLD(DATASET1,DATASET2,NEW);
PROC PRINT DATA=NEW;RUN;
PRACTICE AND COMMENT YOUR CODE:
-->PLEASE FOLLOW OUR BLOG FOR MORE UPDATES.
TO FOLLOW OUR TELEGRAM CHANNEL CLICK HERE
Comments
Post a Comment