133.NEW EXAMPLE EMPLOYEES : INPUT | PROC MEANS | PROC SORT | PROC FREQ |PROC REPORT | PROC SGPLOT | PROC TABULATE | PROC FREQ CHISQ | PROC EXPORT | PROC CORR | PROC SQL PROCEDURES

NEW EXAMPLE EMPLOYEES : INPUT  | PROC MEANS | PROC SORT | PROC FREQ |PROC REPORT | PROC SGPLOT | PROC TABULATE | PROC FREQ CHISQ | PROC EXPORT | PROC CORR | PROC SQL PROCEDURES


/* Creating a dataset for Employee Details with additional employees */

DATA Employee;

    INPUT EmpID Name $ Age Department $ Salary Experience Gender $;

    DATALINES;

    101 John 25 HR 50000 2 M

    102 Alice 28 IT 60000 4 F

    103 Bob 24 Finance 55000 1 M

    104 Clara 30 IT 75000 6 F

    105 David 27 HR 58000 3 M

    106 Eve 26 Finance 62000 2 F

    107 Frank 29 IT 70000 5 M

    108 Grace 31 HR 65000 7 F

    109 Henry 35 IT 80000 10 M

    110 Irene 29 Finance 67000 5 F

    111 Jack 32 HR 72000 8 M

    112 Kate 26 IT 62000 2 F

    113 Liam 33 HR 75000 9 M

    114 Mia 27 IT 68000 3 F

    115 Noah 29 Finance 71000 6 M

    116 Olivia 30 IT 73000 7 F

    117 Paul 34 HR 77000 10 M

    118 Quinn 28 Finance 69000 4 F

    119 Ryan 31 IT 76000 8 M

    120 Sophia 25 HR 54000 2 F

    ;

RUN;


/* Displaying the dataset */

PROC PRINT DATA=Employee;

RUN;


Output:

Obs EmpID Name Age Department Salary Experience Gender
1 101 John 25 HR 50000 2 M
2 102 Alice 28 IT 60000 4 F
3 103 Bob 24 Finance 55000 1 M
4 104 Clara 30 IT 75000 6 F
5 105 David 27 HR 58000 3 M
6 106 Eve 26 Finance 62000 2 F
7 107 Frank 29 IT 70000 5 M
8 108 Grace 31 HR 65000 7 F
9 109 Henry 35 IT 80000 10 M
10 110 Irene 29 Finance 67000 5 F
11 111 Jack 32 HR 72000 8 M
12 112 Kate 26 IT 62000 2 F
13 113 Liam 33 HR 75000 9 M
14 114 Mia 27 IT 68000 3 F
15 115 Noah 29 Finance 71000 6 M
16 116 Olivia 30 IT 73000 7 F
17 117 Paul 34 HR 77000 10 M
18 118 Quinn 28 Finance 69000 4 F
19 119 Ryan 31 IT 76000 8 M
20 120 Sophia 25 HR 54000 2 F


/* Checking basic statistics */

PROC MEANS DATA=Employee;

    VAR Salary Age Experience;

RUN;


Output:

                                                            The MEANS Procedure

Variable N Mean Std Dev Minimum Maximum
Salary
Age
Experience
20
20
20
66950.00
28.9500000
5.2000000
8444.71
3.0860467
2.8580450
50000.00
24.0000000
1.0000000
80000.00
35.0000000
10.0000000


/* Sorting the dataset by Department and then by Salary */

PROC SORT DATA=Employee;

    BY Department Salary;

RUN;

PROC PRINT DATA=Employee;

RUN;


Output:

Obs EmpID Name Age Department Salary Experience Gender
1 103 Bob 24 Finance 55000 1 M
2 106 Eve 26 Finance 62000 2 F
3 110 Irene 29 Finance 67000 5 F
4 118 Quinn 28 Finance 69000 4 F
5 115 Noah 29 Finance 71000 6 M
6 101 John 25 HR 50000 2 M
7 120 Sophia 25 HR 54000 2 F
8 105 David 27 HR 58000 3 M
9 108 Grace 31 HR 65000 7 F
10 111 Jack 32 HR 72000 8 M
11 113 Liam 33 HR 75000 9 M
12 117 Paul 34 HR 77000 10 M
13 102 Alice 28 IT 60000 4 F
14 112 Kate 26 IT 62000 2 F
15 114 Mia 27 IT 68000 3 F
16 107 Frank 29 IT 70000 5 M
17 116 Olivia 30 IT 73000 7 F
18 104 Clara 30 IT 75000 6 F
19 119 Ryan 31 IT 76000 8 M
20 109 Henry 35 IT 80000 10 M


/* Summarizing salary and experience details by Department */

PROC MEANS DATA=Employee MEAN MIN MAX;

    CLASS Department;

    VAR Salary Experience;

RUN;


Output:

                                                            The MEANS Procedure

Department N Obs Variable Mean Minimum Maximum
Finance 5
Salary
Experience
64800.00
3.6000000
55000.00
1.0000000
71000.00
6.0000000
HR 7
Salary
Experience
64428.57
5.8571429
50000.00
2.0000000
77000.00
10.0000000
IT 8
Salary
Experience
70500.00
5.6250000
60000.00
2.0000000
80000.00
10.0000000


/* Finding the frequency of employees per department and gender */

PROC FREQ DATA=Employee;

    TABLES Department Gender;

RUN;


Output:

                                                               The FREQ Procedure

Department Frequency Percent Cumulative
Frequency
Cumulative
Percent
Finance 5 25.00 5 25.00
HR 7 35.00 12 60.00
IT 8 40.00 20 100.00

Gender Frequency Percent Cumulative
Frequency
Cumulative
Percent
F 10 50.00 10 50.00
M 10 50.00 20 100.00

/* Generating a report with more details */

PROC REPORT DATA=Employee NOWD;

    COLUMN EmpID Name Age Gender Department Salary Experience;

    DEFINE EmpID / DISPLAY 'Employee ID';

    DEFINE Name / DISPLAY 'Employee Name';

    DEFINE Age / ANALYSIS MEAN 'Average Age';

    DEFINE Gender / GROUP 'Gender';

    DEFINE Department / GROUP 'Department';

    DEFINE Salary / ANALYSIS SUM 'Total Salary';

    DEFINE Experience / ANALYSIS MEAN 'Avg Experience';

RUN;


Output:

Employee ID Employee Name Average Age Gender Department Total Salary Avg Experience
106 Eve 26 F Finance 62000 2
110 Irene 29     67000 5
118 Quinn 28     69000 4
120 Sophia 25   HR 54000 2
108 Grace 31     65000 7
102 Alice 28   IT 60000 4
112 Kate 26     62000 2
114 Mia 27     68000 3
116 Olivia 30     73000 7
104 Clara 30     75000 6
103 Bob 24 M Finance 55000 1
115 Noah 29     71000 6
101 John 25   HR 50000 2
105 David 27     58000 3
111 Jack 32     72000 8
113 Liam 33     75000 9
117 Paul 34     77000 10
107 Frank 29   IT 70000 5
119 Ryan 31     76000 8
109 Henry 35     80000 10


/* Creating an output dataset with calculated bonus (10% of Salary) and tax (5% of Salary) */

DATA Employee_Bonus_Tax;

    SET Employee;

    Bonus = Salary * 0.10;

    Tax = Salary * 0.05;

    Net_Salary = Salary + Bonus - Tax;

RUN;

/* Displaying the new dataset */

PROC PRINT DATA=Employee_Bonus_Tax;

RUN;


Output:

Obs EmpID Name Age Department Salary Experience Gender Bonus Tax Net_Salary
1 103 Bob 24 Finance 55000 1 M 5500 2750 57750
2 106 Eve 26 Finance 62000 2 F 6200 3100 65100
3 110 Irene 29 Finance 67000 5 F 6700 3350 70350
4 118 Quinn 28 Finance 69000 4 F 6900 3450 72450
5 115 Noah 29 Finance 71000 6 M 7100 3550 74550
6 101 John 25 HR 50000 2 M 5000 2500 52500
7 120 Sophia 25 HR 54000 2 F 5400 2700 56700
8 105 David 27 HR 58000 3 M 5800 2900 60900
9 108 Grace 31 HR 65000 7 F 6500 3250 68250
10 111 Jack 32 HR 72000 8 M 7200 3600 75600
11 113 Liam 33 HR 75000 9 M 7500 3750 78750
12 117 Paul 34 HR 77000 10 M 7700 3850 80850
13 102 Alice 28 IT 60000 4 F 6000 3000 63000
14 112 Kate 26 IT 62000 2 F 6200 3100 65100
15 114 Mia 27 IT 68000 3 F 6800 3400 71400
16 107 Frank 29 IT 70000 5 M 7000 3500 73500
17 116 Olivia 30 IT 73000 7 F 7300 3650 76650
18 104 Clara 30 IT 75000 6 F 7500 3750 78750
19 119 Ryan 31 IT 76000 8 M 7600 3800 79800
20 109 Henry 35 IT 80000 10 M 8000 4000 84000


/* Generating a boxplot for Salary distribution by Gender and Department */

PROC SGPLOT DATA=Employee;

    VBOX Salary / CATEGORY=Department GROUP=Gender;

    TITLE 'Salary Distribution by Department and Gender';

RUN;


/* Applying a filter to select IT employees with experience greater than 3 years */

DATA IT_Experienced_Employees;

    SET Employee;

    WHERE Department = 'IT' AND Experience > 3;

RUN;

/* Displaying the filtered dataset */

PROC PRINT DATA=IT_Experienced_Employees;

RUN;


Output:


Obs EmpID Name Age Department Salary Experience Gender
1 102 Alice 28 IT 60000 4 F
2 107 Frank 29 IT 70000 5 M
3 116 Olivia 30 IT 73000 7 F
4 104 Clara 30 IT 75000 6 F
5 119 Ryan 31 IT 76000 8 M
6 109 Henry 35 IT 80000 10 M


/* Generating a summary table with PROC TABULATE */

PROC TABULATE DATA=Employee;

    CLASS Department Gender;

    VAR Salary Experience;

    TABLE Department*Gender, (Salary Experience)*(MEAN MIN MAX);

RUN;


Output:

  Salary Experience
Mean Min Max Mean Min Max
Department Gender 66000.00 62000.00 69000.00 3.67 2.00 5.00
Finance F
M 63000.00 55000.00 71000.00 3.50 1.00 6.00
HR F 59500.00 54000.00 65000.00 4.50 2.00 7.00
M 66400.00 50000.00 77000.00 6.40 2.00 10.00
IT F 67600.00 60000.00 75000.00 4.40 2.00 7.00
M 75333.33 70000.00 80000.00 7.67 5.00 10.00


/* Generating a cross-tabulation */

PROC FREQ DATA=Employee;

    TABLES Department*Age / CHISQ;

RUN;


Output:

                                                                                                                      The FREQ Procedure

Frequency
Percent
Row Pct
Col Pct
Table of Department by Age
Department Age
24 25 26 27 28 29 30 31 32 33 34 35 Total
Finance
1
5.00
20.00
100.00
0
0.00
0.00
0.00
1
5.00
20.00
50.00
0
0.00
0.00
0.00
1
5.00
20.00
50.00
2
10.00
40.00
66.67
0
0.00
0.00
0.00
0
0.00
0.00
0.00
0
0.00
0.00
0.00
0
0.00
0.00
0.00
0
0.00
0.00
0.00
0
0.00
0.00
0.00
5
25.00
 
 
HR
0
0.00
0.00
0.00
2
10.00
28.57
100.00
0
0.00
0.00
0.00
1
5.00
14.29
50.00
0
0.00
0.00
0.00
0
0.00
0.00
0.00
0
0.00
0.00
0.00
1
5.00
14.29
50.00
1
5.00
14.29
100.00
1
5.00
14.29
100.00
1
5.00
14.29
100.00
0
0.00
0.00
0.00
7
35.00
 
 
IT
0
0.00
0.00
0.00
0
0.00
0.00
0.00
1
5.00
12.50
50.00
1
5.00
12.50
50.00
1
5.00
12.50
50.00
1
5.00
12.50
33.33
2
10.00
25.00
100.00
1
5.00
12.50
50.00
0
0.00
0.00
0.00
0
0.00
0.00
0.00
0
0.00
0.00
0.00
1
5.00
12.50
100.00
8
40.00
 
 
Total
1
5.00
2
10.00
2
10.00
2
10.00
2
10.00
3
15.00
2
10.00
2
10.00
1
5.00
1
5.00
1
5.00
1
5.00
20
100.00

Statistics for Table of Department by Age

Statistic DF Value Prob
Chi-Square 22 23.8095 0.3573
Likelihood Ratio Chi-Square 22 28.3117 0.1656
Mantel-Haenszel Chi-Square 1 1.4468 0.2290
Phi Coefficient   1.0911  
Contingency Coefficient   0.7372  
Cramer's V   0.7715  
WARNING: 100% of the cells have expected counts less
than 5. Chi-Square may not be a valid test.

Sample Size = 20


/* Exporting the dataset to a CSV file for further analysis */

PROC EXPORT DATA=Employee

    OUTFILE='Employee_Data.csv'

    DBMS=CSV

    REPLACE;

RUN;


/* Generating a histogram for salary distribution */

PROC SGPLOT DATA=Employee;

    HISTOGRAM Salary / BINWIDTH=5000;

    DENSITY Salary / TYPE=NORMAL;

    TITLE 'Histogram of Salary Distribution';

RUN;


/* Generating a correlation matrix for Salary and Experience */

PROC CORR DATA=Employee;

    VAR Salary Experience;

RUN;


Output:

                                                                   The CORR Procedure

2 Variables: Salary Experience


Simple Statistics
Variable N Mean Std Dev Sum Minimum Maximum
Salary 20 66950 8445 1339000 50000 80000
Experience 20 5.20000 2.85804 104.00000 1.00000 10.00000


Pearson Correlation Coefficients, N = 20
Prob > |r| under H0: Rho=0
  Salary Experience
Salary
1.00000
 
0.86835
<.0001
Experience
0.86835
<.0001
1.00000
 


/* Creating a new dataset categorizing employees based on salary range */

DATA Employee_Categories;

    SET Employee;

    LENGTH Salary_Category $15;

    IF Salary < 60000 THEN Salary_Category = 'Low';

   ELSE IF 60000 <= Salary <= 75000 THEN Salary_Category = 'Medium';

    ELSE Salary_Category = 'High';

RUN;

/* Displaying categorized data */

PROC PRINT DATA=Employee_Categories;

RUN;


Output:

Obs EmpID Name Age Department Salary Experience Gender Salary_Category
1 103 Bob 24 Finance 55000 1 M Low
2 106 Eve 26 Finance 62000 2 F Medium
3 110 Irene 29 Finance 67000 5 F Medium
4 118 Quinn 28 Finance 69000 4 F Medium
5 115 Noah 29 Finance 71000 6 M Medium
6 101 John 25 HR 50000 2 M Low
7 120 Sophia 25 HR 54000 2 F Low
8 105 David 27 HR 58000 3 M Low
9 108 Grace 31 HR 65000 7 F Medium
10 111 Jack 32 HR 72000 8 M Medium
11 113 Liam 33 HR 75000 9 M Medium
12 117 Paul 34 HR 77000 10 M High
13 102 Alice 28 IT 60000 4 F Medium
14 112 Kate 26 IT 62000 2 F Medium
15 114 Mia 27 IT 68000 3 F Medium
16 107 Frank 29 IT 70000 5 M Medium
17 116 Olivia 30 IT 73000 7 F Medium
18 104 Clara 30 IT 75000 6 F Medium
19 119 Ryan 31 IT 76000 8 M High
20 109 Henry 35 IT 80000 10 M High


/* Calculating the average salary per department and gender using PROC SQL */

PROC SQL;

    CREATE TABLE Avg_Salary AS

    SELECT Department, Gender, AVG(Salary) AS Avg_Salary

    FROM Employee

    GROUP BY Department, Gender;

QUIT;

/* Displaying the average salary table */

PROC PRINT DATA=Avg_Salary;

RUN;


Output:

Obs Department Gender Avg_Salary
1 Finance F 66000.00
2 Finance M 63000.00
3 HR F 59500.00
4 HR M 66400.00
5 IT F 67600.00
6 IT M 75333.33


/* Finding the highest paid employee in each department */

PROC SQL;

    CREATE TABLE Max_Salary AS

    SELECT Department, Name, Salary

    FROM Employee

    WHERE Salary IN (SELECT MAX(Salary) FROM Employee GROUP BY Department);

QUIT;

/* Displaying the highest paid employees */

PROC PRINT DATA=Max_Salary;

RUN;


Output:

Obs Department Name Salary
1 Finance Noah 71000
2 HR Paul 77000
3 IT Henry 80000

PRACTICE AND COMMENT YOUR CODE: 

-->PLEASE FOLLOW OUR BLOG FOR MORE UPDATES.

TO FOLLOW OUR TELEGRAM CHANNEL CLICK HERE

Comments