133.NEW EXAMPLE EMPLOYEES : INPUT | PROC MEANS | PROC SORT | PROC FREQ |PROC REPORT | PROC SGPLOT | PROC TABULATE | PROC FREQ CHISQ | PROC EXPORT | PROC CORR | PROC SQL PROCEDURES
- Get link
- X
- Other Apps
NEW EXAMPLE EMPLOYEES : INPUT | PROC MEANS | PROC SORT | PROC FREQ |PROC REPORT | PROC SGPLOT | PROC TABULATE | PROC FREQ CHISQ | PROC EXPORT | PROC CORR | PROC SQL PROCEDURES
/* Creating a dataset for Employee Details with additional employees */
DATA Employee;
INPUT EmpID Name $ Age Department $ Salary Experience Gender $;
DATALINES;
101 John 25 HR 50000 2 M
102 Alice 28 IT 60000 4 F
103 Bob 24 Finance 55000 1 M
104 Clara 30 IT 75000 6 F
105 David 27 HR 58000 3 M
106 Eve 26 Finance 62000 2 F
107 Frank 29 IT 70000 5 M
108 Grace 31 HR 65000 7 F
109 Henry 35 IT 80000 10 M
110 Irene 29 Finance 67000 5 F
111 Jack 32 HR 72000 8 M
112 Kate 26 IT 62000 2 F
113 Liam 33 HR 75000 9 M
114 Mia 27 IT 68000 3 F
115 Noah 29 Finance 71000 6 M
116 Olivia 30 IT 73000 7 F
117 Paul 34 HR 77000 10 M
118 Quinn 28 Finance 69000 4 F
119 Ryan 31 IT 76000 8 M
120 Sophia 25 HR 54000 2 F
;
RUN;
/* Displaying the dataset */
PROC PRINT DATA=Employee;
RUN;
Output:
Obs | EmpID | Name | Age | Department | Salary | Experience | Gender |
---|---|---|---|---|---|---|---|
1 | 101 | John | 25 | HR | 50000 | 2 | M |
2 | 102 | Alice | 28 | IT | 60000 | 4 | F |
3 | 103 | Bob | 24 | Finance | 55000 | 1 | M |
4 | 104 | Clara | 30 | IT | 75000 | 6 | F |
5 | 105 | David | 27 | HR | 58000 | 3 | M |
6 | 106 | Eve | 26 | Finance | 62000 | 2 | F |
7 | 107 | Frank | 29 | IT | 70000 | 5 | M |
8 | 108 | Grace | 31 | HR | 65000 | 7 | F |
9 | 109 | Henry | 35 | IT | 80000 | 10 | M |
10 | 110 | Irene | 29 | Finance | 67000 | 5 | F |
11 | 111 | Jack | 32 | HR | 72000 | 8 | M |
12 | 112 | Kate | 26 | IT | 62000 | 2 | F |
13 | 113 | Liam | 33 | HR | 75000 | 9 | M |
14 | 114 | Mia | 27 | IT | 68000 | 3 | F |
15 | 115 | Noah | 29 | Finance | 71000 | 6 | M |
16 | 116 | Olivia | 30 | IT | 73000 | 7 | F |
17 | 117 | Paul | 34 | HR | 77000 | 10 | M |
18 | 118 | Quinn | 28 | Finance | 69000 | 4 | F |
19 | 119 | Ryan | 31 | IT | 76000 | 8 | M |
20 | 120 | Sophia | 25 | HR | 54000 | 2 | F |
/* Checking basic statistics */
PROC MEANS DATA=Employee;
VAR Salary Age Experience;
RUN;
Output:
Variable | N | Mean | Std Dev | Minimum | Maximum | ||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
|
|
|
|
/* Sorting the dataset by Department and then by Salary */
PROC SORT DATA=Employee;
BY Department Salary;
RUN;
PROC PRINT DATA=Employee;
RUN;
Output:
Obs | EmpID | Name | Age | Department | Salary | Experience | Gender |
---|---|---|---|---|---|---|---|
1 | 103 | Bob | 24 | Finance | 55000 | 1 | M |
2 | 106 | Eve | 26 | Finance | 62000 | 2 | F |
3 | 110 | Irene | 29 | Finance | 67000 | 5 | F |
4 | 118 | Quinn | 28 | Finance | 69000 | 4 | F |
5 | 115 | Noah | 29 | Finance | 71000 | 6 | M |
6 | 101 | John | 25 | HR | 50000 | 2 | M |
7 | 120 | Sophia | 25 | HR | 54000 | 2 | F |
8 | 105 | David | 27 | HR | 58000 | 3 | M |
9 | 108 | Grace | 31 | HR | 65000 | 7 | F |
10 | 111 | Jack | 32 | HR | 72000 | 8 | M |
11 | 113 | Liam | 33 | HR | 75000 | 9 | M |
12 | 117 | Paul | 34 | HR | 77000 | 10 | M |
13 | 102 | Alice | 28 | IT | 60000 | 4 | F |
14 | 112 | Kate | 26 | IT | 62000 | 2 | F |
15 | 114 | Mia | 27 | IT | 68000 | 3 | F |
16 | 107 | Frank | 29 | IT | 70000 | 5 | M |
17 | 116 | Olivia | 30 | IT | 73000 | 7 | F |
18 | 104 | Clara | 30 | IT | 75000 | 6 | F |
19 | 119 | Ryan | 31 | IT | 76000 | 8 | M |
20 | 109 | Henry | 35 | IT | 80000 | 10 | M |
/* Summarizing salary and experience details by Department */
PROC MEANS DATA=Employee MEAN MIN MAX;
CLASS Department;
VAR Salary Experience;
RUN;
Output:
Department | N Obs | Variable | Mean | Minimum | Maximum | ||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Finance | 5 |
|
|
|
| ||||||||
HR | 7 |
|
|
|
| ||||||||
IT | 8 |
|
|
|
|
/* Finding the frequency of employees per department and gender */
PROC FREQ DATA=Employee;
TABLES Department Gender;
RUN;
Output:
The FREQ Procedure
Department | Frequency | Percent | Cumulative Frequency |
Cumulative Percent |
---|---|---|---|---|
Finance | 5 | 25.00 | 5 | 25.00 |
HR | 7 | 35.00 | 12 | 60.00 |
IT | 8 | 40.00 | 20 | 100.00 |
Gender | Frequency | Percent | Cumulative Frequency |
Cumulative Percent |
---|---|---|---|---|
F | 10 | 50.00 | 10 | 50.00 |
M | 10 | 50.00 | 20 | 100.00 |
/* Generating a report with more details */
PROC REPORT DATA=Employee NOWD;
COLUMN EmpID Name Age Gender Department Salary Experience;
DEFINE EmpID / DISPLAY 'Employee ID';
DEFINE Name / DISPLAY 'Employee Name';
DEFINE Age / ANALYSIS MEAN 'Average Age';
DEFINE Gender / GROUP 'Gender';
DEFINE Department / GROUP 'Department';
DEFINE Salary / ANALYSIS SUM 'Total Salary';
DEFINE Experience / ANALYSIS MEAN 'Avg Experience';
RUN;
Output:
Employee ID | Employee Name | Average Age | Gender | Department | Total Salary | Avg Experience |
---|---|---|---|---|---|---|
106 | Eve | 26 | F | Finance | 62000 | 2 |
110 | Irene | 29 | 67000 | 5 | ||
118 | Quinn | 28 | 69000 | 4 | ||
120 | Sophia | 25 | HR | 54000 | 2 | |
108 | Grace | 31 | 65000 | 7 | ||
102 | Alice | 28 | IT | 60000 | 4 | |
112 | Kate | 26 | 62000 | 2 | ||
114 | Mia | 27 | 68000 | 3 | ||
116 | Olivia | 30 | 73000 | 7 | ||
104 | Clara | 30 | 75000 | 6 | ||
103 | Bob | 24 | M | Finance | 55000 | 1 |
115 | Noah | 29 | 71000 | 6 | ||
101 | John | 25 | HR | 50000 | 2 | |
105 | David | 27 | 58000 | 3 | ||
111 | Jack | 32 | 72000 | 8 | ||
113 | Liam | 33 | 75000 | 9 | ||
117 | Paul | 34 | 77000 | 10 | ||
107 | Frank | 29 | IT | 70000 | 5 | |
119 | Ryan | 31 | 76000 | 8 | ||
109 | Henry | 35 | 80000 | 10 |
/* Creating an output dataset with calculated bonus (10% of Salary) and tax (5% of Salary) */
DATA Employee_Bonus_Tax;
SET Employee;
Bonus = Salary * 0.10;
Tax = Salary * 0.05;
Net_Salary = Salary + Bonus - Tax;
RUN;
/* Displaying the new dataset */
PROC PRINT DATA=Employee_Bonus_Tax;
RUN;
Output:
Obs | EmpID | Name | Age | Department | Salary | Experience | Gender | Bonus | Tax | Net_Salary |
---|---|---|---|---|---|---|---|---|---|---|
1 | 103 | Bob | 24 | Finance | 55000 | 1 | M | 5500 | 2750 | 57750 |
2 | 106 | Eve | 26 | Finance | 62000 | 2 | F | 6200 | 3100 | 65100 |
3 | 110 | Irene | 29 | Finance | 67000 | 5 | F | 6700 | 3350 | 70350 |
4 | 118 | Quinn | 28 | Finance | 69000 | 4 | F | 6900 | 3450 | 72450 |
5 | 115 | Noah | 29 | Finance | 71000 | 6 | M | 7100 | 3550 | 74550 |
6 | 101 | John | 25 | HR | 50000 | 2 | M | 5000 | 2500 | 52500 |
7 | 120 | Sophia | 25 | HR | 54000 | 2 | F | 5400 | 2700 | 56700 |
8 | 105 | David | 27 | HR | 58000 | 3 | M | 5800 | 2900 | 60900 |
9 | 108 | Grace | 31 | HR | 65000 | 7 | F | 6500 | 3250 | 68250 |
10 | 111 | Jack | 32 | HR | 72000 | 8 | M | 7200 | 3600 | 75600 |
11 | 113 | Liam | 33 | HR | 75000 | 9 | M | 7500 | 3750 | 78750 |
12 | 117 | Paul | 34 | HR | 77000 | 10 | M | 7700 | 3850 | 80850 |
13 | 102 | Alice | 28 | IT | 60000 | 4 | F | 6000 | 3000 | 63000 |
14 | 112 | Kate | 26 | IT | 62000 | 2 | F | 6200 | 3100 | 65100 |
15 | 114 | Mia | 27 | IT | 68000 | 3 | F | 6800 | 3400 | 71400 |
16 | 107 | Frank | 29 | IT | 70000 | 5 | M | 7000 | 3500 | 73500 |
17 | 116 | Olivia | 30 | IT | 73000 | 7 | F | 7300 | 3650 | 76650 |
18 | 104 | Clara | 30 | IT | 75000 | 6 | F | 7500 | 3750 | 78750 |
19 | 119 | Ryan | 31 | IT | 76000 | 8 | M | 7600 | 3800 | 79800 |
20 | 109 | Henry | 35 | IT | 80000 | 10 | M | 8000 | 4000 | 84000 |
/* Generating a boxplot for Salary distribution by Gender and Department */
PROC SGPLOT DATA=Employee;
VBOX Salary / CATEGORY=Department GROUP=Gender;
TITLE 'Salary Distribution by Department and Gender';
RUN;
/* Applying a filter to select IT employees with experience greater than 3 years */
DATA IT_Experienced_Employees;
SET Employee;
WHERE Department = 'IT' AND Experience > 3;
RUN;
/* Displaying the filtered dataset */
PROC PRINT DATA=IT_Experienced_Employees;
RUN;
Output:
Obs | EmpID | Name | Age | Department | Salary | Experience | Gender |
---|---|---|---|---|---|---|---|
1 | 102 | Alice | 28 | IT | 60000 | 4 | F |
2 | 107 | Frank | 29 | IT | 70000 | 5 | M |
3 | 116 | Olivia | 30 | IT | 73000 | 7 | F |
4 | 104 | Clara | 30 | IT | 75000 | 6 | F |
5 | 119 | Ryan | 31 | IT | 76000 | 8 | M |
6 | 109 | Henry | 35 | IT | 80000 | 10 | M |
/* Generating a summary table with PROC TABULATE */
PROC TABULATE DATA=Employee;
CLASS Department Gender;
VAR Salary Experience;
TABLE Department*Gender, (Salary Experience)*(MEAN MIN MAX);
RUN;
Output:
Salary | Experience | ||||||
---|---|---|---|---|---|---|---|
Mean | Min | Max | Mean | Min | Max | ||
Department | Gender | 66000.00 | 62000.00 | 69000.00 | 3.67 | 2.00 | 5.00 |
Finance | F | ||||||
M | 63000.00 | 55000.00 | 71000.00 | 3.50 | 1.00 | 6.00 | |
HR | F | 59500.00 | 54000.00 | 65000.00 | 4.50 | 2.00 | 7.00 |
M | 66400.00 | 50000.00 | 77000.00 | 6.40 | 2.00 | 10.00 | |
IT | F | 67600.00 | 60000.00 | 75000.00 | 4.40 | 2.00 | 7.00 |
M | 75333.33 | 70000.00 | 80000.00 | 7.67 | 5.00 | 10.00 |
/* Generating a cross-tabulation */
PROC FREQ DATA=Employee;
TABLES Department*Age / CHISQ;
RUN;
Output:
|
|
Statistics for Table of Department by Age |
Statistic | DF | Value | Prob |
---|---|---|---|
Chi-Square | 22 | 23.8095 | 0.3573 |
Likelihood Ratio Chi-Square | 22 | 28.3117 | 0.1656 |
Mantel-Haenszel Chi-Square | 1 | 1.4468 | 0.2290 |
Phi Coefficient | 1.0911 | ||
Contingency Coefficient | 0.7372 | ||
Cramer's V | 0.7715 | ||
WARNING: 100% of the cells have expected counts
less than 5. Chi-Square may not be a valid test. |
Sample Size = 20 |
/* Exporting the dataset to a CSV file for further analysis */
PROC EXPORT DATA=Employee
OUTFILE='Employee_Data.csv'
DBMS=CSV
REPLACE;
RUN;
/* Generating a histogram for salary distribution */
PROC SGPLOT DATA=Employee;
HISTOGRAM Salary / BINWIDTH=5000;
DENSITY Salary / TYPE=NORMAL;
TITLE 'Histogram of Salary Distribution';
RUN;
/* Generating a correlation matrix for Salary and Experience */
PROC CORR DATA=Employee;
VAR Salary Experience;
RUN;
Output:
2 Variables: | Salary Experience |
---|
Simple Statistics | ||||||
---|---|---|---|---|---|---|
Variable | N | Mean | Std Dev | Sum | Minimum | Maximum |
Salary | 20 | 66950 | 8445 | 1339000 | 50000 | 80000 |
Experience | 20 | 5.20000 | 2.85804 | 104.00000 | 1.00000 | 10.00000 |
Pearson Correlation
Coefficients, N = 20 Prob > |r| under H0: Rho=0 | ||||||
---|---|---|---|---|---|---|
Salary | Experience | |||||
Salary |
|
| ||||
Experience |
|
|
/* Creating a new dataset categorizing employees based on salary range */
DATA Employee_Categories;
SET Employee;
LENGTH Salary_Category $15;
IF Salary < 60000 THEN Salary_Category = 'Low';
ELSE IF 60000 <= Salary <= 75000 THEN Salary_Category = 'Medium';
ELSE Salary_Category = 'High';
RUN;
/* Displaying categorized data */
PROC PRINT DATA=Employee_Categories;
RUN;
Output:
Obs | EmpID | Name | Age | Department | Salary | Experience | Gender | Salary_Category |
---|---|---|---|---|---|---|---|---|
1 | 103 | Bob | 24 | Finance | 55000 | 1 | M | Low |
2 | 106 | Eve | 26 | Finance | 62000 | 2 | F | Medium |
3 | 110 | Irene | 29 | Finance | 67000 | 5 | F | Medium |
4 | 118 | Quinn | 28 | Finance | 69000 | 4 | F | Medium |
5 | 115 | Noah | 29 | Finance | 71000 | 6 | M | Medium |
6 | 101 | John | 25 | HR | 50000 | 2 | M | Low |
7 | 120 | Sophia | 25 | HR | 54000 | 2 | F | Low |
8 | 105 | David | 27 | HR | 58000 | 3 | M | Low |
9 | 108 | Grace | 31 | HR | 65000 | 7 | F | Medium |
10 | 111 | Jack | 32 | HR | 72000 | 8 | M | Medium |
11 | 113 | Liam | 33 | HR | 75000 | 9 | M | Medium |
12 | 117 | Paul | 34 | HR | 77000 | 10 | M | High |
13 | 102 | Alice | 28 | IT | 60000 | 4 | F | Medium |
14 | 112 | Kate | 26 | IT | 62000 | 2 | F | Medium |
15 | 114 | Mia | 27 | IT | 68000 | 3 | F | Medium |
16 | 107 | Frank | 29 | IT | 70000 | 5 | M | Medium |
17 | 116 | Olivia | 30 | IT | 73000 | 7 | F | Medium |
18 | 104 | Clara | 30 | IT | 75000 | 6 | F | Medium |
19 | 119 | Ryan | 31 | IT | 76000 | 8 | M | High |
20 | 109 | Henry | 35 | IT | 80000 | 10 | M | High |
/* Calculating the average salary per department and gender using PROC SQL */
PROC SQL;
CREATE TABLE Avg_Salary AS
SELECT Department, Gender, AVG(Salary) AS Avg_Salary
FROM Employee
GROUP BY Department, Gender;
QUIT;
/* Displaying the average salary table */
PROC PRINT DATA=Avg_Salary;
RUN;
Output:
Obs | Department | Gender | Avg_Salary |
---|---|---|---|
1 | Finance | F | 66000.00 |
2 | Finance | M | 63000.00 |
3 | HR | F | 59500.00 |
4 | HR | M | 66400.00 |
5 | IT | F | 67600.00 |
6 | IT | M | 75333.33 |
/* Finding the highest paid employee in each department */
PROC SQL;
CREATE TABLE Max_Salary AS
SELECT Department, Name, Salary
FROM Employee
WHERE Salary IN (SELECT MAX(Salary) FROM Employee GROUP BY Department);
QUIT;
/* Displaying the highest paid employees */
PROC PRINT DATA=Max_Salary;
RUN;
Output:
Obs | Department | Name | Salary |
---|---|---|---|
1 | Finance | Noah | 71000 |
2 | HR | Paul | 77000 |
3 | IT | Henry | 80000 |
- Get link
- X
- Other Apps
Comments
Post a Comment