174.BUILDING A COMPREHENSIVE CARS DATASET USING PROC SQL | PROC MEANS | PROC FREQ | PROC REG | PROC CORR | MACROS IN SAS
- Get link
- X
- Other Apps
BUILDING A COMPREHENSIVE CARS DATASET USING PROC SQL | PROC MEANS | PROC FREQ | PROC REG | PROC CORR | MACROS IN SAS
/*Creating a unique dataset named Cars*/
/*Creating a Unique Car Dataset*/
data mycars;
input Make $ Model $ Year EngineSize Horsepower Weight MPG_City MPG_Highway Price Origin $;
datalines;
Toyota Corolla 2020 1.8 139 2840 30 38 20000 Asia
Ford Focus 2019 2.0 160 2950 28 36 19000 USA
Honda Civic 2021 2.0 158 2760 32 42 21000 Asia
BMW 320i 2020 2.0 180 3300 24 36 35000 Europe
Chevrolet Malibu 2018 1.5 160 3150 27 36 22000 USA
Hyundai Elantra 2021 2.0 147 2800 33 43 19500 Asia
Volkswagen Jetta 2019 1.4 147 2900 30 40 20500 Europe
Nissan Sentra 2020 2.0 149 2900 29 39 20000 Asia
Kia Forte 2021 2.0 147 2800 31 41 19500 Asia
Mazda 3 2020 2.5 186 3090 26 35 21000 Asia
;
run;
proc print data=mycars;
run;
Output:
Obs | Make | Model | Year | EngineSize | Horsepower | Weight | MPG_City | MPG_Highway | Price | Origin |
---|---|---|---|---|---|---|---|---|---|---|
1 | Toyota | Corolla | 2020 | 1.8 | 139 | 2840 | 30 | 38 | 20000 | Asia |
2 | Ford | Focus | 2019 | 2.0 | 160 | 2950 | 28 | 36 | 19000 | USA |
3 | Honda | Civic | 2021 | 2.0 | 158 | 2760 | 32 | 42 | 21000 | Asia |
4 | BMW | 320i | 2020 | 2.0 | 180 | 3300 | 24 | 36 | 35000 | Europe |
5 | Chevrole | Malibu | 2018 | 1.5 | 160 | 3150 | 27 | 36 | 22000 | USA |
6 | Hyundai | Elantra | 2021 | 2.0 | 147 | 2800 | 33 | 43 | 19500 | Asia |
7 | Volkswag | Jetta | 2019 | 1.4 | 147 | 2900 | 30 | 40 | 20500 | Europe |
8 | Nissan | Sentra | 2020 | 2.0 | 149 | 2900 | 29 | 39 | 20000 | Asia |
9 | Kia | Forte | 2021 | 2.0 | 147 | 2800 | 31 | 41 | 19500 | Asia |
10 | Mazda | 3 | 2020 | 2.5 | 186 | 3090 | 26 | 35 | 21000 | Asia |
/*Exploring the Dataset*/
proc contents data=mycars;
run;
Output:
Data Set Name | WORK.MYCARS | Observations | 10 |
---|---|---|---|
Member Type | DATA | Variables | 10 |
Engine | V9 | Indexes | 0 |
Created | 14/09/2015 00:02:33 | Observation Length | 80 |
Last Modified | 14/09/2015 00:02:33 | Deleted Observations | 0 |
Protection | Compressed | NO | |
Data Set Type | Sorted | NO | |
Label | |||
Data Representation | WINDOWS_64 | ||
Encoding | wlatin1 Western (Windows) |
Engine/Host Dependent Information | |
---|---|
Data Set Page Size | 65536 |
Number of Data Set Pages | 1 |
First Data Page | 1 |
Max Obs per Page | 817 |
Obs in First Data Page | 10 |
Number of Data Set Repairs | 0 |
ExtendObsCounter | YES |
Filename | C:\Users\Lenovo\AppData\Local\Temp\SAS Temporary Files\_TD5680_DESKTOP-QFAA4KV_\mycars.sas7bdat |
Release Created | 9.0401M2 |
Host Created | X64_8HOME |
Alphabetic List of Variables and Attributes | |||
---|---|---|---|
# | Variable | Type | Len |
4 | EngineSize | Num | 8 |
5 | Horsepower | Num | 8 |
7 | MPG_City | Num | 8 |
8 | MPG_Highway | Num | 8 |
1 | Make | Char | 8 |
2 | Model | Char | 8 |
10 | Origin | Char | 8 |
9 | Price | Num | 8 |
6 | Weight | Num | 8 |
3 | Year | Num | 8 |
/*Summary Statistics*/
proc means data=mycars mean std min max;
var EngineSize Horsepower Weight MPG_City MPG_Highway Price;
run;
Output:
Variable | Mean | Std Dev | Minimum | Maximum | ||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
|
|
|
/*Creating a New Table with Calculated Fields*/
proc sql;
create table car_efficiency as
select Make, Model, Year,
MPG_City, MPG_Highway,
(MPG_City + MPG_Highway)/2 as MPG_Average
from mycars;
quit;
proc print;run;
Output:
Obs | Make | Model | Year | MPG_City | MPG_Highway | MPG_Average |
---|---|---|---|---|---|---|
1 | Toyota | Corolla | 2020 | 30 | 38 | 34.0 |
2 | Ford | Focus | 2019 | 28 | 36 | 32.0 |
3 | Honda | Civic | 2021 | 32 | 42 | 37.0 |
4 | BMW | 320i | 2020 | 24 | 36 | 30.0 |
5 | Chevrole | Malibu | 2018 | 27 | 36 | 31.5 |
6 | Hyundai | Elantra | 2021 | 33 | 43 | 38.0 |
7 | Volkswag | Jetta | 2019 | 30 | 40 | 35.0 |
8 | Nissan | Sentra | 2020 | 29 | 39 | 34.0 |
9 | Kia | Forte | 2021 | 31 | 41 | 36.0 |
10 | Mazda | 3 | 2020 | 26 | 35 | 30.5 |
/*Filtering Data*/
proc sql;
select * from mycars
where Origin = 'Asia' and Price < 20000;
quit;
Output:
Make | Model | Year | EngineSize | Horsepower | Weight | MPG_City | MPG_Highway | Price | Origin |
---|---|---|---|---|---|---|---|---|---|
Hyundai | Elantra | 2021 | 2 | 147 | 2800 | 33 | 43 | 19500 | Asia |
Kia | Forte | 2021 | 2 | 147 | 2800 | 31 | 41 | 19500 | Asia |
/*Defining a Macro to Filter by Origin*/
%macro filter_origin(origin);
proc sql;
select * from mycars
where Origin = "&origin";
quit;
%mend filter_origin;
%filter_origin(Asia);
Output:
Make | Model | Year | EngineSize | Horsepower | Weight | MPG_City | MPG_Highway | Price | Origin |
---|---|---|---|---|---|---|---|---|---|
Toyota | Corolla | 2020 | 1.8 | 139 | 2840 | 30 | 38 | 20000 | Asia |
Honda | Civic | 2021 | 2 | 158 | 2760 | 32 | 42 | 21000 | Asia |
Hyundai | Elantra | 2021 | 2 | 147 | 2800 | 33 | 43 | 19500 | Asia |
Nissan | Sentra | 2020 | 2 | 149 | 2900 | 29 | 39 | 20000 | Asia |
Kia | Forte | 2021 | 2 | 147 | 2800 | 31 | 41 | 19500 | Asia |
Mazda | 3 | 2020 | 2.5 | 186 | 3090 | 26 | 35 | 21000 | Asia |
%filter_origin(USA);
Output:
Make | Model | Year | EngineSize | Horsepower | Weight | MPG_City | MPG_Highway | Price | Origin |
---|---|---|---|---|---|---|---|---|---|
Ford | Focus | 2019 | 2 | 160 | 2950 | 28 | 36 | 19000 | USA |
Chevrole | Malibu | 2018 | 1.5 | 160 | 3150 | 27 | 36 | 22000 | USA |
/*Macro with Parameters for Price Range*/
%macro filter_price(min_price, max_price);
proc sql;
select * from mycars
where Price between &min_price and &max_price;
quit;
%mend filter_price;
%filter_price(20000, 25000);
Output:
Make | Model | Year | EngineSize | Horsepower | Weight | MPG_City | MPG_Highway | Price | Origin |
---|---|---|---|---|---|---|---|---|---|
Toyota | Corolla | 2020 | 1.8 | 139 | 2840 | 30 | 38 | 20000 | Asia |
Honda | Civic | 2021 | 2 | 158 | 2760 | 32 | 42 | 21000 | Asia |
Chevrole | Malibu | 2018 | 1.5 | 160 | 3150 | 27 | 36 | 22000 | USA |
Volkswag | Jetta | 2019 | 1.4 | 147 | 2900 | 30 | 40 | 20500 | Europe |
Nissan | Sentra | 2020 | 2 | 149 | 2900 | 29 | 39 | 20000 | Asia |
Mazda | 3 | 2020 | 2.5 | 186 | 3090 | 26 | 35 | 21000 | Asia |
%filter_price(30000, 35000);
Output:
Make | Model | Year | EngineSize | Horsepower | Weight | MPG_City | MPG_Highway | Price | Origin |
---|---|---|---|---|---|---|---|---|---|
BMW | 320i | 2020 | 2 | 180 | 3300 | 24 | 36 | 35000 | Europe |
/*Correlation Analysis*/
proc corr data=mycars;
var EngineSize Horsepower Weight MPG_City MPG_Highway Price;
run;
Output:
6 Variables: | EngineSize Horsepower Weight MPG_City MPG_Highway Price |
---|
Simple Statistics | ||||||
---|---|---|---|---|---|---|
Variable | N | Mean | Std Dev | Sum | Minimum | Maximum |
EngineSize | 10 | 1.92000 | 0.30478 | 19.20000 | 1.40000 | 2.50000 |
Horsepower | 10 | 157.30000 | 15.18808 | 1573 | 139.00000 | 186.00000 |
Weight | 10 | 2949 | 176.47474 | 29490 | 2760 | 3300 |
MPG_City | 10 | 29.00000 | 2.78887 | 290.00000 | 24.00000 | 33.00000 |
MPG_Highway | 10 | 38.60000 | 2.83627 | 386.00000 | 35.00000 | 43.00000 |
Price | 10 | 21750 | 4739 | 217500 | 19000 | 35000 |
Pearson Correlation
Coefficients, N = 10 Prob > |r| under H0: Rho=0 | ||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
EngineSize | Horsepower | Weight | MPG_City | MPG_Highway | Price | |||||||||||||
EngineSize |
|
|
|
|
|
| ||||||||||||
Horsepower |
|
|
|
|
|
| ||||||||||||
Weight |
|
|
|
|
|
| ||||||||||||
MPG_City |
|
|
|
|
|
| ||||||||||||
MPG_Highway |
|
|
|
|
|
| ||||||||||||
Price |
|
|
|
|
|
|
/*Regression Analysis*/
proc reg data=mycars;
model Price = Horsepower MPG_City MPG_Highway Weight;
run;
quit;
Output:
Number of Observations Read | 10 |
---|---|
Number of Observations Used | 10 |
Analysis of Variance | |||||
---|---|---|---|---|---|
Source | DF | Sum of Squares |
Mean Square |
F Value | Pr > F |
Model | 4 | 171488740 | 42872185 | 7.00 | 0.0279 |
Error | 5 | 30636260 | 6127252 | ||
Corrected Total | 9 | 202125000 |
Root MSE | 2475.32867 | R-Square | 0.8484 |
---|---|---|---|
Dependent Mean | 21750 | Adj R-Sq | 0.7272 |
Coeff Var | 11.38082 |
Parameter Estimates | |||||
---|---|---|---|---|---|
Variable | DF | Parameter Estimate |
Standard Error |
t Value | Pr > |t| |
Intercept | 1 | -10984 | 77750 | -0.14 | 0.8932 |
Horsepower | 1 | -30.91872 | 88.85894 | -0.35 | 0.7420 |
MPG_City | 1 | -2848.79383 | 1621.65430 | -1.76 | 0.1393 |
MPG_Highway | 1 | 2341.92628 | 826.96611 | 2.83 | 0.0366 |
Weight | 1 | 10.10990 | 16.70622 | 0.61 | 0.5715 |
/*Storing Distinct Origins into a Macro Variable*/
proc sql noprint;
select distinct Origin into :origins separated by ', '
from mycars;
quit;
%put &origins;
Log:
65 %put &origins;
Asia, Europe, USA
/*Looping Through Macro Variables*/
%macro loop_origins;
%let count = %sysfunc(countw(%quote(&origins), %str(,)));
%do i = 1 %to &count;
%let origin = %scan(%quote(&origins), &i, %str(,));
%put Origin: &origin;
%end;
proc print;run;
%mend loop_origins;
%loop_origins;
Log:
111 %loop_origins;
Origin: Asia
Origin: Europe
Origin: USA
/*Exporting Data*/
proc export data=mycars
outfile='C:\Users\YourName\Documents\mycars.csv'
dbms=csv
replace;
run;
Find A Mistake and Comment It:
%macro loop_origins;
%let count = %sysfunc(countw(&origins, %str(,)));
%do i = 1 %to &count;
%let origin = %scan(&origins, &i, %str(,));
%put Origin: &origin;
%end;
%mend loop_origins;
- Get link
- X
- Other Apps
Comments
Post a Comment