174.BUILDING A COMPREHENSIVE CARS DATASET USING PROC SQL | PROC MEANS | PROC FREQ | PROC REG | PROC CORR | MACROS IN SAS

BUILDING A COMPREHENSIVE CARS DATASET USING PROC SQL | PROC MEANS | PROC FREQ | PROC REG | PROC CORR | MACROS IN SAS

 /*Creating a unique dataset named Cars*/

/*Creating a Unique Car Dataset*/

data mycars;

    input Make $ Model $ Year EngineSize Horsepower Weight MPG_City MPG_Highway Price Origin $;

    datalines;

Toyota Corolla 2020 1.8 139 2840 30 38 20000 Asia

Ford Focus 2019 2.0 160 2950 28 36 19000 USA

Honda Civic 2021 2.0 158 2760 32 42 21000 Asia

BMW 320i 2020 2.0 180 3300 24 36 35000 Europe

Chevrolet Malibu 2018 1.5 160 3150 27 36 22000 USA

Hyundai Elantra 2021 2.0 147 2800 33 43 19500 Asia

Volkswagen Jetta 2019 1.4 147 2900 30 40 20500 Europe

Nissan Sentra 2020 2.0 149 2900 29 39 20000 Asia

Kia Forte 2021 2.0 147 2800 31 41 19500 Asia

Mazda 3 2020 2.5 186 3090 26 35 21000 Asia

;

run;

proc print data=mycars;

run;

Output:

Obs Make Model Year EngineSize Horsepower Weight MPG_City MPG_Highway Price Origin
1 Toyota Corolla 2020 1.8 139 2840 30 38 20000 Asia
2 Ford Focus 2019 2.0 160 2950 28 36 19000 USA
3 Honda Civic 2021 2.0 158 2760 32 42 21000 Asia
4 BMW 320i 2020 2.0 180 3300 24 36 35000 Europe
5 Chevrole Malibu 2018 1.5 160 3150 27 36 22000 USA
6 Hyundai Elantra 2021 2.0 147 2800 33 43 19500 Asia
7 Volkswag Jetta 2019 1.4 147 2900 30 40 20500 Europe
8 Nissan Sentra 2020 2.0 149 2900 29 39 20000 Asia
9 Kia Forte 2021 2.0 147 2800 31 41 19500 Asia
10 Mazda 3 2020 2.5 186 3090 26 35 21000 Asia


/*Exploring the Dataset*/

proc contents data=mycars;

run;

Output:

                                                         The CONTENTS Procedure

Data Set Name WORK.MYCARS Observations 10
Member Type DATA Variables 10
Engine V9 Indexes 0
Created 14/09/2015 00:02:33 Observation Length 80
Last Modified 14/09/2015 00:02:33 Deleted Observations 0
Protection   Compressed NO
Data Set Type   Sorted NO
Label      
Data Representation WINDOWS_64    
Encoding wlatin1 Western (Windows)    


Engine/Host Dependent Information
Data Set Page Size 65536
Number of Data Set Pages 1
First Data Page 1
Max Obs per Page 817
Obs in First Data Page 10
Number of Data Set Repairs 0
ExtendObsCounter YES
Filename C:\Users\Lenovo\AppData\Local\Temp\SAS Temporary Files\_TD5680_DESKTOP-QFAA4KV_\mycars.sas7bdat
Release Created 9.0401M2
Host Created X64_8HOME


Alphabetic List of Variables and Attributes
# Variable Type Len
4 EngineSize Num 8
5 Horsepower Num 8
7 MPG_City Num 8
8 MPG_Highway Num 8
1 Make Char 8
2 Model Char 8
10 Origin Char 8
9 Price Num 8
6 Weight Num 8
3 Year Num 8


/*Summary Statistics*/

proc means data=mycars mean std min max;

    var EngineSize Horsepower Weight MPG_City MPG_Highway Price;

run;

Output:

                                                         The MEANS Procedure

Variable Mean Std Dev Minimum Maximum
EngineSize
Horsepower
Weight
MPG_City
MPG_Highway
Price
1.9200000
157.3000000
2949.00
29.0000000
38.6000000
21750.00
0.3047768
15.1880801
176.4747385
2.7888668
2.8362730
4739.02
1.4000000
139.0000000
2760.00
24.0000000
35.0000000
19000.00
2.5000000
186.0000000
3300.00
33.0000000
43.0000000
35000.00


/*Creating a New Table with Calculated Fields*/

proc sql;

    create table car_efficiency as

    select Make, Model, Year, 

           MPG_City, MPG_Highway,

           (MPG_City + MPG_Highway)/2 as MPG_Average

    from mycars;

quit;

proc print;run;

Output:

Obs Make Model Year MPG_City MPG_Highway MPG_Average
1 Toyota Corolla 2020 30 38 34.0
2 Ford Focus 2019 28 36 32.0
3 Honda Civic 2021 32 42 37.0
4 BMW 320i 2020 24 36 30.0
5 Chevrole Malibu 2018 27 36 31.5
6 Hyundai Elantra 2021 33 43 38.0
7 Volkswag Jetta 2019 30 40 35.0
8 Nissan Sentra 2020 29 39 34.0
9 Kia Forte 2021 31 41 36.0
10 Mazda 3 2020 26 35 30.5


/*Filtering Data*/

proc sql;

    select * from mycars

    where Origin = 'Asia' and Price < 20000;

quit;

Output:

Make Model Year EngineSize Horsepower Weight MPG_City MPG_Highway Price Origin
Hyundai Elantra 2021 2 147 2800 33 43 19500 Asia
Kia Forte 2021 2 147 2800 31 41 19500 Asia


/*Defining a Macro to Filter by Origin*/

%macro filter_origin(origin);

    proc sql;

        select * from mycars

        where Origin = "&origin";

    quit;

%mend filter_origin;


%filter_origin(Asia);

Output:

Make Model Year EngineSize Horsepower Weight MPG_City MPG_Highway Price Origin
Toyota Corolla 2020 1.8 139 2840 30 38 20000 Asia
Honda Civic 2021 2 158 2760 32 42 21000 Asia
Hyundai Elantra 2021 2 147 2800 33 43 19500 Asia
Nissan Sentra 2020 2 149 2900 29 39 20000 Asia
Kia Forte 2021 2 147 2800 31 41 19500 Asia
Mazda 3 2020 2.5 186 3090 26 35 21000 Asia


%filter_origin(USA);

Output:

Make Model Year EngineSize Horsepower Weight MPG_City MPG_Highway Price Origin
Ford Focus 2019 2 160 2950 28 36 19000 USA
Chevrole Malibu 2018 1.5 160 3150 27 36 22000 USA


/*Macro with Parameters for Price Range*/

%macro filter_price(min_price, max_price);

    proc sql;

        select * from mycars

        where Price between &min_price and &max_price;

    quit;

%mend filter_price;


%filter_price(20000, 25000);

Output:

Make Model Year EngineSize Horsepower Weight MPG_City MPG_Highway Price Origin
Toyota Corolla 2020 1.8 139 2840 30 38 20000 Asia
Honda Civic 2021 2 158 2760 32 42 21000 Asia
Chevrole Malibu 2018 1.5 160 3150 27 36 22000 USA
Volkswag Jetta 2019 1.4 147 2900 30 40 20500 Europe
Nissan Sentra 2020 2 149 2900 29 39 20000 Asia
Mazda 3 2020 2.5 186 3090 26 35 21000 Asia


%filter_price(30000, 35000);

Output:

Make Model Year EngineSize Horsepower Weight MPG_City MPG_Highway Price Origin
BMW 320i 2020 2 180 3300 24 36 35000 Europe


/*Correlation Analysis*/

proc corr data=mycars;

    var EngineSize Horsepower Weight MPG_City MPG_Highway Price;

run;

Output:

                                                                  The CORR Procedure

6 Variables: EngineSize Horsepower Weight MPG_City MPG_Highway Price


Simple Statistics
Variable N Mean Std Dev Sum Minimum Maximum
EngineSize 10 1.92000 0.30478 19.20000 1.40000 2.50000
Horsepower 10 157.30000 15.18808 1573 139.00000 186.00000
Weight 10 2949 176.47474 29490 2760 3300
MPG_City 10 29.00000 2.78887 290.00000 24.00000 33.00000
MPG_Highway 10 38.60000 2.83627 386.00000 35.00000 43.00000
Price 10 21750 4739 217500 19000 35000


Pearson Correlation Coefficients, N = 10
Prob > |r| under H0: Rho=0
  EngineSize Horsepower Weight MPG_City MPG_Highway Price
EngineSize
1.00000
 
0.54824
0.1008
0.04380
0.9044
-0.16994
0.6388
-0.15681
0.6653
0.04616
0.8992
Horsepower
0.54824
0.1008
1.00000
 
0.76289
0.0103
-0.77646
0.0083
-0.64690
0.0432
0.58314
0.0768
Weight
0.04380
0.9044
0.76289
0.0103
1.00000
 
-0.94819
<.0001
-0.80448
0.0050
0.76293
0.0103
MPG_City
-0.16994
0.6388
-0.77646
0.0083
-0.94819
<.0001
1.00000
 
0.91305
0.0002
-0.67676
0.0316
MPG_Highway
-0.15681
0.6653
-0.64690
0.0432
-0.80448
0.0050
0.91305
0.0002
1.00000
 
-0.36786
0.2956
Price
0.04616
0.8992
0.58314
0.0768
0.76293
0.0103
-0.67676
0.0316
-0.36786
0.2956
1.00000
 


/*Regression Analysis*/

proc reg data=mycars;

    model Price = Horsepower MPG_City MPG_Highway Weight;

run;

quit;

Output:

                                                                  The REG Procedure
                                                                    Model: MODEL1
                                                               Dependent Variable: Price

Number of Observations Read 10
Number of Observations Used 10


Analysis of Variance
Source DF Sum of
Squares
Mean
Square
F Value Pr > F
Model 4 171488740 42872185 7.00 0.0279
Error 5 30636260 6127252    
Corrected Total 9 202125000      


Root MSE 2475.32867 R-Square 0.8484
Dependent Mean 21750 Adj R-Sq 0.7272
Coeff Var 11.38082    


Parameter Estimates
Variable DF Parameter
Estimate
Standard
Error
t Value Pr > |t|
Intercept 1 -10984 77750 -0.14 0.8932
Horsepower 1 -30.91872 88.85894 -0.35 0.7420
MPG_City 1 -2848.79383 1621.65430 -1.76 0.1393
MPG_Highway 1 2341.92628 826.96611 2.83 0.0366
Weight 1 10.10990 16.70622 0.61 0.5715


/*Storing Distinct Origins into a Macro Variable*/

proc sql noprint;

    select distinct Origin into :origins separated by ', '

    from mycars;

quit;


%put &origins;

Log: 

65   %put &origins;

Asia, Europe, USA


/*Looping Through Macro Variables*/

%macro loop_origins;

    %let count = %sysfunc(countw(%quote(&origins), %str(,)));

    %do i = 1 %to &count;

        %let origin = %scan(%quote(&origins), &i, %str(,));

        %put Origin: &origin;

    %end;

proc print;run;

%mend loop_origins;


%loop_origins;

Log:

111  %loop_origins;

Origin: Asia

Origin: Europe

Origin: USA


/*Exporting Data*/

proc export data=mycars

    outfile='C:\Users\YourName\Documents\mycars.csv'

    dbms=csv

    replace;

run;


Find A Mistake and Comment It:

%macro loop_origins;

    %let count = %sysfunc(countw(&origins, %str(,)));

    %do i = 1 %to &count;

        %let origin = %scan(&origins, &i, %str(,));

        %put Origin: &origin;

    %end;

%mend loop_origins;


PRACTICE AND COMMENT YOUR CODE: 

--->PLEASE FOLLOW OUR BLOG FOR MORE UPDATES.

PLEASE FOLLOW OUR TELEGRAM CHANNEL CLICK HERE

PLEASE FOLLOW OUR FACEBOOK PAGE  CLICK HERE

Comments