185.BUILDING A POWERFUL VEHICLE DATA ANALYSIS DASHBOARD IN SAS USING PROC PRINT | PROC MEANS | PROC FREQ | PROC SORT | PROC REPORT | PROC SQL | PROC FORMAT | MACROS | A COMPLETE STEP-BY-STEP GUIDE TO MASTER DATA HANDLING, EXPLORATION, REPORTING, AND AUTOMATION IN SAS

BUILDING A POWERFUL VEHICLE DATA ANALYSIS DASHBOARD IN SAS USING PROC PRINT | PROC MEANS | PROC FREQ | PROC SORT | PROC REPORT | PROC SQL | PROC FORMAT | MACROS | A COMPLETE STEP-BY-STEP GUIDE TO MASTER DATA HANDLING, EXPLORATION, REPORTING, AND AUTOMATION IN SAS

/* Step 1: Create Vehicles Dataset */

data vehicles;

    length Make $15 Model $15 Type $10 Color $10 Fuel_Type $12 Transmission $10;

    input Vehicle_ID Make $ Model $ Year Type $ Color $ Price Mileage Engine_Size 

          Fuel_Type $ Transmission $;

    datalines;

1 Toyota Corolla 2018 Sedan White 18000 35000 1.8 Gasoline Automatic

2 Ford F150 2020 Truck Black 35000 22000 3.5 Gasoline Automatic

3 Tesla Model3 2022 Sedan Red 45000 15000 0 Electric Automatic

4 Honda CRV 2019 SUV Blue 25000 30000 2.4 Gasoline Automatic

5 BMW X5 2021 SUV Black 60000 10000 3.0 Diesel Automatic

6 Chevrolet Bolt 2020 Hatchback Silver 37000 12000 0 Electric Automatic

7 Ford Mustang 2017 Coupe Red 28000 45000 5.0 Gasoline Manual

8 Toyota Tacoma 2018 Truck White 32000 28000 3.5 Gasoline Manual

9 Nissan Leaf 2021 Hatchback Green 31000 8000 0 Electric Automatic

10 Jeep Wrangler 2019 SUV Yellow 40000 27000 3.6 Gasoline Manual

;

run;

/* Step 2: View the dataset using PROC PRINT */

proc print data=vehicles noobs label;

 var Vehicle_ID Make  Model  Year Type  Color  Price Mileage Engine_Size 

          Fuel_Type  Transmission ;

    title "List of Vehicles in the Dataset";

    label Vehicle_ID = "Vehicle ID"

          Make = "Manufacturer"

          Model = "Model Name"

          Year = "Year of Manufacture"

          Type = "Vehicle Type"

          Color = "Color"

          Price = "Price in USD"

          Mileage = "Mileage (miles)"

          Engine_Size = "Engine Size (L)"

          Fuel_Type = "Type of Fuel"

          Transmission = "Transmission Type";

run;

Output:

                                                                   List of Vehicles in the Dataset

Vehicle ID Manufacturer Model Name Year of Manufacture Vehicle Type Color Price in USD Mileage (miles) Engine Size
(L)
Type of Fuel Transmission Type
1 Toyota Corolla 2018 Sedan White 18000 35000 1.8 Gasoline Automatic
2 Ford F150 2020 Truck Black 35000 22000 3.5 Gasoline Automatic
3 Tesla Model3 2022 Sedan Red 45000 15000 0.0 Electric Automatic
4 Honda CRV 2019 SUV Blue 25000 30000 2.4 Gasoline Automatic
5 BMW X5 2021 SUV Black 60000 10000 3.0 Diesel Automatic
6 Chevrolet Bolt 2020 Hatchback Silver 37000 12000 0.0 Electric Automatic
7 Ford Mustang 2017 Coupe Red 28000 45000 5.0 Gasoline Manual
8 Toyota Tacoma 2018 Truck White 32000 28000 3.5 Gasoline Manual
9 Nissan Leaf 2021 Hatchback Green 31000 8000 0.0 Electric Automatic
10 Jeep Wrangler 2019 SUV Yellow 40000 27000 3.6 Gasoline Manual


/* Step 3: Basic descriptive statistics for numeric variables */

proc means data=vehicles mean median min max std n;

    var Price Mileage Engine_Size Year;

    title "Descriptive Statistics for Vehicle Numeric Attributes";

run;

Output:

                                      Descriptive Statistics for Vehicle Numeric Attributes

                                                        The MEANS Procedure

Variable Mean Median Minimum Maximum Std Dev N
Price
Mileage
Engine_Size
Year
35100.00
23200.00
2.2800000
2019.50
33500.00
24500.00
2.7000000
2019.50
18000.00
8000.00
0
2017.00
60000.00
45000.00
5.0000000
2022.00
11628.03
12007.41
1.7787636
1.5811388
10
10
10
10

/* Step 4: Frequency counts for categorical variables */

proc freq data=vehicles;

    tables Make Type Color Fuel_Type Transmission / nocum nopercent;

    title "Frequency Distribution of Categorical Variables";

run;

Output:

                                       Frequency Distribution of Categorical Variables

                                                                The FREQ Procedure

Make Frequency
BMW 1
Chevrolet 1
Ford 2
Honda 1
Jeep 1
Nissan 1
Tesla 1
Toyota 2

Type Frequency
Coupe 1
Hatchback 2
SUV 3
Sedan 2
Truck 2

Color Frequency
Black 2
Blue 1
Green 1
Red 2
Silver 1
White 2
Yellow 1

Fuel_Type Frequency
Diesel 1
Electric 3
Gasoline 6

Transmission Frequency
Automatic 7
Manual 3

/* Step 5: Sort dataset by Price descending */

proc sort data=vehicles out=vehicles_sorted;

    by descending Price;

run;

proc print data=vehicles_sorted noobs;

    title "Vehicles Sorted by Price (High to Low)";

run;

Output:

                                                                    Vehicles Sorted by Price (High to Low)

Make Model Type Color Fuel_Type Transmission Vehicle_ID Year Price Mileage Engine_Size
BMW X5 SUV Black Diesel Automatic 5 2021 60000 10000 3.0
Tesla Model3 Sedan Red Electric Automatic 3 2022 45000 15000 0.0
Jeep Wrangler SUV Yellow Gasoline Manual 10 2019 40000 27000 3.6
Chevrolet Bolt Hatchback Silver Electric Automatic 6 2020 37000 12000 0.0
Ford F150 Truck Black Gasoline Automatic 2 2020 35000 22000 3.5
Toyota Tacoma Truck White Gasoline Manual 8 2018 32000 28000 3.5
Nissan Leaf Hatchback Green Electric Automatic 9 2021 31000 8000 0.0
Ford Mustang Coupe Red Gasoline Manual 7 2017 28000 45000 5.0
Honda CRV SUV Blue Gasoline Automatic 4 2019 25000 30000 2.4
Toyota Corolla Sedan White Gasoline Automatic 1 2018 18000 35000 1.8

/* Step 6: PROC REPORT for a tabular summary */

proc report data=vehicles nowd;

    column Make Type Fuel_Type Price Mileage;

    define Make / group "Manufacturer";

    define Type / group "Vehicle Type";

    define Fuel_Type / group "Fuel Type";

    define Price / analysis mean format=dollar10.2 "Average Price";

    define Mileage / analysis mean format=8.0 "Average Mileage";

    title "Summary Report of Vehicles by Make, Type, and Fuel Type";

run;

Output:

                                 Summary Report of Vehicles by Make, Type, and Fuel Type

Manufacturer Vehicle Type Fuel Type Average Price Average Mileage
BMW SUV Diesel $60,000.00 10000
Chevrolet Hatchback Electric $37,000.00 12000
Ford Coupe Gasoline $28,000.00 45000
  Truck Gasoline $35,000.00 22000
Honda SUV Gasoline $25,000.00 30000
Jeep SUV Gasoline $40,000.00 27000
Nissan Hatchback Electric $31,000.00 8000
Tesla Sedan Electric $45,000.00 15000
Toyota Sedan Gasoline $18,000.00 35000
  Truck Gasoline $32,000.00 28000

/* Step 7: PROC SQL to create a subset of expensive vehicles */

proc sql;

    create table expensive_vehicles as

    select * from vehicles

    where Price > 30000

    order by Price desc;

quit;

proc print data=expensive_vehicles noobs;

    title "Vehicles with Price Above $30,000";

run;

Output:

                                                         Vehicles with Price Above $30,000

Make Model Type Color Fuel_Type Transmission Vehicle_ID Year Price Mileage Engine_Size
BMW X5 SUV Black Diesel Automatic 5 2021 60000 10000 3.0
Tesla Model3 Sedan Red Electric Automatic 3 2022 45000 15000 0.0
Jeep Wrangler SUV Yellow Gasoline Manual 10 2019 40000 27000 3.6
Chevrolet Bolt Hatchback Silver Electric Automatic 6 2020 37000 12000 0.0
Ford F150 Truck Black Gasoline Automatic 2 2020 35000 22000 3.5
Toyota Tacoma Truck White Gasoline Manual 8 2018 32000 28000 3.5
Nissan Leaf Hatchback Green Electric Automatic 9 2021 31000 8000 0.0

/* Step 8: PROC SQL for aggregation and group by */

proc sql;

    title "Average Price and Mileage by Vehicle Type";

    select Type, 

           avg(Price) format=dollar10.2 as Avg_Price,

           avg(Mileage) format=8.0 as Avg_Mileage,

           count(*) as Count

    from vehicles

    group by Type

    order by Avg_Price desc;

quit;

Output:

                                               Average Price and Mileage by Vehicle Type

Type Avg_Price Avg_Mileage Count
SUV $41,666.67 22333 3
Hatchback $34,000.00 10000 2
Truck $33,500.00 25000 2
Sedan $31,500.00 25000 2
Coupe $28,000.00 45000 1

/* Step 9: Create a macro to generate report for any vehicle make */

%macro vehicle_report(make=);

    proc sql;

        create table report_&make as

        select * from vehicles

        where upcase(Make) = upcase("&make")

        order by Price desc;

    quit;

    proc print data=report_&make noobs;

        title "Vehicle Report for &make";

    run;

%mend vehicle_report;


/* Run macro for Toyota */

%vehicle_report(make=Toyota);

Output:

                                                                Vehicle Report for Toyota

Make Model Type Color Fuel_Type Transmission Vehicle_ID Year Price Mileage Engine_Size
Toyota Tacoma Truck White Gasoline Manual 8 2018 32000 28000 3.5
Toyota Corolla Sedan White Gasoline Automatic 1 2018 18000 35000 1.8

/* Run macro for Ford */

%vehicle_report(make=Ford);

Output:

                                                                 Vehicle Report for Ford

Make Model Type Color Fuel_Type Transmission Vehicle_ID Year Price Mileage Engine_Size
Ford F150 Truck Black Gasoline Automatic 2 2020 35000 22000 3.5
Ford Mustang Coupe Red Gasoline Manual 7 2017 28000 45000 5.0

/* Step 10: Macro to summarize price by fuel type */

%macro price_summary;

    proc sql;

        select Fuel_Type,

               avg(Price) format=dollar10.2 as Avg_Price,

               min(Price) format=dollar10.2 as Min_Price,

               max(Price) format=dollar10.2 as Max_Price,

               count(*) as Count

        from vehicles

        group by Fuel_Type;

    quit;

%mend price_summary;


%price_summary;

Output:

Fuel_Type Avg_Price Min_Price Max_Price Count
Diesel $60,000.00 $60,000.00 $60,000.00 1
Electric $37,666.67 $31,000.00 $45,000.00 3
Gasoline $29,666.67 $18,000.00 $40,000.00 6

/* Step 11: Add a new variable using DATA step and PROC SQL */

/* Calculate Price per Mileage (Price per mile driven) */

data vehicles;

    set vehicles;

    if Mileage > 0 then Price_per_Mile = Price / Mileage;

    else Price_per_Mile = .;

run;

proc print data=vehicles noobs;

    var Vehicle_ID Make Model Price Mileage Price_per_Mile;

    title "Vehicles with Price per Mileage Ratio";

run;

Output:

                                                   Vehicles with Price per Mileage Ratio

Vehicle_ID Make Model Price Mileage Price_per_Mile
1 Toyota Corolla 18000 35000 0.51429
2 Ford F150 35000 22000 1.59091
3 Tesla Model3 45000 15000 3.00000
4 Honda CRV 25000 30000 0.83333
5 BMW X5 60000 10000 6.00000
6 Chevrolet Bolt 37000 12000 3.08333
7 Ford Mustang 28000 45000 0.62222
8 Toyota Tacoma 32000 28000 1.14286
9 Nissan Leaf 31000 8000 3.87500
10 Jeep Wrangler 40000 27000 1.48148


/* Step 12: Using PROC SQL to update a dataset */

/* Assume we want to increase price of Electric vehicles by 5% */

proc sql;

    update vehicles

    set Price = Price * 1.05

    where Fuel_Type = "Electric";

quit;

proc print data=vehicles noobs;

    var Vehicle_ID Make Model Price Fuel_Type;

    title "Updated Vehicle Prices (5% Increase for Electric Vehicles)";

run;

Output:

                                      Updated Vehicle Prices (5% Increase for Electric Vehicles)

Vehicle_ID Make Model Price Fuel_Type
1 Toyota Corolla 18000 Gasoline
2 Ford F150 35000 Gasoline
3 Tesla Model3 47250 Electric
4 Honda CRV 25000 Gasoline
5 BMW X5 60000 Diesel
6 Chevrolet Bolt 38850 Electric
7 Ford Mustang 28000 Gasoline
8 Toyota Tacoma 32000 Gasoline
9 Nissan Leaf 32550 Electric
10 Jeep Wrangler 40000 Gasoline

/* Step 13: Use PROC FORMAT for better readability */

proc format;

    value $transfmt

        "Automatic" = "Auto"

        "Manual" = "Manual";

    value $fuelfmt

        "Gasoline" = "Gas"

        "Diesel" = "Diesel"

        "Electric" = "Electric";

run;


proc print data=vehicles noobs;

    format Transmission $transfmt. Fuel_Type $fuelfmt. Price dollar10.2;

    title "Vehicles with Formatted Transmission and Fuel Type";

run;

Output:

                                                             Vehicles with Formatted Transmission and Fuel Type

Make Model Type Color Fuel_Type Transmission Vehicle_ID Year Price Mileage Engine_Size Price_per_Mile
Toyota Corolla Sedan White Gas Auto 1 2018 $18,000.00 35000 1.8 0.51429
Ford F150 Truck Black Gas Auto 2 2020 $35,000.00 22000 3.5 1.59091
Tesla Model3 Sedan Red Electric Auto 3 2022 $47,250.00 15000 0.0 3.00000
Honda CRV SUV Blue Gas Auto 4 2019 $25,000.00 30000 2.4 0.83333
BMW X5 SUV Black Diesel Auto 5 2021 $60,000.00 10000 3.0 6.00000
Chevrolet Bolt Hatchback Silver Electric Auto 6 2020 $38,850.00 12000 0.0 3.08333
Ford Mustang Coupe Red Gas Manual 7 2017 $28,000.00 45000 5.0 0.62222
Toyota Tacoma Truck White Gas Manual 8 2018 $32,000.00 28000 3.5 1.14286
Nissan Leaf Hatchback Green Electric Auto 9 2021 $32,550.00 8000 0.0 3.87500
Jeep Wrangler SUV Yellow Gas Manual 10 2019 $40,000.00 27000 3.6 1.48148

/* Step 14: Create a macro to generate summary statistics for any variable */

%macro summary_stat(var=);

    proc means data=vehicles mean median min max std n;

        var &var;

        title "Summary Statistics for &var";

    run;

%mend summary_stat;


/* Run macro for Mileage */

%summary_stat(var=Mileage);

Output:

                                                  Summary Statistics for Mileage

                                                      The MEANS Procedure

Analysis Variable : Mileage
Mean Median Minimum Maximum Std Dev N
23200.00 24500.00 8000.00 45000.00 12007.41 10

/* Run macro for Price_per_Mile */

%summary_stat(var=Price_per_Mile);

                                               Summary Statistics for Price_per_Mile

                                                           The MEANS Procedure

Analysis Variable : Price_per_Mile
Mean Median Minimum Maximum Std Dev N
2.2143422 1.5361953 0.5142857 6.0000000 1.7596543 10


/* Step 15: Create a report of vehicles with mileage less than a threshold using macro */

%macro low_mileage_report(threshold=);

    proc sql;

        create table low_mileage as

        select * from vehicles

        where Mileage < &threshold

        order by Mileage;

    quit;

    proc print data=low_mileage noobs;

        title "Vehicles with Mileage Less Than &threshold Miles";

    run;

%mend low_mileage_report;


/* Generate report for vehicles with mileage less than 20000 */

%low_mileage_report(threshold=20000);

Output:

                                              Vehicles with Mileage Less Than 20000 Miles

Make Model Type Color Fuel_Type Transmission Vehicle_ID Year Price Mileage Engine_Size Price_per_Mile
Nissan Leaf Hatchback Green Electric Automatic 9 2021 32550 8000 0 3.87500
BMW X5 SUV Black Diesel Automatic 5 2021 60000 10000 3 6.00000
Chevrolet Bolt Hatchback Silver Electric Automatic 6 2020 38850 12000 0 3.08333
Tesla Model3 Sedan Red Electric Automatic 3 2022 47250 15000 0 3.00000


PRACTICE AND COMMENT YOUR CODE: 

--->PLEASE FOLLOW OUR BLOG FOR MORE UPDATES.

PLEASE FOLLOW OUR TELEGRAM CHANNEL CLICK HERE

PLEASE FOLLOW OUR FACEBOOK PAGE  CLICK HERE

PLEASE FOLLOW OUR INSTAGRAM PAGE CLICK HERE

Comments