185.BUILDING A POWERFUL VEHICLE DATA ANALYSIS DASHBOARD IN SAS USING PROC PRINT | PROC MEANS | PROC FREQ | PROC SORT | PROC REPORT | PROC SQL | PROC FORMAT | MACROS | A COMPLETE STEP-BY-STEP GUIDE TO MASTER DATA HANDLING, EXPLORATION, REPORTING, AND AUTOMATION IN SAS

BUILDING A POWERFUL VEHICLE DATA ANALYSIS DASHBOARD IN SAS USING PROC PRINT | PROC MEANS | PROC FREQ | PROC SORT | PROC REPORT | PROC SQL | PROC FORMAT | MACROS | A COMPLETE STEP-BY-STEP GUIDE TO MASTER DATA HANDLING, EXPLORATION, REPORTING, AND AUTOMATION IN SAS

/* Step 1: Create Vehicles Dataset */

data vehicles;

    length Make $15 Model $15 Type $10 Color $10 Fuel_Type $12 Transmission $10;

    input Vehicle_ID Make $ Model $ Year Type $ Color $ Price Mileage Engine_Size 

          Fuel_Type $ Transmission $;

    datalines;

1 Toyota Corolla 2018 Sedan White 18000 35000 1.8 Gasoline Automatic

2 Ford F150 2020 Truck Black 35000 22000 3.5 Gasoline Automatic

3 Tesla Model3 2022 Sedan Red 45000 15000 0 Electric Automatic

4 Honda CRV 2019 SUV Blue 25000 30000 2.4 Gasoline Automatic

5 BMW X5 2021 SUV Black 60000 10000 3.0 Diesel Automatic

6 Chevrolet Bolt 2020 Hatchback Silver 37000 12000 0 Electric Automatic

7 Ford Mustang 2017 Coupe Red 28000 45000 5.0 Gasoline Manual

8 Toyota Tacoma 2018 Truck White 32000 28000 3.5 Gasoline Manual

9 Nissan Leaf 2021 Hatchback Green 31000 8000 0 Electric Automatic

10 Jeep Wrangler 2019 SUV Yellow 40000 27000 3.6 Gasoline Manual

;

run;

/* Step 2: View the dataset using PROC PRINT */

proc print data=vehicles noobs label;

 var Vehicle_ID Make  Model  Year Type  Color  Price Mileage Engine_Size 

          Fuel_Type  Transmission ;

    title "List of Vehicles in the Dataset";

    label Vehicle_ID = "Vehicle ID"

          Make = "Manufacturer"

          Model = "Model Name"

          Year = "Year of Manufacture"

          Type = "Vehicle Type"

          Color = "Color"

          Price = "Price in USD"

          Mileage = "Mileage (miles)"

          Engine_Size = "Engine Size (L)"

          Fuel_Type = "Type of Fuel"

          Transmission = "Transmission Type";

run;

Output:

                                                                   List of Vehicles in the Dataset

Vehicle ID Manufacturer Model Name Year of Manufacture Vehicle Type Color Price in USD Mileage (miles) Engine Size
(L)
Type of Fuel Transmission Type
1 Toyota Corolla 2018 Sedan White 18000 35000 1.8 Gasoline Automatic
2 Ford F150 2020 Truck Black 35000 22000 3.5 Gasoline Automatic
3 Tesla Model3 2022 Sedan Red 45000 15000 0.0 Electric Automatic
4 Honda CRV 2019 SUV Blue 25000 30000 2.4 Gasoline Automatic
5 BMW X5 2021 SUV Black 60000 10000 3.0 Diesel Automatic
6 Chevrolet Bolt 2020 Hatchback Silver 37000 12000 0.0 Electric Automatic
7 Ford Mustang 2017 Coupe Red 28000 45000 5.0 Gasoline Manual
8 Toyota Tacoma 2018 Truck White 32000 28000 3.5 Gasoline Manual
9 Nissan Leaf 2021 Hatchback Green 31000 8000 0.0 Electric Automatic
10 Jeep Wrangler 2019 SUV Yellow 40000 27000 3.6 Gasoline Manual


/* Step 3: Basic descriptive statistics for numeric variables */

proc means data=vehicles mean median min max std n;

    var Price Mileage Engine_Size Year;

    title "Descriptive Statistics for Vehicle Numeric Attributes";

run;

Output:

                                      Descriptive Statistics for Vehicle Numeric Attributes

                                                        The MEANS Procedure

Variable Mean Median Minimum Maximum Std Dev N
Price
Mileage
Engine_Size
Year
35100.00
23200.00
2.2800000
2019.50
33500.00
24500.00
2.7000000
2019.50
18000.00
8000.00
0
2017.00
60000.00
45000.00
5.0000000
2022.00
11628.03
12007.41
1.7787636
1.5811388
10
10
10
10

/* Step 4: Frequency counts for categorical variables */

proc freq data=vehicles;

    tables Make Type Color Fuel_Type Transmission / nocum nopercent;

    title "Frequency Distribution of Categorical Variables";

run;

Output:

                                       Frequency Distribution of Categorical Variables

                                                                The FREQ Procedure

Make Frequency
BMW 1
Chevrolet 1
Ford 2
Honda 1
Jeep 1
Nissan 1
Tesla 1
Toyota 2

Type Frequency
Coupe 1
Hatchback 2
SUV 3
Sedan 2
Truck 2

Color Frequency
Black 2
Blue 1
Green 1
Red 2
Silver 1
White 2
Yellow 1

Fuel_Type Frequency
Diesel 1
Electric 3
Gasoline 6

Transmission Frequency
Automatic 7
Manual 3

/* Step 5: Sort dataset by Price descending */

proc sort data=vehicles out=vehicles_sorted;

    by descending Price;

run;

proc print data=vehicles_sorted noobs;

    title "Vehicles Sorted by Price (High to Low)";

run;

Output:

                                                                    Vehicles Sorted by Price (High to Low)

Make Model Type Color Fuel_Type Transmission Vehicle_ID Year Price Mileage Engine_Size
BMW X5 SUV Black Diesel Automatic 5 2021 60000 10000 3.0
Tesla Model3 Sedan Red Electric Automatic 3 2022 45000 15000 0.0
Jeep Wrangler SUV Yellow Gasoline Manual 10 2019 40000 27000 3.6
Chevrolet Bolt Hatchback Silver Electric Automatic 6 2020 37000 12000 0.0
Ford F150 Truck Black Gasoline Automatic 2 2020 35000 22000 3.5
Toyota Tacoma Truck White Gasoline Manual 8 2018 32000 28000 3.5
Nissan Leaf Hatchback Green Electric Automatic 9 2021 31000 8000 0.0
Ford Mustang Coupe Red Gasoline Manual 7 2017 28000 45000 5.0
Honda CRV SUV Blue Gasoline Automatic 4 2019 25000 30000 2.4
Toyota Corolla Sedan White Gasoline Automatic 1 2018 18000 35000 1.8

/* Step 6: PROC REPORT for a tabular summary */

proc report data=vehicles nowd;

    column Make Type Fuel_Type Price Mileage;

    define Make / group "Manufacturer";

    define Type / group "Vehicle Type";

    define Fuel_Type / group "Fuel Type";

    define Price / analysis mean format=dollar10.2 "Average Price";

    define Mileage / analysis mean format=8.0 "Average Mileage";

    title "Summary Report of Vehicles by Make, Type, and Fuel Type";

run;

Output:

                                 Summary Report of Vehicles by Make, Type, and Fuel Type

Manufacturer Vehicle Type Fuel Type Average Price Average Mileage
BMW SUV Diesel $60,000.00 10000
Chevrolet Hatchback Electric $37,000.00 12000
Ford Coupe Gasoline $28,000.00 45000
  Truck Gasoline $35,000.00 22000
Honda SUV Gasoline $25,000.00 30000
Jeep SUV Gasoline $40,000.00 27000
Nissan Hatchback Electric $31,000.00 8000
Tesla Sedan Electric $45,000.00 15000
Toyota Sedan Gasoline $18,000.00 35000
  Truck Gasoline $32,000.00 28000

/* Step 7: PROC SQL to create a subset of expensive vehicles */

proc sql;

    create table expensive_vehicles as

    select * from vehicles

    where Price > 30000

    order by Price desc;

quit;

proc print data=expensive_vehicles noobs;

    title "Vehicles with Price Above $30,000";

run;

Output:

                                                         Vehicles with Price Above $30,000

Make Model Type Color Fuel_Type Transmission Vehicle_ID Year Price Mileage Engine_Size
BMW X5 SUV Black Diesel Automatic 5 2021 60000 10000 3.0
Tesla Model3 Sedan Red Electric Automatic 3 2022 45000 15000 0.0
Jeep Wrangler SUV Yellow Gasoline Manual 10 2019 40000 27000 3.6
Chevrolet Bolt Hatchback Silver Electric Automatic 6 2020 37000 12000 0.0
Ford F150 Truck Black Gasoline Automatic 2 2020 35000 22000 3.5
Toyota Tacoma Truck White Gasoline Manual 8 2018 32000 28000 3.5
Nissan Leaf Hatchback Green Electric Automatic 9 2021 31000 8000 0.0

/* Step 8: PROC SQL for aggregation and group by */

proc sql;

    title "Average Price and Mileage by Vehicle Type";

    select Type, 

           avg(Price) format=dollar10.2 as Avg_Price,

           avg(Mileage) format=8.0 as Avg_Mileage,

           count(*) as Count

    from vehicles

    group by Type

    order by Avg_Price desc;

quit;

Output:

                                               Average Price and Mileage by Vehicle Type

Type Avg_Price Avg_Mileage Count
SUV $41,666.67 22333 3
Hatchback $34,000.00 10000 2
Truck $33,500.00 25000 2
Sedan $31,500.00 25000 2
Coupe $28,000.00 45000 1

/* Step 9: Create a macro to generate report for any vehicle make */

%macro vehicle_report(make=);

    proc sql;

        create table report_&make as

        select * from vehicles

        where upcase(Make) = upcase("&make")

        order by Price desc;

    quit;

    proc print data=report_&make noobs;

        title "Vehicle Report for &make";

    run;

%mend vehicle_report;


/* Run macro for Toyota */

%vehicle_report(make=Toyota);

Output:

                                                                Vehicle Report for Toyota

Make Model Type Color Fuel_Type Transmission Vehicle_ID Year Price Mileage Engine_Size
Toyota Tacoma Truck White Gasoline Manual 8 2018 32000 28000 3.5
Toyota Corolla Sedan White Gasoline Automatic 1 2018 18000 35000 1.8

/* Run macro for Ford */

%vehicle_report(make=Ford);

Output:

                                                                 Vehicle Report for Ford

Make Model Type Color Fuel_Type Transmission Vehicle_ID Year Price Mileage Engine_Size
Ford F150 Truck Black Gasoline Automatic 2 2020 35000 22000 3.5
Ford Mustang Coupe Red Gasoline Manual 7 2017 28000 45000 5.0

/* Step 10: Macro to summarize price by fuel type */

%macro price_summary;

    proc sql;

        select Fuel_Type,

               avg(Price) format=dollar10.2 as Avg_Price,

               min(Price) format=dollar10.2 as Min_Price,

               max(Price) format=dollar10.2 as Max_Price,

               count(*) as Count

        from vehicles

        group by Fuel_Type;

    quit;

%mend price_summary;


%price_summary;

Output:

Fuel_Type Avg_Price Min_Price Max_Price Count
Diesel $60,000.00 $60,000.00 $60,000.00 1
Electric $37,666.67 $31,000.00 $45,000.00 3
Gasoline $29,666.67 $18,000.00 $40,000.00 6

/* Step 11: Add a new variable using DATA step and PROC SQL */

/* Calculate Price per Mileage (Price per mile driven) */

data vehicles;

    set vehicles;

    if Mileage > 0 then Price_per_Mile = Price / Mileage;

    else Price_per_Mile = .;

run;

proc print data=vehicles noobs;

    var Vehicle_ID Make Model Price Mileage Price_per_Mile;

    title "Vehicles with Price per Mileage Ratio";

run;

Output:

                                                   Vehicles with Price per Mileage Ratio

Vehicle_ID Make Model Price Mileage Price_per_Mile
1 Toyota Corolla 18000 35000 0.51429
2 Ford F150 35000 22000 1.59091
3 Tesla Model3 45000 15000 3.00000
4 Honda CRV 25000 30000 0.83333
5 BMW X5 60000 10000 6.00000
6 Chevrolet Bolt 37000 12000 3.08333
7 Ford Mustang 28000 45000 0.62222
8 Toyota Tacoma 32000 28000 1.14286
9 Nissan Leaf 31000 8000 3.87500
10 Jeep Wrangler 40000 27000 1.48148


/* Step 12: Using PROC SQL to update a dataset */

/* Assume we want to increase price of Electric vehicles by 5% */

proc sql;

    update vehicles

    set Price = Price * 1.05

    where Fuel_Type = "Electric";

quit;

proc print data=vehicles noobs;

    var Vehicle_ID Make Model Price Fuel_Type;

    title "Updated Vehicle Prices (5% Increase for Electric Vehicles)";

run;

Output:

                                      Updated Vehicle Prices (5% Increase for Electric Vehicles)

Vehicle_ID Make Model Price Fuel_Type
1 Toyota Corolla 18000 Gasoline
2 Ford F150 35000 Gasoline
3 Tesla Model3 47250 Electric
4 Honda CRV 25000 Gasoline
5 BMW X5 60000 Diesel
6 Chevrolet Bolt 38850 Electric
7 Ford Mustang 28000 Gasoline
8 Toyota Tacoma 32000 Gasoline
9 Nissan Leaf 32550 Electric
10 Jeep Wrangler 40000 Gasoline

/* Step 13: Use PROC FORMAT for better readability */

proc format;

    value $transfmt

        "Automatic" = "Auto"

        "Manual" = "Manual";

    value $fuelfmt

        "Gasoline" = "Gas"

        "Diesel" = "Diesel"

        "Electric" = "Electric";

run;


proc print data=vehicles noobs;

    format Transmission $transfmt. Fuel_Type $fuelfmt. Price dollar10.2;

    title "Vehicles with Formatted Transmission and Fuel Type";

run;

Output:

                                                             Vehicles with Formatted Transmission and Fuel Type

Make Model Type Color Fuel_Type Transmission Vehicle_ID Year Price Mileage Engine_Size Price_per_Mile
Toyota Corolla Sedan White Gas Auto 1 2018 $18,000.00 35000 1.8 0.51429
Ford F150 Truck Black Gas Auto 2 2020 $35,000.00 22000 3.5 1.59091
Tesla Model3 Sedan Red Electric Auto 3 2022 $47,250.00 15000 0.0 3.00000
Honda CRV SUV Blue Gas Auto 4 2019 $25,000.00 30000 2.4 0.83333
BMW X5 SUV Black Diesel Auto 5 2021 $60,000.00 10000 3.0 6.00000
Chevrolet Bolt Hatchback Silver Electric Auto 6 2020 $38,850.00 12000 0.0 3.08333
Ford Mustang Coupe Red Gas Manual 7 2017 $28,000.00 45000 5.0 0.62222
Toyota Tacoma Truck White Gas Manual 8 2018 $32,000.00 28000 3.5 1.14286
Nissan Leaf Hatchback Green Electric Auto 9 2021 $32,550.00 8000 0.0 3.87500
Jeep Wrangler SUV Yellow Gas Manual 10 2019 $40,000.00 27000 3.6 1.48148

/* Step 14: Create a macro to generate summary statistics for any variable */

%macro summary_stat(var=);

    proc means data=vehicles mean median min max std n;

        var &var;

        title "Summary Statistics for &var";

    run;

%mend summary_stat;


/* Run macro for Mileage */

%summary_stat(var=Mileage);

Output:

                                                  Summary Statistics for Mileage

                                                      The MEANS Procedure

Analysis Variable : Mileage
Mean Median Minimum Maximum Std Dev N
23200.00 24500.00 8000.00 45000.00 12007.41 10

/* Run macro for Price_per_Mile */

%summary_stat(var=Price_per_Mile);

                                               Summary Statistics for Price_per_Mile

                                                           The MEANS Procedure

Analysis Variable : Price_per_Mile
Mean Median Minimum Maximum Std Dev N
2.2143422 1.5361953 0.5142857 6.0000000 1.7596543 10


/* Step 15: Create a report of vehicles with mileage less than a threshold using macro */

%macro low_mileage_report(threshold=);

    proc sql;

        create table low_mileage as

        select * from vehicles

        where Mileage < &threshold

        order by Mileage;

    quit;

    proc print data=low_mileage noobs;

        title "Vehicles with Mileage Less Than &threshold Miles";

    run;

%mend low_mileage_report;


/* Generate report for vehicles with mileage less than 20000 */

%low_mileage_report(threshold=20000);

Output:

                                              Vehicles with Mileage Less Than 20000 Miles

Make Model Type Color Fuel_Type Transmission Vehicle_ID Year Price Mileage Engine_Size Price_per_Mile
Nissan Leaf Hatchback Green Electric Automatic 9 2021 32550 8000 0 3.87500
BMW X5 SUV Black Diesel Automatic 5 2021 60000 10000 3 6.00000
Chevrolet Bolt Hatchback Silver Electric Automatic 6 2020 38850 12000 0 3.08333
Tesla Model3 Sedan Red Electric Automatic 3 2022 47250 15000 0 3.00000


PRACTICE AND COMMENT YOUR CODE: 

--->PLEASE FOLLOW OUR BLOG FOR MORE UPDATES.

PLEASE FOLLOW OUR TELEGRAM CHANNEL CLICK HERE

PLEASE FOLLOW OUR FACEBOOK PAGE  CLICK HERE

PLEASE FOLLOW OUR INSTAGRAM PAGE CLICK HERE

Comments

Popular posts from this blog

409.Can We Build a Reliable Emergency Services Analytics & Fraud Detection System in SAS While Identifying and Fixing Intentional Errors?

397.If a satellite has excellent signal strength but very high latency, can it still deliver good quality communication? Why or why not?A Sas Study

401.How Efficient Are Global Data Centers? A Complete SAS Analytics Study