Saturday, 6 December 2025

331.E-COMMERCE ANALYTICS PROJECT USING DATA STEP | PROC SQL | PROC SUMMARY | PROC RANK | PROC SGPLOT WITH MACROS AND DATE FORMATS LIKE INTCK & INTNX

E-COMMERCE ANALYTICS PROJECT USING DATA STEP | PROC SQL | PROC SUMMARY | PROC RANK | PROC SGPLOT WITH MACROS AND DATE FORMATS LIKE INTCK & INTNX

options nocenter;

Step 1: Create a raw e-commerce dataset 

data ecommerce_raw;

    length Product_Type $20 Brand $15 User_Age_Group $7;

    format Order_Date Launch_Date date9.;

    input Product_ID Product_Type $ Brand $ Sales Rating Return_Rate User_Age_Group $

          Order_Date :date9. Launch_Date :date9.;

    datalines;

1 Smartphone   BrandX  95000 4.7  2.5 25-34 15JAN2025 01NOV2024

2 Laptop       BrandY 120000 4.5  3.0 25-34 20JAN2025 15OCT2024

3 Headphones   BrandZ  45000 4.2  5.0 18-24 05JAN2025 20SEP2024

4 Smartwatch   BrandX  60000 4.0  4.5 18-24 10JAN2025 10NOV2024

5 Shoes        BrandA  38000 3.8  6.5 35-44 18JAN2025 01AUG2024

6 Clothing     BrandB  52000 4.1  7.0 35-44 25JAN2025 05JUL2024

7 Groceries    BrandC  30000 4.6  1.5 45+   02JAN2025 01JAN2024

8 Beauty       BrandD  42000 4.3  3.5 25-34 12JAN2025 10JUL2024

9 Books        BrandE  25000 4.8  1.0 18-24 08JAN2025 01JAN2023

10 Furniture   BrandF 135000 4.4  2.0 35-44 28JAN2025 15SEP2024

11 Tablet      BrandY  78000 4.2  3.2 25-34 22JAN2025 10DEC2024

12 GameConsole BrandG 110000 4.9  2.8 18-24 30JAN2025 01NOV2024

;

run;

proc print data=ecommerce_raw;

run;

OUTPUT:

ObsProduct_TypeBrandUser_Age_GroupOrder_DateLaunch_DateProduct_IDSalesRatingReturn_Rate
1SmartphoneBrandX25-3415JAN202501NOV20241950004.72.5
2LaptopBrandY25-3420JAN202515OCT202421200004.53.0
3HeadphonesBrandZ18-2405JAN202520SEP20243450004.25.0
4SmartwatchBrandX18-2410JAN202510NOV20244600004.04.5
5ShoesBrandA35-4418JAN202501AUG20245380003.86.5
6ClothingBrandB35-4425JAN202505JUL20246520004.17.0
7GroceriesBrandC45+02JAN202501JAN20247300004.61.5
8BeautyBrandD25-3412JAN202510JUL20248420004.33.5
9BooksBrandE18-2408JAN202501JAN20239250004.81.0
10FurnitureBrandF35-4428JAN202515SEP2024101350004.42.0
11TabletBrandY25-3422JAN202510DEC202411780004.23.2
12GameConsoleBrandG18-2430JAN202501NOV2024121100004.92.8


Step 2: Macro to add date-based metrics 

%macro add_date_metrics(in=, out=, analysis_date=);

    data &out.;

        set &in.;

        format Order_Date Launch_Date Return_Window_End date9.;

        

        /* Days between launch and order (product maturity) */

        Days_Since_Launch = intck('day', Launch_Date, Order_Date);

        

        /* Months between launch and order */

        Months_Between_Launch_Order = intck('month', Launch_Date, Order_Date);

        

        /* 30-day return window end date using INTNX */

        Return_Window_End = intnx('day', Order_Date, 30, 'same');

        

        /* Days since order relative to analysis date */

        Days_Since_Order = intck('day', Order_Date, &analysis_date);

    run;

proc print data=&out.;

    run;

%mend;


%let analysis_date = '31JAN2025'd;


%add_date_metrics(in=ecommerce_raw,out=ecommerce_dates,analysis_date=&analysis_date);

OUTPUT:

ObsProduct_TypeBrandUser_Age_GroupOrder_DateLaunch_DateProduct_IDSalesRatingReturn_RateReturn_Window_EndDays_Since_LaunchMonths_Between_Launch_OrderDays_Since_Order
1SmartphoneBrandX25-3415JAN202501NOV20241950004.72.514FEB202575216
2LaptopBrandY25-3420JAN202515OCT202421200004.53.019FEB202597311
3HeadphonesBrandZ18-2405JAN202520SEP20243450004.25.004FEB2025107426
4SmartwatchBrandX18-2410JAN202510NOV20244600004.04.509FEB202561221
5ShoesBrandA35-4418JAN202501AUG20245380003.86.517FEB2025170513
6ClothingBrandB35-4425JAN202505JUL20246520004.17.024FEB202520466
7GroceriesBrandC45+02JAN202501JAN20247300004.61.501FEB20253671229
8BeautyBrandD25-3412JAN202510JUL20248420004.33.511FEB2025186619
9BooksBrandE18-2408JAN202501JAN20239250004.81.007FEB20257382423
10FurnitureBrandF35-4428JAN202515SEP2024101350004.42.027FEB202513543
11TabletBrandY25-3422JAN202510DEC202411780004.23.221FEB20254319
12GameConsoleBrandG18-2430JAN202501NOV2024121100004.92.801MAR20259021


Step 3: Macro for market segmentation 

%macro market_segment(in=, out=);

    data &out.;

        set &in.;

        length Market_Segment $15;

        if Sales >= 100000 and Return_Rate < 3 then Market_Segment = 'Premium Loyal';

        else if Sales >= 100000 and Return_Rate >= 3 then Market_Segment = 'Premium Risky';

        else if 60000 <= Sales < 100000 then Market_Segment = 'Mid-Market';

        else Market_Segment = 'Value Segment';

    run;

proc print data=&out.;

    run;

%mend;


%market_segment(in=ecommerce_dates,out=ecommerce_final);

 OUTPUT:   

ObsProduct_TypeBrandUser_Age_GroupOrder_DateLaunch_DateProduct_IDSalesRatingReturn_RateReturn_Window_EndDays_Since_LaunchMonths_Between_Launch_OrderDays_Since_OrderMarket_Segment
1SmartphoneBrandX25-3415JAN202501NOV20241950004.72.514FEB202575216Mid-Market
2LaptopBrandY25-3420JAN202515OCT202421200004.53.019FEB202597311Premium Risky
3HeadphonesBrandZ18-2405JAN202520SEP20243450004.25.004FEB2025107426Value Segment
4SmartwatchBrandX18-2410JAN202510NOV20244600004.04.509FEB202561221Mid-Market
5ShoesBrandA35-4418JAN202501AUG20245380003.86.517FEB2025170513Value Segment
6ClothingBrandB35-4425JAN202505JUL20246520004.17.024FEB202520466Value Segment
7GroceriesBrandC45+02JAN202501JAN20247300004.61.501FEB20253671229Value Segment
8BeautyBrandD25-3412JAN202510JUL20248420004.33.511FEB2025186619Value Segment
9BooksBrandE18-2408JAN202501JAN20239250004.81.007FEB20257382423Value Segment
10FurnitureBrandF35-4428JAN202515SEP2024101350004.42.027FEB202513543Premium Loyal
11TabletBrandY25-3422JAN202510DEC202411780004.23.221FEB20254319Mid-Market
12GameConsoleBrandG18-2430JAN202501NOV2024121100004.92.801MAR20259021Premium Loyal

            

Step 4: PROC SQL – Summaries by age group and market segment 

proc sql;

    create table seg_age_summary as

    select User_Age_Group,

           Market_Segment,

           count(*) as Product_Count,

           mean(Sales) as Avg_Sales format=comma12.,

           mean(Rating) as Avg_Rating format=8.2,

           mean(Return_Rate) as Avg_Return_Rate format=8.2

    from ecommerce_final

    group by User_Age_Group, Market_Segment

    order by User_Age_Group, Market_Segment;

quit;

proc print data=seg_age_summary;

run;

OUTPUT:

ObsUser_Age_GroupMarket_SegmentProduct_CountAvg_SalesAvg_RatingAvg_Return_Rate
118-24Mid-Market160,0004.004.50
218-24Premium Loyal1110,0004.902.80
318-24Value Segment235,0004.503.00
425-34Mid-Market286,5004.452.85
525-34Premium Risky1120,0004.503.00
625-34Value Segment142,0004.303.50
735-44Premium Loyal1135,0004.402.00
835-44Value Segment245,0003.956.75
945+Value Segment130,0004.601.50

/* Another SQL summary: Product type level metrics */

proc sql;

    create table product_type_summary as

    select Product_Type,

           count(*) as Product_Count,

           sum(Sales) as Total_Sales format=comma12.,

           mean(Rating) as Avg_Rating format=8.2,

           mean(Return_Rate) as Avg_Return_Rate format=8.2,

           mean(Days_Since_Launch) as Avg_Days_Since_Launch format=8.2

    from ecommerce_final

    group by Product_Type

    order by Total_Sales desc;

quit;

proc print data=product_type_summary;

run;

OUTPUT:               

ObsProduct_TypeProduct_CountTotal_SalesAvg_RatingAvg_Return_RateAvg_Days_Since_Launch
1Furniture1135,0004.402.00135.00
2Laptop1120,0004.503.0097.00
3GameConsole1110,0004.902.8090.00
4Smartphone195,0004.702.5075.00
5Tablet178,0004.203.2043.00
6Smartwatch160,0004.004.5061.00
7Clothing152,0004.107.00204.00
8Headphones145,0004.205.00107.00
9Beauty142,0004.303.50186.00
10Shoes138,0003.806.50170.00
11Groceries130,0004.601.50367.00
12Books125,0004.801.00738.00

   

Step 5: PROC SUMMARY – Brand level summary 

proc summary data=ecommerce_final nway;

    class Brand;

    var Sales Rating Return_Rate Days_Since_Launch;

    output out=brand_summary

        n(Sales) = Count_Products

        sum(Sales) = Total_Sales

        mean(Sales) = Avg_Sales

        mean(Rating) = Avg_Rating

        mean(Return_Rate) = Avg_Return_Rate

        mean(Days_Since_Launch) = Avg_Days_Since_Launch;

run;

proc print data=brand_summary;

run;                

OUTPUT:

ObsBrand_TYPE__FREQ_Count_ProductsTotal_SalesAvg_SalesAvg_RatingAvg_Return_RateAvg_Days_Since_Launch
1BrandA11138000380003.806.5170
2BrandB11152000520004.107.0204
3BrandC11130000300004.601.5367
4BrandD11142000420004.303.5186
5BrandE11125000250004.801.0738
6BrandF1111350001350004.402.0135
7BrandG1111100001100004.902.890
8BrandX122155000775004.353.568
9BrandY122198000990004.353.170
10BrandZ11145000450004.205.0107


Step 6: PROC RANK – Ranking by Sales and Rating 

proc rank data=ecommerce_final out=ecommerce_ranked ties=low descending;

    var Sales Rating;

    ranks Rank_Sales Rank_Rating;

run;

proc print data=ecommerce_ranked;

run; 

OUTPUT:

ObsProduct_TypeBrandUser_Age_GroupOrder_DateLaunch_DateProduct_IDSalesRatingReturn_RateReturn_Window_EndDays_Since_LaunchMonths_Between_Launch_OrderDays_Since_OrderMarket_SegmentRank_SalesRank_Rating
1SmartphoneBrandX25-3415JAN202501NOV20241950004.72.514FEB202575216Mid-Market43
2LaptopBrandY25-3420JAN202515OCT202421200004.53.019FEB202597311Premium Risky25
3HeadphonesBrandZ18-2405JAN202520SEP20243450004.25.004FEB2025107426Value Segment88
4SmartwatchBrandX18-2410JAN202510NOV20244600004.04.509FEB202561221Mid-Market611
5ShoesBrandA35-4418JAN202501AUG20245380003.86.517FEB2025170513Value Segment1012
6ClothingBrandB35-4425JAN202505JUL20246520004.17.024FEB202520466Value Segment710
7GroceriesBrandC45+02JAN202501JAN20247300004.61.501FEB20253671229Value Segment114
8BeautyBrandD25-3412JAN202510JUL20248420004.33.511FEB2025186619Value Segment97
9BooksBrandE18-2408JAN202501JAN20239250004.81.007FEB20257382423Value Segment122
10FurnitureBrandF35-4428JAN202515SEP2024101350004.42.027FEB202513543Premium Loyal16
11TabletBrandY25-3422JAN202510DEC202411780004.23.221FEB20254319Mid-Market58
12GameConsoleBrandG18-2430JAN202501NOV2024121100004.92.801MAR20259021Premium Loyal31


7a: Bar chart – Total sales by product type 

proc sgplot data=ecommerce_final;

    vbar Product_Type / response=Sales stat=sum datalabel;

    yaxis label="Total Sales";

    xaxis label="Product Type";

    title "Total Sales by Product Type";

run;

OUTPUT:

The SGPlot Procedure


Step 7b: Scatter plot – Sales vs Rating by age group 

proc sgplot data=ecommerce_final;

    scatter x=Rating y=Sales / group=User_Age_Group datalabel=Product_Type;

    xaxis label="Average Rating";

    yaxis label="Sales";

    title "Sales vs Rating by User Age Group";

run;

OUTPUT:
The SGPlot Procedure





To Visit My Previous Different Types Of Oils Dataset:Click Here
To Visit My Previous Different Types Of Series 2025 Dataset:Click Here
To Visit My Previous Analyzing Yoga Asanas Worldwide Dataset:Click Here
To Visit My Previous Analyzing Indian Languages Dataset:Click Here  



Follow Us On : 


 


--- FOLLOW OUR BLOG FOR MORE INFORMATION.

--->PLEASE DO COMMENTS AND SHARE OUR BLOG.



No comments:

Post a Comment