143.COMPREHENSIVE SAS ANALYSIS OF E-COMMERCE TRANSACTIONS SALES DATA PROCESSING | REPORTING AND VISUALIZATION
- Get link
- X
- Other Apps
COMPREHENSIVE SAS ANALYSIS OF E-COMMERCE TRANSACTIONS
SALES DATA PROCESSING | REPORTING AND VISUALIZATION
/* Step 1: Creating an E-Commerce Sales Dataset */
DATA ecom_sales;
LENGTH OrderID $10 CustomerID $8 ProductID $6 Category $15 PaymentMethod $10 Status $10;
FORMAT OrderDate DATE9. Amount DOLLAR8.2;
ARRAY categories[5] $15 _TEMPORARY_ ('Electronics', 'Clothing', 'Books', 'Home Decor', 'Beauty');
ARRAY payments[4] $10 _TEMPORARY_ ('Credit Card', 'Debit Card', 'PayPal', 'UPI');
ARRAY statuses[3] $10 _TEMPORARY_ ('Shipped', 'Pending', 'Cancelled');
DO i = 1 TO 5000;
OrderID = CAT('ORD', PUT(i, Z5.));
CustomerID = CAT('CUST', PUT(CEIL(RANUNI(0) * 1000), Z4.));
ProductID = CAT('PRD', PUT(CEIL(RANUNI(0) * 500), Z3.));
Category = categories[INT(RANUNI(0) * 5) + 1];
OrderDate = '01JAN2023'D + INT(RANUNI(0) * 365);
Amount = ROUND(RANUNI(0) * 500, .01);
PaymentMethod = payments[INT(RANUNI(0) * 4) + 1];
Status = statuses[INT(RANUNI(0) * 3) + 1];
OUTPUT;
END;
DROP i;
RUN;
/* Step 2: Viewing the first few rows */
PROC PRINT DATA=ecom_sales (OBS=10);
RUN;
OUTPUT:
Obs | OrderID | CustomerID | ProductID | Category | PaymentMethod | Status | OrderDate | Amount |
---|---|---|---|---|---|---|---|---|
1 | ORD00001 | CUST0311 | PRD477 | Beauty | UPI | Cancelled | 12JAN2023 | $116.43 |
2 | ORD00002 | CUST0032 | PRD456 | Clothing | Debit Card | Cancelled | 20JUL2023 | $162.23 |
3 | ORD00003 | CUST0407 | PRD146 | Electronics | Debit Card | Cancelled | 24AUG2023 | $203.99 |
4 | ORD00004 | CUST0747 | PRD336 | Home Decor | Debit Card | Pending | 23OCT2023 | $264.12 |
5 | ORD00005 | CUST0059 | PRD411 | Books | UPI | Shipped | 19FEB2023 | $310.22 |
6 | ORD00006 | CUST0698 | PRD483 | Home Decor | Debit Card | Shipped | 09JUL2023 | $188.23 |
7 | ORD00007 | CUST0633 | PRD491 | Electronics | Debit Card | Pending | 22FEB2023 | $312.75 |
8 | ORD00008 | CUST0221 | PRD474 | Beauty | Credit Car | Shipped | 06FEB2023 | $408.04 |
9 | ORD00009 | CUST0358 | PRD352 | Electronics | Debit Card | Shipped | 24MAR2023 | $65.89 |
10 | ORD00010 | CUST0978 | PRD495 | Electronics | UPI | Pending | 05APR2023 | $178.67 |
/* Step 3: Summary Statistics */
PROC MEANS DATA=ecom_sales MEAN MIN MAX SUM;
VAR Amount;
RUN;
OUTPUT:
Analysis Variable : Amount | |||
---|---|---|---|
Mean | Minimum | Maximum | Sum |
248.7539780 | 0.2500000 | 499.6500000 | 1243769.89 |
/* Step 4: Frequency of Order Status */
PROC FREQ DATA=ecom_sales;
TABLES Status / NOCUM NOROW NOCOL;
RUN;
OUTPUT:
Status | Frequency | Percent |
---|---|---|
Cancelled | 1662 | 33.24 |
Pending | 1648 | 32.96 |
Shipped | 1690 | 33.80 |
/* Step 5: Orders Grouped by Category */
PROC SQL;
SELECT Category, COUNT(*) AS TotalOrders, SUM(Amount) AS TotalSales FORMAT=DOLLAR8.2
FROM ecom_sales
GROUP BY Category;
QUIT;
OUTPUT:
Category | TotalOrders | TotalSales |
---|---|---|
Beauty | 980 | 243273.3 |
Books | 1056 | 268206.2 |
Clothing | 1023 | 248809.3 |
Electronics | 997 | 242269.7 |
Home Decor | 944 | 241211.4 |
/* Step 6: Data Transformation - Creating New Variables */
DATA ecom_sales_transformed;
SET ecom_sales;
/* Discount based on category */
IF Category = 'Electronics' THEN Discount = Amount * 0.10;
ELSE IF Category = 'Clothing' THEN Discount = Amount * 0.15;
ELSE IF Category = 'Books' THEN Discount = Amount * 0.05;
ELSE IF Category = 'Home Decor' THEN Discount = Amount * 0.08;
ELSE Discount = Amount * 0.12;
NetAmount = Amount - Discount;
/* Creating a New Column to Categorize Order Value */
IF Amount < 50 THEN OrderType = 'Low Value';
ELSE IF Amount >= 50 AND Amount < 200 THEN OrderType = 'Medium Value';
ELSE OrderType = 'High Value';
/* Adding a new variable to track large transactions */
IF NetAmount > 300 THEN HighValueOrder = 'Yes';
ELSE HighValueOrder = 'No';
RUN;
PROC PRINT;RUN;
OUTPUT:
Obs | OrderID | CustomerID | ProductID | Category | PaymentMethod | Status | OrderDate | Amount | Discount | NetAmount | OrderType | HighValueOrder |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | ORD00001 | CUST0311 | PRD477 | Beauty | UPI | Cancelled | 12JAN2023 | $116.43 | 13.9716 | 102.458 | Medium Va | No |
2 | ORD00002 | CUST0032 | PRD456 | Clothing | Debit Card | Cancelled | 20JUL2023 | $162.23 | 24.3345 | 137.896 | Medium Va | No |
3 | ORD00003 | CUST0407 | PRD146 | Electronics | Debit Card | Cancelled | 24AUG2023 | $203.99 | 20.3990 | 183.591 | High Valu | No |
4 | ORD00004 | CUST0747 | PRD336 | Home Decor | Debit Card | Pending | 23OCT2023 | $264.12 | 21.1296 | 242.990 | High Valu | No |
5 | ORD00005 | CUST0059 | PRD411 | Books | UPI | Shipped | 19FEB2023 | $310.22 | 15.5110 | 294.709 | High Valu | No |
6 | ORD00006 | CUST0698 | PRD483 | Home Decor | Debit Card | Shipped | 09JUL2023 | $188.23 | 15.0584 | 173.172 | Medium Va | No |
7 | ORD00007 | CUST0633 | PRD491 | Electronics | Debit Card | Pending | 22FEB2023 | $312.75 | 31.2750 | 281.475 | High Valu | No |
8 | ORD00008 | CUST0221 | PRD474 | Beauty | Credit Car | Shipped | 06FEB2023 | $408.04 | 48.9648 | 359.075 | High Valu | Yes |
9 | ORD00009 | CUST0358 | PRD352 | Electronics | Debit Card | Shipped | 24MAR2023 | $65.89 | 6.5890 | 59.301 | Medium Va | No |
10 | ORD00010 | CUST0978 | PRD495 | Electronics | UPI | Pending | 05APR2023 | $178.67 | 17.8670 | 160.803 | Medium Va | No |
/* Step 7: Customer Spending Analysis */
PROC SQL;
CREATE TABLE customer_spending AS
SELECT CustomerID, COUNT(OrderID) AS TotalOrders,
SUM(Amount) AS TotalSpent FORMAT=DOLLAR8.2,
AVG(Amount) AS AvgSpent FORMAT=DOLLAR8.2
FROM ecom_sales_transformed
GROUP BY CustomerID
ORDER BY TotalSpent DESC;
QUIT;
OUTPUT:
Obs | CustomerID | TotalOrders | TotalSpent | AvgSpent |
---|---|---|---|---|
1 | CUST0765 | 11 | $3734.45 | $339.50 |
2 | CUST0232 | 11 | $3659.36 | $332.67 |
3 | CUST0744 | 11 | $3541.19 | $321.93 |
4 | CUST0466 | 10 | $3280.04 | $328.00 |
5 | CUST0525 | 10 | $3272.72 | $327.27 |
6 | CUST0860 | 12 | $3236.28 | $269.69 |
7 | CUST0508 | 11 | $3217.59 | $292.51 |
8 | CUST0464 | 12 | $3203.76 | $266.98 |
9 | CUST0816 | 9 | $3169.03 | $352.11 |
10 | CUST0598 | 10 | $3167.33 | $316.73 |
/* Step 8: Finding the Most Popular Products */
PROC SQL;
CREATE TABLE popular_products AS
SELECT ProductID, Category, COUNT(OrderID) AS TotalSales,
SUM(Amount) AS TotalRevenue FORMAT=DOLLAR8.2
FROM ecom_sales_transformed
GROUP BY ProductID, Category
ORDER BY TotalSales DESC;
QUIT;
OUTPUT:
Obs | ProductID | Category | TotalSales | TotalRevenue |
---|---|---|---|---|
1 | PRD385 | Beauty | 7 | $2142.37 |
2 | PRD283 | Books | 7 | $1969.30 |
3 | PRD019 | Beauty | 7 | $2098.08 |
4 | PRD075 | Electronics | 7 | $1822.56 |
5 | PRD240 | Beauty | 7 | $1769.16 |
6 | PRD325 | Clothing | 7 | $1843.34 |
7 | PRD204 | Clothing | 7 | $1325.26 |
8 | PRD260 | Books | 7 | $1516.13 |
9 | PRD194 | Home Decor | 6 | $2073.34 |
10 | PRD431 | Clothing | 6 | $1330.60 |
11 | PRD487 | Clothing | 6 | $1161.03 |
12 | PRD025 | Clothing | 6 | $1700.83 |
13 | PRD267 | Books | 6 | $1448.89 |
14 | PRD024 | Beauty | 6 | $1599.69 |
15 | PRD273 | Electronics | 6 | $2278.55 |
/* Step 9: Report Generation */
PROC REPORT DATA=ecom_sales_transformed NOWD;
COLUMN OrderID CustomerID Category Amount Discount NetAmount PaymentMethod Status OrderType HighValueOrder;
DEFINE OrderID / DISPLAY;
DEFINE CustomerID / DISPLAY;
DEFINE Category / GROUP;
DEFINE Amount / ANALYSIS SUM FORMAT=DOLLAR8.2;
DEFINE Discount / ANALYSIS SUM FORMAT=DOLLAR8.2;
DEFINE NetAmount / ANALYSIS SUM FORMAT=DOLLAR8.2;
DEFINE PaymentMethod / GROUP;
DEFINE Status / GROUP;
DEFINE OrderType / GROUP;
DEFINE HighValueOrder / GROUP;
RUN;
OUTPUT:
OrderID | CustomerID | Category | Amount | Discount | NetAmount | PaymentMethod | Status | OrderType | HighValueOrder |
---|---|---|---|---|---|---|---|---|---|
ORD00080 | CUST0780 | Beauty | $274.04 | $32.88 | $241.16 | Credit Car | Cancelled | High Valu | No |
ORD00167 | CUST0298 | $337.68 | $40.52 | $297.16 | |||||
ORD00270 | CUST0083 | $265.29 | $31.83 | $233.46 | |||||
ORD00755 | CUST0044 | $275.49 | $33.06 | $242.43 | |||||
ORD00889 | CUST0622 | $216.49 | $25.98 | $190.51 | |||||
ORD00922 | CUST0988 | $300.79 | $36.09 | $264.70 | |||||
ORD01281 | CUST0032 | $238.75 | $28.65 | $210.10 | |||||
ORD01621 | CUST0740 | $302.41 | $36.29 | $266.12 | |||||
ORD01825 | CUST0726 | $323.19 | $38.78 | $284.41 | |||||
ORD02007 | CUST0432 | $307.20 | $36.86 | $270.34 | |||||
ORD02388 | CUST0845 | $205.76 | $24.69 | $181.07 | |||||
ORD02455 | CUST0481 | $324.37 | $38.92 | $285.45 | |||||
ORD02764 | CUST0375 | $222.25 | $26.67 | $195.58 | |||||
ORD03049 | CUST0837 | $340.38 | $40.85 | $299.53 | |||||
ORD03314 | CUST0900 | $251.19 | $30.14 | $221.05 |
/* Step 10: Monthly Sales Analysis */
DATA ecom_sales_monthly;
SET ecom_sales_transformed;
FORMAT Month YEAR4. MonthName $9.;
Month = YEAR(OrderDate) * 100 + MONTH(OrderDate);
MonthName = PUT(OrderDate, MONNAME9.);
RUN;
PROC PRINT;RUN;
OUTPUT:
Obs | OrderID | CustomerID | ProductID | Category | PaymentMethod | Status | OrderDate | Amount | Discount | NetAmount | OrderType | HighValueOrder | Month | MonthName |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | ORD00001 | CUST0311 | PRD477 | Beauty | UPI | Cancelled | 12JAN2023 | $116.43 | 13.9716 | 102.458 | Medium Va | No | 2513 | January |
2 | ORD00002 | CUST0032 | PRD456 | Clothing | Debit Card | Cancelled | 20JUL2023 | $162.23 | 24.3345 | 137.896 | Medium Va | No | 2513 | July |
3 | ORD00003 | CUST0407 | PRD146 | Electronics | Debit Card | Cancelled | 24AUG2023 | $203.99 | 20.3990 | 183.591 | High Valu | No | 2513 | August |
4 | ORD00004 | CUST0747 | PRD336 | Home Decor | Debit Card | Pending | 23OCT2023 | $264.12 | 21.1296 | 242.990 | High Valu | No | 2513 | October |
5 | ORD00005 | CUST0059 | PRD411 | Books | UPI | Shipped | 19FEB2023 | $310.22 | 15.5110 | 294.709 | High Valu | No | 2513 | February |
6 | ORD00006 | CUST0698 | PRD483 | Home Decor | Debit Card | Shipped | 09JUL2023 | $188.23 | 15.0584 | 173.172 | Medium Va | No | 2513 | July |
7 | ORD00007 | CUST0633 | PRD491 | Electronics | Debit Card | Pending | 22FEB2023 | $312.75 | 31.2750 | 281.475 | High Valu | No | 2513 | February |
8 | ORD00008 | CUST0221 | PRD474 | Beauty | Credit Car | Shipped | 06FEB2023 | $408.04 | 48.9648 | 359.075 | High Valu | Yes | 2513 | February |
9 | ORD00009 | CUST0358 | PRD352 | Electronics | Debit Card | Shipped | 24MAR2023 | $65.89 | 6.5890 | 59.301 | Medium Va | No | 2513 | March |
10 | ORD00010 | CUST0978 | PRD495 | Electronics | UPI | Pending | 05APR2023 | $178.67 | 17.8670 | 160.803 | Medium Va | No | 2513 | April |
PROC SQL;
SELECT MonthName, COUNT(OrderID) AS TotalOrders, SUM(Amount) AS MonthlySales FORMAT=DOLLAR8.2
FROM ecom_sales_monthly
GROUP BY MonthName
ORDER BY MonthName;
QUIT;
OUTPUT:
MonthName | TotalOrders | MonthlySales |
---|---|---|
May | 423 | 101560.4 |
July | 434 | 108752.0 |
June | 415 | 104076.8 |
April | 414 | 102758.7 |
March | 435 | 105299.1 |
August | 423 | 102671.3 |
January | 419 | 108387.4 |
October | 444 | 107899.4 |
December | 389 | 100616.8 |
February | 383 | 94189.63 |
November | 422 | 106063.9 |
September | 399 | 101494.5 |
/* Step 11: Exporting Data */
PROC EXPORT DATA=ecom_sales_transformed
OUTFILE='/home/user/ecom_sales_data.csv'
DBMS=CSV REPLACE;
PUTNAMES=YES;
RUN;
/* Step 12: Data Visualization - Bar Chart for Order Status */
PROC SGPLOT DATA=ecom_sales_transformed;
VBAR Status / RESPONSE=Amount STAT=SUM DATALABEL;
TITLE "Total Sales by Order Status";
RUN;
/* Step 13: Data Visualization - Pie Chart for Payment Methods */
PROC GCHART DATA=ecom_sales_transformed;
PIE PaymentMethod / SUMVAR=Amount VALUE=INSIDE SLICE=OUTSIDE;
TITLE "Payment Methods Distribution";
RUN;
QUIT;
/* Step 14: Additional Analysis - Average Spending by Order Type */
PROC SQL;
SELECT OrderType, AVG(NetAmount) AS AvgSpending FORMAT=DOLLAR8.2
FROM ecom_sales_transformed
GROUP BY OrderType;
QUIT;
OrderType | AvgSpending |
---|---|
High Value | $316.04 |
Low Value | $22.95 |
Medium Value | $111.59 |
- Get link
- X
- Other Apps
Comments
Post a Comment