- Get link
- X
- Other Apps
CREATING AND ANALYZING A UNIQUE CUSTOMER TRANSACTIONS DATASET IN SAS
/*1. Creating a Unique Dataset: Customer Transactions*/
/*A dataset named customer_transactions to simulate customer purchases across
different regions and product categories. This dataset includes:*/
/*CustomerID: Unique identifier for each customer.*/
/*TransactionID: Unique identifier for each transaction.*/
/*Region: Geographic region of the customer.*/
/*ProductCategory: Category of the purchased product.*/
/*Amount: Transaction amount.*/
/*TransactionDate: Date of the transaction.*/
/*Generating Sample Data:*/
data customer_transactions;
length CustomerID $10 TransactionID $15 Region $12 ProductCategory $15;
format Amount dollar8.2 TransactionDate date9.;
do i = 1 to 1000;
CustomerID = cats('CUST', put(floor(1 + rand("uniform")*5000), z5.));
TransactionID = cats('TXN', put(i, z6.));
Region = choosec(floor(1 + rand("uniform")*4), 'North', 'South', 'East', 'West');
ProductCategory = choosec(floor(1 + rand("uniform")*5), 'Electronics', 'Clothing', 'Home Goods', 'Books', 'Toys');
Amount = round(rand("uniform")*500, 0.01);
TransactionDate = '01JAN2025'd + floor(rand("uniform")*90);
output;
end;
drop i;
run;
/*This code generates 1,000 transactions with randomized attributes,
ensuring a diverse dataset for analysis.*/
/*2. Exploring the Dataset*/
/*Viewing the First Few Observations:*/
proc print data=customer_transactions (obs=10);
run;
Output:
Obs | CustomerID | TransactionID | Region | ProductCategory | Amount | TransactionDate |
---|---|---|---|---|---|---|
1 | CUST00436 | TXN000001 | South | Electronics | $378.03 | 28MAR2025 |
2 | CUST02913 | TXN000002 | South | Books | $58.88 | 11MAR2025 |
3 | CUST02896 | TXN000003 | North | Electronics | $219.43 | 02JAN2025 |
4 | CUST03197 | TXN000004 | South | Clothing | $412.10 | 25MAR2025 |
5 | CUST03501 | TXN000005 | North | Electronics | $120.32 | 14MAR2025 |
6 | CUST03761 | TXN000006 | East | Books | $238.60 | 10JAN2025 |
7 | CUST01428 | TXN000007 | West | Clothing | $295.06 | 04JAN2025 |
8 | CUST01641 | TXN000008 | West | Clothing | $467.81 | 04MAR2025 |
9 | CUST01855 | TXN000009 | North | Clothing | $114.94 | 28MAR2025 |
10 | CUST01351 | TXN000010 | East | Electronics | $155.69 | 12JAN2025 |
/*This displays the first 10 observations, providing a snapshot of the
dataset's structure and contents.*/
/*Generating Summary Statistics:*/
proc means data=customer_transactions n mean std min max;
var Amount;
run;
Output:
Analysis Variable : Amount | ||||
---|---|---|---|---|
N | Mean | Std Dev | Minimum | Maximum |
1000 | 246.8328100 | 139.5586275 | 0.0400000 | 499.9800000 |
/*This produces summary statistics for the Amount variable, including count, mean,*/
/*standard deviation, minimum, and maximum values.*/
/*3. Data Manipulation Techniques*/
/*Sorting the Data:*/
proc sort data=customer_transactions out=sorted_transactions;
by CustomerID TransactionDate;
run;
/*Sorting by CustomerID and TransactionDate organizes the data chronologically
for each customer, facilitating time-series analyses.*/
/*Removing Duplicate Transactions:*/
proc sort data=customer_transactions nodupkey out=unique_transactions;
by TransactionID;
run;proc print;run;
Output:
Obs | CustomerID | TransactionID | Region | ProductCategory | Amount | TransactionDate |
---|---|---|---|---|---|---|
1 | CUST00436 | TXN000001 | South | Electronics | $378.03 | 28MAR2025 |
2 | CUST02913 | TXN000002 | South | Books | $58.88 | 11MAR2025 |
3 | CUST02896 | TXN000003 | North | Electronics | $219.43 | 02JAN2025 |
4 | CUST03197 | TXN000004 | South | Clothing | $412.10 | 25MAR2025 |
5 | CUST03501 | TXN000005 | North | Electronics | $120.32 | 14MAR2025 |
6 | CUST03761 | TXN000006 | East | Books | $238.60 | 10JAN2025 |
7 | CUST01428 | TXN000007 | West | Clothing | $295.06 | 04JAN2025 |
8 | CUST01641 | TXN000008 | West | Clothing | $467.81 | 04MAR2025 |
9 | CUST01855 | TXN000009 | North | Clothing | $114.94 | 28MAR2025 |
10 | CUST01351 | TXN000010 | East | Electronics | $155.69 | 12JAN2025 |
11 | CUST04281 | TXN000011 | South | Books | $161.40 | 08JAN2025 |
12 | CUST04886 | TXN000012 | East | Books | $309.23 | 07MAR2025 |
13 | CUST04672 | TXN000013 | East | Home Goods | $186.94 | 23MAR2025 |
14 | CUST04592 | TXN000014 | North | Books | $250.48 | 07MAR2025 |
15 | CUST00748 | TXN000015 | North | Clothing | $295.68 | 06FEB2025 |
/*Using the NODUPKEY option ensures each TransactionID is unique, removing
any duplicate transactions from the dataset.*/
/*Filtering Data for a Specific Region:*/
data north_region_transactions;
set customer_transactions;
where Region = 'North';
run;proc print;run;
Output:
Obs | CustomerID | TransactionID | Region | ProductCategory | Amount | TransactionDate |
---|---|---|---|---|---|---|
1 | CUST02896 | TXN000003 | North | Electronics | $219.43 | 02JAN2025 |
2 | CUST03501 | TXN000005 | North | Electronics | $120.32 | 14MAR2025 |
3 | CUST01855 | TXN000009 | North | Clothing | $114.94 | 28MAR2025 |
4 | CUST04592 | TXN000014 | North | Books | $250.48 | 07MAR2025 |
5 | CUST00748 | TXN000015 | North | Clothing | $295.68 | 06FEB2025 |
6 | CUST03292 | TXN000019 | North | Toys | $354.89 | 21MAR2025 |
7 | CUST01521 | TXN000023 | North | Books | $419.42 | 17MAR2025 |
8 | CUST03299 | TXN000027 | North | Books | $302.63 | 23JAN2025 |
9 | CUST03038 | TXN000031 | North | Toys | $125.37 | 28FEB2025 |
10 | CUST02447 | TXN000042 | North | Home Goods | $234.24 | 05MAR2025 |
/*This creates a subset containing only transactions from the 'North' region,
enabling region-specific analyses.*/
/*4. Advanced Data Analysis*/
/*Calculating Total Sales per Region:*/
proc sql;
create table region_sales as
select Region, sum(Amount) as TotalSales format=dollar12.2
from customer_transactions
group by Region;
quit;proc print;run;
Output:
Obs | Region | TotalSales |
---|---|---|
1 | East | $62,837.21 |
2 | North | $69,481.70 |
3 | South | $56,695.72 |
4 | West | $57,818.18 |
/*This SQL procedure calculates total sales for each region, providing insights
into regional performance.*/
/*Identifying Top 5 Customers by Total Spend:*/
proc sql outobs=5;
create table top_customers as
select CustomerID, sum(Amount) as TotalSpent format=dollar12.2
from customer_transactions
group by CustomerID
order by TotalSpent desc;
quit;proc print;run;
Output:
Obs | CustomerID | TotalSpent |
---|---|---|
1 | CUST03037 | $1,218.99 |
2 | CUST04295 | $930.38 |
3 | CUST01332 | $852.85 |
4 | CUST03047 | $850.92 |
5 | CUST00737 | $846.98 |
/*This query identifies the top 5 customers based on total spending,
which is valuable for targeted marketing strategies.*/
/*Creating a Time Series of Daily Sales:*/
proc sort data=customer_transactions out=customer_transactions1;
by transactionDate;
run;
proc timeseries data=customer_transactions1 out=DailySales;
id TransactionDate interval=day accumulate=total;
var Amount;
sum;
run;proc print;run;
Output:
FIRST FIFTEEN OBSERVATIONS
Obs | TransactionDate | Amount |
---|---|---|
1 | 01JAN2025 | $947.22 |
2 | 02JAN2025 | $2441.11 |
3 | 03JAN2025 | $1431.06 |
4 | 04JAN2025 | $5199.61 |
5 | 05JAN2025 | $1899.26 |
6 | 06JAN2025 | $2851.73 |
7 | 07JAN2025 | $1595.13 |
8 | 08JAN2025 | $1629.22 |
9 | 09JAN2025 | $2491.96 |
10 | 10JAN2025 | $3143.57 |
11 | 11JAN2025 | $5028.69 |
12 | 12JAN2025 | $1960.46 |
13 | 13JAN2025 | $2654.61 |
14 | 14JAN2025 | $1966.30 |
15 | 15JAN2025 | $1517.88 |
80 | 21MAR2025 | $3025.42 |
---|---|---|
81 | 22MAR2025 | $3392.94 |
82 | 23MAR2025 | $3675.25 |
83 | 24MAR2025 | $5110.02 |
84 | 25MAR2025 | $2589.59 |
85 | 26MAR2025 | $2469.86 |
86 | 27MAR2025 | $4345.16 |
87 | 28MAR2025 | $4084.38 |
88 | 29MAR2025 | $3514.46 |
89 | 30MAR2025 | $2320.16 |
90 | 31MAR2025 | $3480.49 |
/*This procedure aggregates daily sales, preparing the data for time-series analysis
and forecasting.*/
/*5. Data Visualization*/
/*Bar Chart of Sales by Region:*/
proc sgplot data=region_sales;
vbar Region / response=TotalSales datalabel;
title "Total Sales by Region";
run;
/*This bar chart visually represents total sales across different regions,
highlighting regional performance disparities.*/
/*Line Plot of Daily Sales Over Time:*/
proc sgplot data=DailySales;
series x=TransactionDate y=Amount / markers;
title "Daily Sales Over Time";
run;
/*This line plot illustrates sales trends over time, aiding in the identification
of patterns and seasonal effects.*/
/*6. Data Integrity and Maintenance*/
/*Renaming Variables:*/
proc datasets library=work;
modify customer_transactions;
rename Amount=TransactionAmount;
run;
quit;
Log:
NOTE: Renaming variable Amount to TransactionAmount.
/*Renaming variables enhances clarity and consistency within the dataset.*/
/*Deleting Unnecessary Datasets:*/
proc datasets library=work;
delete sorted_transactions north_region_transactions;
run;
quit;
Output:
Directory | |
---|---|
Libref | WORK |
Engine | V9 |
Physical Name | C:\Users\Lenovo\AppData\Local\Temp\SAS Temporary Files\_TD12132_DESKTOP-QFAA4KV_ |
Filename | C:\Users\Lenovo\AppData\Local\Temp\SAS Temporary Files\_TD12132_DESKTOP-QFAA4KV_ |
# | Name | Member Type | File Size | Last Modified |
---|---|---|---|---|
1 | CUSTOMER_TRANSACTIONS | DATA | 262144 | 29/03/2025 08:52:23 |
2 | CUSTOMER_TRANSACTIONS1 | DATA | 196608 | 29/03/2025 08:48:32 |
3 | DAILYSALES | DATA | 131072 | 29/03/2025 08:48:42 |
4 | REGION_SALES | DATA | 131072 | 29/03/2025 08:42:05 |
5 | TOP_CUSTOMERS | DATA | 131072 | 29/03/2025 08:42:41 |
6 | UNIQUE_TRANSACTIONS | DATA | 196608 | 29/03/2025 08:39:46 |
/*Removing intermediate datasets helps maintain a clean workspace and
optimizes storage usage.*/
/*7. Splitting the Dataset by Product Category*/
/*To create separate datasets for each product category:*/
proc sort data=customer_transactions out=sorted_transactions;
by ProductCategory;
run;
data Electronics Clothing HomeGoods Books Toys;
set sorted_transactions;
select (ProductCategory);
when ('Electronics') output Electronics;
when ('Clothing') output Clothing;
when ('Home Goods') output HomeGoods;
when ('Books') output Books;
when ('Toys') output Toys;
otherwise;
end;
run;
Toys Output: Only 10 Observations
Obs | CustomerID | TransactionID | Region | ProductCategory | TransactionAmount | TransactionDate |
---|---|---|---|---|---|---|
1 | CUST03292 | TXN000019 | North | Toys | $354.89 | 21MAR2025 |
2 | CUST03279 | TXN000021 | South | Toys | $466.93 | 16FEB2025 |
3 | CUST04858 | TXN000026 | East | Toys | $74.55 | 12FEB2025 |
4 | CUST03038 | TXN000031 | North | Toys | $125.37 | 28FEB2025 |
5 | CUST00122 | TXN000037 | South | Toys | $136.80 | 17FEB2025 |
6 | CUST00337 | TXN000043 | East | Toys | $477.91 | 09MAR2025 |
7 | CUST00002 | TXN000045 | East | Toys | $230.46 | 17MAR2025 |
8 | CUST04097 | TXN000061 | South | Toys | $55.49 | 24FEB2025 |
9 | CUST02469 | TXN000064 | West | Toys | $19.18 | 20MAR2025 |
10 | CUST03763 | TXN000067 | East | Toys | $151.58 | 03JAN2025 |
/*This code sorts the data by ProductCategory and then creates separate datasets
for each category.*/
/*8. Identifying and Removing Duplicate Customers*/
/*To find and remove duplicate customer records:*/
proc sort data=customer_transactions nodupkey out=unique_customers;
by CustomerID;
run;
Output:
Obs | CustomerID | TransactionID | Region | ProductCategory | TransactionAmount | TransactionDate |
---|---|---|---|---|---|---|
1 | CUST00002 | TXN000045 | East | Toys | $230.46 | 17MAR2025 |
2 | CUST00005 | TXN000867 | West | Books | $378.73 | 04JAN2025 |
3 | CUST00015 | TXN000360 | South | Books | $354.02 | 15FEB2025 |
4 | CUST00031 | TXN000049 | West | Clothing | $253.29 | 07JAN2025 |
5 | CUST00043 | TXN000173 | South | Home Goods | $339.91 | 27MAR2025 |
6 | CUST00044 | TXN000831 | East | Home Goods | $304.19 | 13MAR2025 |
7 | CUST00045 | TXN000269 | West | Home Goods | $162.32 | 09MAR2025 |
8 | CUST00053 | TXN000395 | West | Clothing | $58.34 | 30JAN2025 |
9 | CUST00056 | TXN000422 | South | Clothing | $462.48 | 28JAN2025 |
10 | CUST00063 | TXN000508 | South | Electronics | $469.49 | 27MAR2025 |
/*This ensures that each CustomerID appears only once in the unique_customers dataset.*/
/*9. Appending New Transactions to the Existing Dataset*/
/*If new transaction data becomes available, it can be appended as follows:*/
proc append base=customer_transactions data=new_transactions;
run;
/*This adds the observations from new_transactions to the existing customer_transactions dataset.*/
/*10. Exporting the Dataset to a CSV File*/
/*To export the dataset for use in other applications:*/
proc export data=customer_transactions
outfile='/path/to/export/customer_transactions.csv'
dbms=csv
replace;
run;
/*This exports the customer_transactions dataset to a CSV file at the specified location.*/
/*By following these steps, you can effectively create, manipulate, analyze,
and visualize a unique dataset in SAS, leveraging various procedures to extract
meaningful insights and maintain data integrity.*/
- Get link
- X
- Other Apps
Comments
Post a Comment