BANK FRAUD CASE ANALYSIS USING DATA STEP | PROC SQL | PROC FREQ | PROC MEANS | PROC FORMAT | PROC SGPLOT WITH MACROS AND DATE FUNCTIONS
options nocenter;
STEP 1: CREATE RAW BANK FRAUD DATASET
data bank_fraud_raw;
length Fraud_Type $30 Bank_Name $30 Region $20 Mode $15 Detection_Date_char $9;
input Fraud_Type $ Bank_Name $ Region $ Mode $ Amount_Loss Risk_Score Detection_Date_char $;
datalines;
PHISHING GlobalSecure_Bank North_India ONLINE 350000 82 15JAN2024
CARD_SKIMMING Metro_Bank South_India ATM 125000 68 02FEB2024
LOAN_FRAUD Bharat_National_Bank West_India BRANCH 950000 91 20DEC2023
INTERNAL_FRAUD CityUnion_Bank Central_India BRANCH 1200000 95 05NOV2023
UPI_FRAUD DigitalFirst_Bank Metro MOBILE_APP 60000 45 28MAR2024
KYC_FRAUD SafeTrust_Bank East_India ONLINE 210000 55 11APR2024
INTERNET_BANKING Horizon_Bank North_India ONLINE 480000 73 19JAN2024
FAKE_CHEQUE Peoples_Bank South_India CHEQUE 320000 64 07MAY2024
CREDIT_CARD PrimeSecure_Bank Metro ONLINE 780000 88 23FEB2024
DEBIT_CARD Rural_Cooperative_Bank Rural ATM 90000 38 14MAR2024
SIM_SWAP UrbanTrust_Bank West_India MOBILE_APP 260000 72 30APR2024
ACCOUNT_TAKEOVER Capital_Union_Bank North_India ONLINE 1450000 94 09JAN2024
ATM_JACKPOT Oceanic_Bank South_India ATM 510000 77 18FEB2024
E_WALLET Sunrise_Bank Metro MOBILE_APP 130000 52 21MAR2024
TRADE_FINANCE Liberty_Bank Central_India BRANCH 2100000 97 27OCT2023
;
run;
proc print data=bank_fraud_raw;
run;
OUTPUT:
| Obs | Fraud_Type | Bank_Name | Region | Mode | Detection_Date_char | Amount_Loss | Risk_Score |
|---|---|---|---|---|---|---|---|
| 1 | PHISHING | GlobalSecure_Bank | North_India | ONLINE | 15JAN2024 | 350000 | 82 |
| 2 | CARD_SKIMMING | Metro_Bank | South_India | ATM | 02FEB2024 | 125000 | 68 |
| 3 | LOAN_FRAUD | Bharat_National_Bank | West_India | BRANCH | 20DEC2023 | 950000 | 91 |
| 4 | INTERNAL_FRAUD | CityUnion_Bank | Central_India | BRANCH | 05NOV2023 | 1200000 | 95 |
| 5 | UPI_FRAUD | DigitalFirst_Bank | Metro | MOBILE_APP | 28MAR2024 | 60000 | 45 |
| 6 | KYC_FRAUD | SafeTrust_Bank | East_India | ONLINE | 11APR2024 | 210000 | 55 |
| 7 | INTERNET_BANKING | Horizon_Bank | North_India | ONLINE | 19JAN2024 | 480000 | 73 |
| 8 | FAKE_CHEQUE | Peoples_Bank | South_India | CHEQUE | 07MAY2024 | 320000 | 64 |
| 9 | CREDIT_CARD | PrimeSecure_Bank | Metro | ONLINE | 23FEB2024 | 780000 | 88 |
| 10 | DEBIT_CARD | Rural_Cooperative_Bank | Rural | ATM | 14MAR2024 | 90000 | 38 |
| 11 | SIM_SWAP | UrbanTrust_Bank | West_India | MOBILE_APP | 30APR2024 | 260000 | 72 |
| 12 | ACCOUNT_TAKEOVER | Capital_Union_Bank | North_India | ONLINE | 09JAN2024 | 1450000 | 94 |
| 13 | ATM_JACKPOT | Oceanic_Bank | South_India | ATM | 18FEB2024 | 510000 | 77 |
| 14 | E_WALLET | Sunrise_Bank | Metro | MOBILE_APP | 21MAR2024 | 130000 | 52 |
| 15 | TRADE_FINANCE | Liberty_Bank | Central_India | BRANCH | 27OCT2023 | 2100000 | 97 |
STEP 2: CONVERT CHARACTER DATE TO SAS DATE AND USE INTNX / INTCK
data bank_fraud;
set bank_fraud_raw;
/* Convert character date (DATE9.) to SAS date */
Detection_Date = input(Detection_Date_char, date9.);
format Detection_Date date9.;
/* Suppose the case is formally reported to regulator 5 days after detection */
Report_Date = intnx('day', Detection_Date, 5, 'same');
format Report_Date date9.;
/* Days between detection and reporting */
Days_To_Report = intck('day', Detection_Date, Report_Date);
/* How many months between detection and end of 2024? */
Months_To_2024_End = intck('month', Detection_Date, '31DEC2024'd);
/* Also derive year and month of detection for time-based analysis */
Year_Detected = year(Detection_Date);
Month_Detected = month(Detection_Date);
run;
proc print data=bank_fraud;
run;
OUTPUT:
| Obs | Fraud_Type | Bank_Name | Region | Mode | Detection_Date_char | Amount_Loss | Risk_Score | Detection_Date | Report_Date | Days_To_Report | Months_To_2024_End | Year_Detected | Month_Detected |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | PHISHING | GlobalSecure_Bank | North_India | ONLINE | 15JAN2024 | 350000 | 82 | 15JAN2024 | 20JAN2024 | 5 | 11 | 2024 | 1 |
| 2 | CARD_SKIMMING | Metro_Bank | South_India | ATM | 02FEB2024 | 125000 | 68 | 02FEB2024 | 07FEB2024 | 5 | 10 | 2024 | 2 |
| 3 | LOAN_FRAUD | Bharat_National_Bank | West_India | BRANCH | 20DEC2023 | 950000 | 91 | 20DEC2023 | 25DEC2023 | 5 | 12 | 2023 | 12 |
| 4 | INTERNAL_FRAUD | CityUnion_Bank | Central_India | BRANCH | 05NOV2023 | 1200000 | 95 | 05NOV2023 | 10NOV2023 | 5 | 13 | 2023 | 11 |
| 5 | UPI_FRAUD | DigitalFirst_Bank | Metro | MOBILE_APP | 28MAR2024 | 60000 | 45 | 28MAR2024 | 02APR2024 | 5 | 9 | 2024 | 3 |
| 6 | KYC_FRAUD | SafeTrust_Bank | East_India | ONLINE | 11APR2024 | 210000 | 55 | 11APR2024 | 16APR2024 | 5 | 8 | 2024 | 4 |
| 7 | INTERNET_BANKING | Horizon_Bank | North_India | ONLINE | 19JAN2024 | 480000 | 73 | 19JAN2024 | 24JAN2024 | 5 | 11 | 2024 | 1 |
| 8 | FAKE_CHEQUE | Peoples_Bank | South_India | CHEQUE | 07MAY2024 | 320000 | 64 | 07MAY2024 | 12MAY2024 | 5 | 7 | 2024 | 5 |
| 9 | CREDIT_CARD | PrimeSecure_Bank | Metro | ONLINE | 23FEB2024 | 780000 | 88 | 23FEB2024 | 28FEB2024 | 5 | 10 | 2024 | 2 |
| 10 | DEBIT_CARD | Rural_Cooperative_Bank | Rural | ATM | 14MAR2024 | 90000 | 38 | 14MAR2024 | 19MAR2024 | 5 | 9 | 2024 | 3 |
| 11 | SIM_SWAP | UrbanTrust_Bank | West_India | MOBILE_APP | 30APR2024 | 260000 | 72 | 30APR2024 | 05MAY2024 | 5 | 8 | 2024 | 4 |
| 12 | ACCOUNT_TAKEOVER | Capital_Union_Bank | North_India | ONLINE | 09JAN2024 | 1450000 | 94 | 09JAN2024 | 14JAN2024 | 5 | 11 | 2024 | 1 |
| 13 | ATM_JACKPOT | Oceanic_Bank | South_India | ATM | 18FEB2024 | 510000 | 77 | 18FEB2024 | 23FEB2024 | 5 | 10 | 2024 | 2 |
| 14 | E_WALLET | Sunrise_Bank | Metro | MOBILE_APP | 21MAR2024 | 130000 | 52 | 21MAR2024 | 26MAR2024 | 5 | 9 | 2024 | 3 |
| 15 | TRADE_FINANCE | Liberty_Bank | Central_India | BRANCH | 27OCT2023 | 2100000 | 97 | 27OCT2023 | 01NOV2023 | 5 | 14 | 2023 | 10 |
%macro make_risk_format(low=30, medium=60, high=80);
/* Precompute boundary values using macro logic */
%let low_plus1 = %eval(&low. + 1);
%let med_plus1 = %eval(&medium. + 1);
%let high_plus1 = %eval(&high. + 1);
proc format;
/* Numeric format for risk score */
value riskfmt
0 - &low. = 'LOW'
&low_plus1 - &medium. = 'MEDIUM'
&med_plus1 - &high. = 'HIGH'
&high_plus1 - 100 = 'CRITICAL';
/* Region format to display nicer labels */
value $regionfmt
'North_India' = 'North India'
'South_India' = 'South India'
'East_India' = 'East India'
'West_India' = 'West India'
'Central_India' = 'Central India'
'Metro' = 'Metro Region'
'Rural' = 'Rural Region';
/* Mode format for readability */
value $modefmt
'ONLINE' = 'Online'
'ATM' = 'ATM'
'BRANCH' = 'Branch'
'MOBILE_APP' = 'Mobile App'
'CHEQUE' = 'Cheque';
run;
%mend make_risk_format;
%make_risk_format(low=30, medium=60, high=80);
LOG:
STEP 4: APPLY FORMATS AND CREATE A SEVERITY VARIABLE
data bank_fraud_final;
set bank_fraud;
/* Apply_formats */
format Risk_Score riskfmt.
Region $regionfmt.
Mode $modefmt.;
/* Convert numeric risk score into a text label using the format */
length Risk_Severity $10;
Risk_Severity = put(Risk_Score, riskfmt.);
run;
proc print data=bank_fraud_final(obs=10);
title "Sample of BANK_FRAUD_FINAL Dataset";
run;
OUTPUT:
| Obs | Fraud_Type | Bank_Name | Region | Mode | Detection_Date_char | Amount_Loss | Risk_Score | Detection_Date | Report_Date | Days_To_Report | Months_To_2024_End | Year_Detected | Month_Detected | Risk_Severity |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | PHISHING | GlobalSecure_Bank | North India | Online | 15JAN2024 | 350000 | CRITICAL | 15JAN2024 | 20JAN2024 | 5 | 11 | 2024 | 1 | CRITICAL |
| 2 | CARD_SKIMMING | Metro_Bank | South India | ATM | 02FEB2024 | 125000 | HIGH | 02FEB2024 | 07FEB2024 | 5 | 10 | 2024 | 2 | HIGH |
| 3 | LOAN_FRAUD | Bharat_National_Bank | West India | Branch | 20DEC2023 | 950000 | CRITICAL | 20DEC2023 | 25DEC2023 | 5 | 12 | 2023 | 12 | CRITICAL |
| 4 | INTERNAL_FRAUD | CityUnion_Bank | Central India | Branch | 05NOV2023 | 1200000 | CRITICAL | 05NOV2023 | 10NOV2023 | 5 | 13 | 2023 | 11 | CRITICAL |
| 5 | UPI_FRAUD | DigitalFirst_Bank | Metro Region | Mobile App | 28MAR2024 | 60000 | MEDIUM | 28MAR2024 | 02APR2024 | 5 | 9 | 2024 | 3 | MEDIUM |
| 6 | KYC_FRAUD | SafeTrust_Bank | East India | Online | 11APR2024 | 210000 | MEDIUM | 11APR2024 | 16APR2024 | 5 | 8 | 2024 | 4 | MEDIUM |
| 7 | INTERNET_BANKING | Horizon_Bank | North India | Online | 19JAN2024 | 480000 | HIGH | 19JAN2024 | 24JAN2024 | 5 | 11 | 2024 | 1 | HIGH |
| 8 | FAKE_CHEQUE | Peoples_Bank | South India | Cheque | 07MAY2024 | 320000 | HIGH | 07MAY2024 | 12MAY2024 | 5 | 7 | 2024 | 5 | HIGH |
| 9 | CREDIT_CARD | PrimeSecure_Bank | Metro Region | Online | 23FEB2024 | 780000 | CRITICAL | 23FEB2024 | 28FEB2024 | 5 | 10 | 2024 | 2 | CRITICAL |
| 10 | DEBIT_CARD | Rural_Cooperative_Bank | Rural Region | ATM | 14MAR2024 | 90000 | MEDIUM | 14MAR2024 | 19MAR2024 | 5 | 9 | 2024 | 3 | MEDIUM |
STEP 5: PROC SQL SUMMARY BY FRAUD TYPE
proc sql;
create table fraud_summary_sql as
select Fraud_Type,
count(*) as Num_Cases,
sum(Amount_Loss) as Total_Loss format=comma14.,
mean(Amount_Loss) as Avg_Loss format=comma14.,
min(Detection_Date) as First_Detection format=date9.,
max(Detection_Date) as Last_Detection format=date9.,
mean(Risk_Score) as Avg_Risk_Score format=8.2
from bank_fraud_final
group by Fraud_Type
order by Total_Loss desc;
quit;
proc print data=fraud_summary_sql;
title "Fraud Summary by Type Using PROC SQL";
run;
OUTPUT:
| Obs | Fraud_Type | Num_Cases | Total_Loss | Avg_Loss | First_Detection | Last_Detection | Avg_Risk_Score |
|---|---|---|---|---|---|---|---|
| 1 | TRADE_FINANCE | 1 | 2,100,000 | 2,100,000 | 27OCT2023 | 27OCT2023 | 97.00 |
| 2 | ACCOUNT_TAKEOVER | 1 | 1,450,000 | 1,450,000 | 09JAN2024 | 09JAN2024 | 94.00 |
| 3 | INTERNAL_FRAUD | 1 | 1,200,000 | 1,200,000 | 05NOV2023 | 05NOV2023 | 95.00 |
| 4 | LOAN_FRAUD | 1 | 950,000 | 950,000 | 20DEC2023 | 20DEC2023 | 91.00 |
| 5 | CREDIT_CARD | 1 | 780,000 | 780,000 | 23FEB2024 | 23FEB2024 | 88.00 |
| 6 | ATM_JACKPOT | 1 | 510,000 | 510,000 | 18FEB2024 | 18FEB2024 | 77.00 |
| 7 | INTERNET_BANKING | 1 | 480,000 | 480,000 | 19JAN2024 | 19JAN2024 | 73.00 |
| 8 | PHISHING | 1 | 350,000 | 350,000 | 15JAN2024 | 15JAN2024 | 82.00 |
| 9 | FAKE_CHEQUE | 1 | 320,000 | 320,000 | 07MAY2024 | 07MAY2024 | 64.00 |
| 10 | SIM_SWAP | 1 | 260,000 | 260,000 | 30APR2024 | 30APR2024 | 72.00 |
| 11 | KYC_FRAUD | 1 | 210,000 | 210,000 | 11APR2024 | 11APR2024 | 55.00 |
| 12 | E_WALLET | 1 | 130,000 | 130,000 | 21MAR2024 | 21MAR2024 | 52.00 |
| 13 | CARD_SKIMMING | 1 | 125,000 | 125,000 | 02FEB2024 | 02FEB2024 | 68.00 |
| 14 | DEBIT_CARD | 1 | 90,000 | 90,000 | 14MAR2024 | 14MAR2024 | 38.00 |
| 15 | UPI_FRAUD | 1 | 60,000 | 60,000 | 28MAR2024 | 28MAR2024 | 45.00 |
proc sql;
create table fraud_by_region as
select Region,
count(*) as Num_Cases,
sum(Amount_Loss) as Total_Loss format=comma14.
from bank_fraud_final
group by Region;
quit;
proc print data=fraud_by_region;
run;
OUTPUT:
| Obs | Region | Num_Cases | Total_Loss |
|---|---|---|---|
| 1 | Central India | 2 | 3,300,000 |
| 2 | East India | 1 | 210,000 |
| 3 | Metro Region | 3 | 970,000 |
| 4 | North India | 3 | 2,280,000 |
| 5 | Rural Region | 1 | 90,000 |
| 6 | South India | 3 | 955,000 |
| 7 | West India | 2 | 1,210,000 |
STEP 6: PROC MEANS FOR AMOUNT LOSS AND RISK SCORE
proc means data=bank_fraud_final mean median min max sum maxdec=2;
class Fraud_Type;
var Amount_Loss Risk_Score;
title "PROC MEANS: Statistics of Amount_Loss and Risk_Score by Fraud_Type";
run;
OUTPUT:
The MEANS Procedure
| Fraud_Type | N Obs | Variable | Mean | Median | Minimum | Maximum | Sum |
|---|---|---|---|---|---|---|---|
| ACCOUNT_TAKEOVER | 1 | Amount_Loss Risk_Score | 1450000.00 94.00 | 1450000.00 94.00 | 1450000.00 94.00 | 1450000.00 94.00 | 1450000.00 94.00 |
| ATM_JACKPOT | 1 | Amount_Loss Risk_Score | 510000.00 77.00 | 510000.00 77.00 | 510000.00 77.00 | 510000.00 77.00 | 510000.00 77.00 |
| CARD_SKIMMING | 1 | Amount_Loss Risk_Score | 125000.00 68.00 | 125000.00 68.00 | 125000.00 68.00 | 125000.00 68.00 | 125000.00 68.00 |
| CREDIT_CARD | 1 | Amount_Loss Risk_Score | 780000.00 88.00 | 780000.00 88.00 | 780000.00 88.00 | 780000.00 88.00 | 780000.00 88.00 |
| DEBIT_CARD | 1 | Amount_Loss Risk_Score | 90000.00 38.00 | 90000.00 38.00 | 90000.00 38.00 | 90000.00 38.00 | 90000.00 38.00 |
| E_WALLET | 1 | Amount_Loss Risk_Score | 130000.00 52.00 | 130000.00 52.00 | 130000.00 52.00 | 130000.00 52.00 | 130000.00 52.00 |
| FAKE_CHEQUE | 1 | Amount_Loss Risk_Score | 320000.00 64.00 | 320000.00 64.00 | 320000.00 64.00 | 320000.00 64.00 | 320000.00 64.00 |
| INTERNAL_FRAUD | 1 | Amount_Loss Risk_Score | 1200000.00 95.00 | 1200000.00 95.00 | 1200000.00 95.00 | 1200000.00 95.00 | 1200000.00 95.00 |
| INTERNET_BANKING | 1 | Amount_Loss Risk_Score | 480000.00 73.00 | 480000.00 73.00 | 480000.00 73.00 | 480000.00 73.00 | 480000.00 73.00 |
| KYC_FRAUD | 1 | Amount_Loss Risk_Score | 210000.00 55.00 | 210000.00 55.00 | 210000.00 55.00 | 210000.00 55.00 | 210000.00 55.00 |
| LOAN_FRAUD | 1 | Amount_Loss Risk_Score | 950000.00 91.00 | 950000.00 91.00 | 950000.00 91.00 | 950000.00 91.00 | 950000.00 91.00 |
| PHISHING | 1 | Amount_Loss Risk_Score | 350000.00 82.00 | 350000.00 82.00 | 350000.00 82.00 | 350000.00 82.00 | 350000.00 82.00 |
| SIM_SWAP | 1 | Amount_Loss Risk_Score | 260000.00 72.00 | 260000.00 72.00 | 260000.00 72.00 | 260000.00 72.00 | 260000.00 72.00 |
| TRADE_FINANCE | 1 | Amount_Loss Risk_Score | 2100000.00 97.00 | 2100000.00 97.00 | 2100000.00 97.00 | 2100000.00 97.00 | 2100000.00 97.00 |
| UPI_FRAUD | 1 | Amount_Loss Risk_Score | 60000.00 45.00 | 60000.00 45.00 | 60000.00 45.00 | 60000.00 45.00 | 60000.00 45.00 |
STEP 7: PROC FREQ FOR FRAUD PATTERN ANALYSIS
proc freq data=bank_fraud_final;
tables Fraud_Type*Region / nocol norow nopercent;
tables Risk_Severity*Mode / nocum;
title "PROC FREQ: Cross-Tabulations for Bank Fraud Cases";
run;
OUTPUT:
The FREQ Procedure
|
| |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
| |||||||||||||||||||||||||||||||||||||||||||||||||
STEP 8: PROC SGPLOT - TOTAL LOSS BY FRAUD TYPE
proc sgplot data=bank_fraud_final;
vbar Fraud_Type / response=Amount_Loss stat=sum datalabel;
yaxis label="Total Amount Loss (INR)";
xaxis label="Fraud Type";
title "Total Amount Loss by Fraud Type";
run;
OUTPUT:
/*CASE COUNT BY DETECTION DATE*/
proc sgplot data=bank_fraud_final;
title "Number of Fraud Cases Over Time";
vbar Detection_Date;
xaxis label="Detection Date";
yaxis label="Number of Cases";
run;
No comments:
Post a Comment