Friday, 5 December 2025

330.BANK FRAUD CASE ANALYSIS USING PROC SQL | PROC FREQ | PROC MEANS | PROC FORMAT | PROC SGPLOT WITH MACROS AND DATE FUNCTIONS

BANK FRAUD CASE ANALYSIS USING DATA STEP | PROC SQL | PROC FREQ | PROC MEANS | PROC FORMAT | PROC SGPLOT WITH MACROS AND DATE FUNCTIONS

options nocenter;

STEP 1: CREATE RAW BANK FRAUD DATASET 

data bank_fraud_raw;

    length Fraud_Type $30 Bank_Name $30 Region $20 Mode $15 Detection_Date_char $9;

    input Fraud_Type $ Bank_Name $ Region $ Mode $ Amount_Loss Risk_Score Detection_Date_char $;

    datalines;

PHISHING            GlobalSecure_Bank      North_India     ONLINE       350000 82 15JAN2024

CARD_SKIMMING       Metro_Bank             South_India     ATM          125000 68 02FEB2024

LOAN_FRAUD          Bharat_National_Bank   West_India      BRANCH       950000 91 20DEC2023

INTERNAL_FRAUD      CityUnion_Bank         Central_India   BRANCH       1200000 95 05NOV2023

UPI_FRAUD           DigitalFirst_Bank      Metro           MOBILE_APP   60000  45 28MAR2024

KYC_FRAUD           SafeTrust_Bank         East_India      ONLINE       210000 55 11APR2024

INTERNET_BANKING    Horizon_Bank           North_India     ONLINE       480000 73 19JAN2024

FAKE_CHEQUE         Peoples_Bank           South_India     CHEQUE       320000 64 07MAY2024

CREDIT_CARD         PrimeSecure_Bank       Metro           ONLINE       780000 88 23FEB2024

DEBIT_CARD          Rural_Cooperative_Bank Rural           ATM          90000  38 14MAR2024

SIM_SWAP            UrbanTrust_Bank        West_India      MOBILE_APP   260000 72 30APR2024

ACCOUNT_TAKEOVER    Capital_Union_Bank     North_India     ONLINE       1450000 94 09JAN2024

ATM_JACKPOT         Oceanic_Bank           South_India     ATM          510000 77 18FEB2024

E_WALLET            Sunrise_Bank           Metro           MOBILE_APP   130000 52 21MAR2024

TRADE_FINANCE       Liberty_Bank           Central_India   BRANCH       2100000 97 27OCT2023

;

run;

proc print data=bank_fraud_raw;

run;

OUTPUT:

ObsFraud_TypeBank_NameRegionModeDetection_Date_charAmount_LossRisk_Score
1PHISHINGGlobalSecure_BankNorth_IndiaONLINE15JAN202435000082
2CARD_SKIMMINGMetro_BankSouth_IndiaATM02FEB202412500068
3LOAN_FRAUDBharat_National_BankWest_IndiaBRANCH20DEC202395000091
4INTERNAL_FRAUDCityUnion_BankCentral_IndiaBRANCH05NOV2023120000095
5UPI_FRAUDDigitalFirst_BankMetroMOBILE_APP28MAR20246000045
6KYC_FRAUDSafeTrust_BankEast_IndiaONLINE11APR202421000055
7INTERNET_BANKINGHorizon_BankNorth_IndiaONLINE19JAN202448000073
8FAKE_CHEQUEPeoples_BankSouth_IndiaCHEQUE07MAY202432000064
9CREDIT_CARDPrimeSecure_BankMetroONLINE23FEB202478000088
10DEBIT_CARDRural_Cooperative_BankRuralATM14MAR20249000038
11SIM_SWAPUrbanTrust_BankWest_IndiaMOBILE_APP30APR202426000072
12ACCOUNT_TAKEOVERCapital_Union_BankNorth_IndiaONLINE09JAN2024145000094
13ATM_JACKPOTOceanic_BankSouth_IndiaATM18FEB202451000077
14E_WALLETSunrise_BankMetroMOBILE_APP21MAR202413000052
15TRADE_FINANCELiberty_BankCentral_IndiaBRANCH27OCT2023210000097


STEP 2: CONVERT CHARACTER DATE TO SAS DATE AND USE INTNX / INTCK 

data bank_fraud;

    set bank_fraud_raw;


    /* Convert character date (DATE9.) to SAS date */

    Detection_Date = input(Detection_Date_char, date9.);

    format Detection_Date date9.;


    /* Suppose the case is formally reported to regulator 5 days after detection */

    Report_Date = intnx('day', Detection_Date, 5, 'same');

    format Report_Date date9.;


    /* Days between detection and reporting */

    Days_To_Report = intck('day', Detection_Date, Report_Date);


    /* How many months between detection and end of 2024? */

    Months_To_2024_End = intck('month', Detection_Date, '31DEC2024'd);


    /* Also derive year and month of detection for time-based analysis */

    Year_Detected  = year(Detection_Date);

    Month_Detected = month(Detection_Date);

run;

proc print data=bank_fraud;

run;

OUTPUT:

ObsFraud_TypeBank_NameRegionModeDetection_Date_charAmount_LossRisk_ScoreDetection_DateReport_DateDays_To_ReportMonths_To_2024_EndYear_DetectedMonth_Detected
1PHISHINGGlobalSecure_BankNorth_IndiaONLINE15JAN20243500008215JAN202420JAN202451120241
2CARD_SKIMMINGMetro_BankSouth_IndiaATM02FEB20241250006802FEB202407FEB202451020242
3LOAN_FRAUDBharat_National_BankWest_IndiaBRANCH20DEC20239500009120DEC202325DEC2023512202312
4INTERNAL_FRAUDCityUnion_BankCentral_IndiaBRANCH05NOV202312000009505NOV202310NOV2023513202311
5UPI_FRAUDDigitalFirst_BankMetroMOBILE_APP28MAR2024600004528MAR202402APR20245920243
6KYC_FRAUDSafeTrust_BankEast_IndiaONLINE11APR20242100005511APR202416APR20245820244
7INTERNET_BANKINGHorizon_BankNorth_IndiaONLINE19JAN20244800007319JAN202424JAN202451120241
8FAKE_CHEQUEPeoples_BankSouth_IndiaCHEQUE07MAY20243200006407MAY202412MAY20245720245
9CREDIT_CARDPrimeSecure_BankMetroONLINE23FEB20247800008823FEB202428FEB202451020242
10DEBIT_CARDRural_Cooperative_BankRuralATM14MAR2024900003814MAR202419MAR20245920243
11SIM_SWAPUrbanTrust_BankWest_IndiaMOBILE_APP30APR20242600007230APR202405MAY20245820244
12ACCOUNT_TAKEOVERCapital_Union_BankNorth_IndiaONLINE09JAN202414500009409JAN202414JAN202451120241
13ATM_JACKPOTOceanic_BankSouth_IndiaATM18FEB20245100007718FEB202423FEB202451020242
14E_WALLETSunrise_BankMetroMOBILE_APP21MAR20241300005221MAR202426MAR20245920243
15TRADE_FINANCELiberty_BankCentral_IndiaBRANCH27OCT202321000009727OCT202301NOV2023514202310

 STEP 3: MACRO TO CREATE RISK SEVERITY FORMAT 

%macro make_risk_format(low=30, medium=60, high=80);

    /* Precompute boundary values using macro logic */

    %let low_plus1   = %eval(&low. + 1);

    %let med_plus1   = %eval(&medium. + 1);

    %let high_plus1  = %eval(&high. + 1);


    proc format;

        /* Numeric format for risk score */

        value riskfmt

            0 - &low.              = 'LOW'

            &low_plus1 - &medium.  = 'MEDIUM'

            &med_plus1 - &high.    = 'HIGH'

            &high_plus1 - 100      = 'CRITICAL';


        /* Region format to display nicer labels */

        value $regionfmt

            'North_India'   = 'North India'

            'South_India'   = 'South India'

            'East_India'    = 'East India'

            'West_India'    = 'West India'

            'Central_India' = 'Central India'

            'Metro'         = 'Metro Region'

            'Rural'         = 'Rural Region';


        /* Mode format for readability */

        value $modefmt

            'ONLINE'      = 'Online'

            'ATM'         = 'ATM'

            'BRANCH'      = 'Branch'

            'MOBILE_APP'  = 'Mobile App'

            'CHEQUE'      = 'Cheque';

    run;

%mend make_risk_format;


%make_risk_format(low=30, medium=60, high=80);

LOG:

NOTE: Format RISKFMT has been output.
NOTE: Format $REGIONFMT has been output.
NOTE: Format $MODEFMT has been output.

STEP 4: APPLY FORMATS AND CREATE A SEVERITY VARIABLE 

data bank_fraud_final;

    set bank_fraud;


    /* Apply_formats */

    format Risk_Score riskfmt.

           Region    $regionfmt.

           Mode      $modefmt.;


    /* Convert numeric risk score into a text label using the format */

    length Risk_Severity $10;

    Risk_Severity = put(Risk_Score, riskfmt.);

run;

proc print data=bank_fraud_final(obs=10);

    title "Sample of BANK_FRAUD_FINAL Dataset";

run;

OUTPUT:

Sample of BANK_FRAUD_FINAL Dataset

ObsFraud_TypeBank_NameRegionModeDetection_Date_charAmount_LossRisk_ScoreDetection_DateReport_DateDays_To_ReportMonths_To_2024_EndYear_DetectedMonth_DetectedRisk_Severity
1PHISHINGGlobalSecure_BankNorth IndiaOnline15JAN2024350000CRITICAL15JAN202420JAN202451120241CRITICAL
2CARD_SKIMMINGMetro_BankSouth IndiaATM02FEB2024125000HIGH02FEB202407FEB202451020242HIGH
3LOAN_FRAUDBharat_National_BankWest IndiaBranch20DEC2023950000CRITICAL20DEC202325DEC2023512202312CRITICAL
4INTERNAL_FRAUDCityUnion_BankCentral IndiaBranch05NOV20231200000CRITICAL05NOV202310NOV2023513202311CRITICAL
5UPI_FRAUDDigitalFirst_BankMetro RegionMobile App28MAR202460000MEDIUM28MAR202402APR20245920243MEDIUM
6KYC_FRAUDSafeTrust_BankEast IndiaOnline11APR2024210000MEDIUM11APR202416APR20245820244MEDIUM
7INTERNET_BANKINGHorizon_BankNorth IndiaOnline19JAN2024480000HIGH19JAN202424JAN202451120241HIGH
8FAKE_CHEQUEPeoples_BankSouth IndiaCheque07MAY2024320000HIGH07MAY202412MAY20245720245HIGH
9CREDIT_CARDPrimeSecure_BankMetro RegionOnline23FEB2024780000CRITICAL23FEB202428FEB202451020242CRITICAL
10DEBIT_CARDRural_Cooperative_BankRural RegionATM14MAR202490000MEDIUM14MAR202419MAR20245920243MEDIUM

STEP 5: PROC SQL SUMMARY BY FRAUD TYPE 

proc sql;

    create table fraud_summary_sql as

    select  Fraud_Type,

            count(*)           as Num_Cases,

            sum(Amount_Loss)   as Total_Loss format=comma14.,

            mean(Amount_Loss)  as Avg_Loss   format=comma14.,

            min(Detection_Date) as First_Detection format=date9.,

            max(Detection_Date) as Last_Detection  format=date9.,

            mean(Risk_Score)   as Avg_Risk_Score   format=8.2

    from bank_fraud_final

    group by Fraud_Type

    order by Total_Loss desc;

quit;

proc print data=fraud_summary_sql;

    title "Fraud Summary by Type Using PROC SQL";

run;

OUTPUT:

Fraud Summary by Type Using PROC SQL

ObsFraud_TypeNum_CasesTotal_LossAvg_LossFirst_DetectionLast_DetectionAvg_Risk_Score
1TRADE_FINANCE12,100,0002,100,00027OCT202327OCT202397.00
2ACCOUNT_TAKEOVER11,450,0001,450,00009JAN202409JAN202494.00
3INTERNAL_FRAUD11,200,0001,200,00005NOV202305NOV202395.00
4LOAN_FRAUD1950,000950,00020DEC202320DEC202391.00
5CREDIT_CARD1780,000780,00023FEB202423FEB202488.00
6ATM_JACKPOT1510,000510,00018FEB202418FEB202477.00
7INTERNET_BANKING1480,000480,00019JAN202419JAN202473.00
8PHISHING1350,000350,00015JAN202415JAN202482.00
9FAKE_CHEQUE1320,000320,00007MAY202407MAY202464.00
10SIM_SWAP1260,000260,00030APR202430APR202472.00
11KYC_FRAUD1210,000210,00011APR202411APR202455.00
12E_WALLET1130,000130,00021MAR202421MAR202452.00
13CARD_SKIMMING1125,000125,00002FEB202402FEB202468.00
14DEBIT_CARD190,00090,00014MAR202414MAR202438.00
15UPI_FRAUD160,00060,00028MAR202428MAR202445.00

proc sql;

    create table fraud_by_region as

    select  Region,

            count(*)         as Num_Cases,

            sum(Amount_Loss) as Total_Loss format=comma14.

    from bank_fraud_final

    group by Region;

quit;

proc print data=fraud_by_region;

run;

OUTPUT:

ObsRegionNum_CasesTotal_Loss
1Central India23,300,000
2East India1210,000
3Metro Region3970,000
4North India32,280,000
5Rural Region190,000
6South India3955,000
7West India21,210,000


STEP 6: PROC MEANS FOR AMOUNT LOSS AND RISK SCORE 

proc means data=bank_fraud_final mean median min max sum maxdec=2;

    class Fraud_Type;

    var Amount_Loss Risk_Score;

    title "PROC MEANS: Statistics of Amount_Loss and Risk_Score by Fraud_Type";

run;

OUTPUT:

PROC MEANS: Statistics of Amount_Loss and Risk_Score by Fraud_Type

The MEANS Procedure

Fraud_TypeN ObsVariableMeanMedianMinimumMaximumSum
ACCOUNT_TAKEOVER1
Amount_Loss
Risk_Score
1450000.00
94.00
1450000.00
94.00
1450000.00
94.00
1450000.00
94.00
1450000.00
94.00
ATM_JACKPOT1
Amount_Loss
Risk_Score
510000.00
77.00
510000.00
77.00
510000.00
77.00
510000.00
77.00
510000.00
77.00
CARD_SKIMMING1
Amount_Loss
Risk_Score
125000.00
68.00
125000.00
68.00
125000.00
68.00
125000.00
68.00
125000.00
68.00
CREDIT_CARD1
Amount_Loss
Risk_Score
780000.00
88.00
780000.00
88.00
780000.00
88.00
780000.00
88.00
780000.00
88.00
DEBIT_CARD1
Amount_Loss
Risk_Score
90000.00
38.00
90000.00
38.00
90000.00
38.00
90000.00
38.00
90000.00
38.00
E_WALLET1
Amount_Loss
Risk_Score
130000.00
52.00
130000.00
52.00
130000.00
52.00
130000.00
52.00
130000.00
52.00
FAKE_CHEQUE1
Amount_Loss
Risk_Score
320000.00
64.00
320000.00
64.00
320000.00
64.00
320000.00
64.00
320000.00
64.00
INTERNAL_FRAUD1
Amount_Loss
Risk_Score
1200000.00
95.00
1200000.00
95.00
1200000.00
95.00
1200000.00
95.00
1200000.00
95.00
INTERNET_BANKING1
Amount_Loss
Risk_Score
480000.00
73.00
480000.00
73.00
480000.00
73.00
480000.00
73.00
480000.00
73.00
KYC_FRAUD1
Amount_Loss
Risk_Score
210000.00
55.00
210000.00
55.00
210000.00
55.00
210000.00
55.00
210000.00
55.00
LOAN_FRAUD1
Amount_Loss
Risk_Score
950000.00
91.00
950000.00
91.00
950000.00
91.00
950000.00
91.00
950000.00
91.00
PHISHING1
Amount_Loss
Risk_Score
350000.00
82.00
350000.00
82.00
350000.00
82.00
350000.00
82.00
350000.00
82.00
SIM_SWAP1
Amount_Loss
Risk_Score
260000.00
72.00
260000.00
72.00
260000.00
72.00
260000.00
72.00
260000.00
72.00
TRADE_FINANCE1
Amount_Loss
Risk_Score
2100000.00
97.00
2100000.00
97.00
2100000.00
97.00
2100000.00
97.00
2100000.00
97.00
UPI_FRAUD1
Amount_Loss
Risk_Score
60000.00
45.00
60000.00
45.00
60000.00
45.00
60000.00
45.00
60000.00
45.00

STEP 7: PROC FREQ FOR FRAUD PATTERN ANALYSIS 

proc freq data=bank_fraud_final;

    tables Fraud_Type*Region / nocol norow nopercent;

    tables Risk_Severity*Mode / nocum;

    title "PROC FREQ: Cross-Tabulations for Bank Fraud Cases";

run;

OUTPUT:

PROC FREQ: Cross-Tabulations for Bank Fraud Cases

The FREQ Procedure

Frequency
Table of Fraud_Type by Region
Fraud_TypeRegion
Central IndiaEast IndiaMetro RegionNorth IndiaRural RegionSouth IndiaWest IndiaTotal
ACCOUNT_TAKEOVER
0
0
0
1
0
0
0
1
ATM_JACKPOT
0
0
0
0
0
1
0
1
CARD_SKIMMING
0
0
0
0
0
1
0
1
CREDIT_CARD
0
0
1
0
0
0
0
1
DEBIT_CARD
0
0
0
0
1
0
0
1
E_WALLET
0
0
1
0
0
0
0
1
FAKE_CHEQUE
0
0
0
0
0
1
0
1
INTERNAL_FRAUD
1
0
0
0
0
0
0
1
INTERNET_BANKING
0
0
0
1
0
0
0
1
KYC_FRAUD
0
1
0
0
0
0
0
1
LOAN_FRAUD
0
0
0
0
0
0
1
1
PHISHING
0
0
0
1
0
0
0
1
SIM_SWAP
0
0
0
0
0
0
1
1
TRADE_FINANCE
1
0
0
0
0
0
0
1
UPI_FRAUD
0
0
1
0
0
0
0
1
Total
2
1
3
3
1
3
2
15
Frequency
Percent
Row Pct
Col Pct
Table of Risk_Severity by Mode
Risk_SeverityMode
ATMBranchChequeMobile AppOnlineTotal
CRITICAL
0
0.00
0.00
0.00
3
20.00
50.00
100.00
0
0.00
0.00
0.00
0
0.00
0.00
0.00
3
20.00
50.00
60.00
6
40.00
 
 
HIGH
2
13.33
40.00
66.67
0
0.00
0.00
0.00
1
6.67
20.00
100.00
1
6.67
20.00
33.33
1
6.67
20.00
20.00
5
33.33
 
 
MEDIUM
1
6.67
25.00
33.33
0
0.00
0.00
0.00
0
0.00
0.00
0.00
2
13.33
50.00
66.67
1
6.67
25.00
20.00
4
26.67
 
 
Total
3
20.00
3
20.00
1
6.67
3
20.00
5
33.33
15
100.00

STEP 8: PROC SGPLOT - TOTAL LOSS BY FRAUD TYPE 

proc sgplot data=bank_fraud_final;

    vbar Fraud_Type / response=Amount_Loss stat=sum datalabel;

    yaxis label="Total Amount Loss (INR)";

    xaxis label="Fraud Type";

    title "Total Amount Loss by Fraud Type";

run;

OUTPUT:

The SGPlot Procedure


/*CASE COUNT BY DETECTION DATE*/

proc sgplot data=bank_fraud_final;

    title "Number of Fraud Cases Over Time";

    vbar Detection_Date;

    xaxis label="Detection Date";

    yaxis label="Number of Cases";

run;

OUTPUT:
The SGPlot Procedure




To Visit My Previous Software Company Analysis Dataset:Click Here
To Visit My Previous Vote Program Dataset:Click Here
To Visit My Previous Audi Cars Performance Analysis Dataset:Click Here
To Visit My Previous Global Clothing Trends Dataset:Click Here  



Follow Us On : 


 


--- FOLLOW OUR BLOG FOR MORE INFORMATION.

--->PLEASE DO COMMENTS AND SHARE OUR BLOG.


No comments:

Post a Comment