Tuesday, 2 December 2025

327.CYBER ATTACKS DATASET CREATION AND ANALYSIS USING PROC SQL | PROC MEANS | PROC FREQ | PROC SGPLOT WITH MACRO-BASED RISK CLASSIFICATION AND DATE FUNCTIONS (INTCK | INTNX)

CYBER ATTACKS DATASET CREATION AND ANALYSIS USING PROC SQL | PROC MEANS | PROC FREQ | PROC SGPLOT WITH MACRO-BASED RISK CLASSIFICATION AND DATE FUNCTIONS (INTCK | INTNX)

options nocenter;

1) CREATE RAW DATASET: work.cyber_attacks_raw

data work.cyber_attacks_raw;

  infile datalines dlm='|' dsd;

  informat Occurrence_Date date9. Discovery_Date date9.;

  format Occurrence_Date date9. Discovery_Date date9.;

  length Attack_Type $30 Country $30;

  input Attack_Type $ Country $ Frequency Financial_Loss Occurrence_Date Discovery_Date

        Vulnerability_Level;

  /* compute detection time in days using INTCK (difference in days) */

  Detection_Time_Days = intck('day', Occurrence_Date, Discovery_Date);

datalines;

Phishing|USA|120|1500|01JAN2025|03JAN2025|6

Ransomware|India|18|7500|15FEB2025|20FEB2025|9

DDoS|China|45|250|05MAR2025|05MAR2025|3

Insider Threat|Germany|6|300|12MAR2025|25MAR2025|8

Supply Chain|USA|2|12000|28FEB2025|12MAR2025|7

SQL Injection|Brazil|30|400|10JAN2025|15JAN2025|5

Zero-day Exploit|Russia|1|9000|22MAR2025|28MAR2025|9

Credential Stuffing|UK|85|600|07FEB2025|10FEB2025|4

Malware (Trojan)|South_Africa|25|1200|02MAR2025|06MAR2025|6

Business Email Compromise|Australia|9|2200|18JAN2025|30JAN2025|12

Man-in-the-Middle|Canada|12|150|09FEB2025|11FEB2025|2

Adware/Spyware|Mexico|40|80|20JAN2025|22JAN2025|3

;

run;

proc print data=work.cyber_attacks_raw label noobs;

  title "CYBER ATTACKS RAW (12 OBS)";

  var Attack_Type Country Frequency Financial_Loss Occurrence_Date Discovery_Date 

      Detection_Time_Days Vulnerability_Level;

run;

OUTPUT:

CYBER ATTACKS RAW (12 OBS)

Attack_TypeCountryFrequencyFinancial_LossOccurrence_DateDiscovery_DateDetection_Time_DaysVulnerability_Level
PhishingUSA120150001JAN202503JAN202526
RansomwareIndia18750015FEB202520FEB202559
DDoSChina4525005MAR202505MAR202503
Insider ThreatGermany630012MAR202525MAR2025138
Supply ChainUSA21200028FEB202512MAR2025127
SQL InjectionBrazil3040010JAN202515JAN202555
Zero-day ExploitRussia1900022MAR202528MAR202569
Credential StuffingUK8560007FEB202510FEB202534
Malware (Trojan)South_Africa25120002MAR202506MAR202546
Business Email CompromiseAustralia9220018JAN202530JAN20251212
Man-in-the-MiddleCanada1215009FEB202511FEB202522
Adware/SpywareMexico408020JAN202522JAN202523

2) Use PROC SQL for derived tables / aggregations

/* Create a summary with average loss and mean detection time by Attack_Type */

proc sql;

  create table work.attack_summary as

  select Attack_Type,

         count(*) as N,

         mean(Frequency) format=8.2 as Mean_Freq,

         mean(Financial_Loss) format=12.2 as Mean_Loss_KUSD,

         mean(Detection_Time_Days) format=8.2 as Mean_Detect_Days,

         mean(Vulnerability_Level) format=8.2 as Mean_Vuln

  from work.cyber_attacks_raw

  group by Attack_Type;

quit;

proc print data=work.attack_summary;

run;

OUTPUT:

ObsAttack_TypeNMean_FreqMean_Loss_KUSDMean_Detect_DaysMean_Vuln
1Adware/Spyware140.0080.002.003.00
2Business Email Compromise19.002200.0012.0012.00
3Credential Stuffing185.00600.003.004.00
4DDoS145.00250.000.003.00
5Insider Threat16.00300.0013.008.00
6Malware (Trojan)125.001200.004.006.00
7Man-in-the-Middle112.00150.002.002.00
8Phishing1120.001500.002.006.00
9Ransomware118.007500.005.009.00
10SQL Injection130.00400.005.005.00
11Supply Chain12.0012000.0012.007.00
12Zero-day Exploit11.009000.006.009.00


/* Top 5 highest financial loss events */

proc sql;

  create table work.top_losses as

  select * from work.cyber_attacks_raw

  order by Financial_Loss desc

  ;

quit;

proc print data=work.top_losses;

run;

OUTPUT:

ObsOccurrence_DateDiscovery_DateAttack_TypeCountryFrequencyFinancial_LossVulnerability_LevelDetection_Time_Days
128FEB202512MAR2025Supply ChainUSA212000712
222MAR202528MAR2025Zero-day ExploitRussia1900096
315FEB202520FEB2025RansomwareIndia18750095
418JAN202530JAN2025Business Email CompromiseAustralia922001212
501JAN202503JAN2025PhishingUSA120150062
602MAR202506MAR2025Malware (Trojan)South_Africa25120064
707FEB202510FEB2025Credential StuffingUK8560043
810JAN202515JAN2025SQL InjectionBrazil3040055
912MAR202525MAR2025Insider ThreatGermany6300813
1005MAR202505MAR2025DDoSChina4525030
1109FEB202511FEB2025Man-in-the-MiddleCanada1215022
1220JAN202522JAN2025Adware/SpywareMexico408032


3) Macro: Risk classification (composite score)

%macro classify_risk(indata=work.cyber_attacks_raw, outdata=work.cyber_attacks_risk);

  data &outdata.;

    set &indata.;

    if _n_=1 then do;

      call symputx('minFreq', .);

      call symputx('maxFreq', .);

      call symputx('minLoss', .);

      call symputx('maxLoss', .);

      call symputx('minDetect', .);

      call symputx('maxDetect', .);

      call symputx('minVuln', .);

      call symputx('maxVuln', .);

    end;

    n_Freq = min(max((Frequency - 0) / (200 - 0) * 100, 0), 100);

    n_Loss = min(max((Financial_Loss - 0) / (20000 - 0) * 100, 0), 100);

    /* For detection time: longer detection time increases risk => higher score */

    n_Detect = min(max((Detection_Time_Days - 0) / (365 - 0) * 100, 0), 100);

    /* Vulnerability level already 1-10, scale to 0-100 */

    n_Vuln = min(max((Vulnerability_Level - 1) / (10 - 1) * 100, 0), 100);

    /* Weighted composite score (weights sum to 1) */

    /* Weights: Loss 0.35, Vulnerability 0.25, Frequency 0.20, Detection 0.20 */

    Risk_Score = (0.20 * n_Freq) + (0.35 * n_Loss) + (0.20 * n_Detect) + (0.25 * n_Vuln);

    format Risk_Score 8.2;


    /* Risk categorization */

    if Risk_Score < 25 then Risk_Cat = 'Low';

    else if 25 <= Risk_Score < 50 then Risk_Cat = 'Medium';

    else if 50 <= Risk_Score < 75 then Risk_Cat = 'High';

    else Risk_Cat = 'Critical';


    /* Demonstrate INTCK / INTNX usage: compute months between occurrence and today, 

   and month-start of discovery */

    Months_Since_Occurrence = intck('month', Occurrence_Date, today());

    Discovery_MonthStart = intnx('month', Discovery_Date, 0, 'beginning');

    format Discovery_MonthStart date9.;

  run;

%mend classify_risk;


%classify_risk(indata=work.cyber_attacks_raw, outdata=work.cyber_attacks_risk);

proc print data=work.cyber_attacks_risk noobs;

  title "CYBER ATTACKS WITH RISK SCORES";

  var Attack_Type Country Frequency Financial_Loss Occurrence_Date Discovery_Date

      Detection_Time_Days Vulnerability_Level Risk_Score Risk_Cat Months_Since_Occurrence 

      Discovery_MonthStart;

run;

OUTPUT:

CYBER ATTACKS WITH RISK SCORES

Attack_TypeCountryFrequencyFinancial_LossOccurrence_DateDiscovery_DateDetection_Time_DaysVulnerability_LevelRisk_ScoreRisk_CatMonths_Since_OccurrenceDiscovery_MonthStart
PhishingUSA120150001JAN202503JAN20252628.62Med1101JAN2025
RansomwareIndia18750015FEB202520FEB20255937.42Med1001FEB2025
DDoSChina4525005MAR202505MAR20250310.49Low901MAR2025
Insider ThreatGermany630012MAR202525MAR202513821.28Low901MAR2025
Supply ChainUSA21200028FEB202512MAR202512738.52Med1001MAR2025
SQL InjectionBrazil3040010JAN202515JAN20255515.09Low1101JAN2025
Zero-day ExploitRussia1900022MAR202528MAR20256938.40Med901MAR2025
Credential StuffingUK8560007FEB202510FEB20253418.05Low1001FEB2025
Malware (Trojan)South_Africa25120002MAR202506MAR20254618.71Low901MAR2025
Business Email CompromiseAustralia9220018JAN202530JAN2025121230.41Med1101JAN2025
Man-in-the-MiddleCanada1215009FEB202511FEB2025224.35Low1001FEB2025
Adware/SpywareMexico408020JAN202522JAN2025239.81Low1101JAN2025

4) PROC MEANS for numeric summaries

proc means data=work.cyber_attacks_risk n mean median std min max;

  var Frequency Financial_Loss Detection_Time_Days Vulnerability_Level Risk_Score 

      Months_Since_Occurrence;

  title "NUMERIC SUMMARY - MEANS";

run;

OUTPUT:

NUMERIC SUMMARY - MEANS

The MEANS Procedure

VariableNMeanMedianStd DevMinimumMaximum
Frequency
Financial_Loss
Detection_Time_Days
Vulnerability_Level
Risk_Score
Months_Since_Occurrence
12
12
12
12
12
12
32.7500000
2931.67
5.5000000
6.1666667
22.5956754
10.0000000
21.5000000
900.0000000
4.5000000
6.0000000
19.9949201
10.0000000
36.2995743
4126.91
4.4415395
2.9797295
11.8995600
0.8528029
1.0000000
80.0000000
0
2.0000000
4.3498668
9.0000000
120.0000000
12000.00
13.0000000
12.0000000
38.5242009
11.0000000

5) PROC FREQ for categorical summaries

proc freq data=work.cyber_attacks_risk;

  tables Risk_Cat Attack_Type*Risk_Cat / norow nocol nopercent;

  title "FREQUENCY OF RISK CATEGORIES";

run;

OUTPUT:

FREQUENCY OF RISK CATEGORIES

The FREQ Procedure

Risk_CatFrequencyCumulative
Frequency
Low77
Med512
Frequency
Table of Attack_Type by Risk_Cat
Attack_TypeRisk_Cat
LowMedTotal
Adware/Spyware
1
0
1
Business Email Compromise
0
1
1
Credential Stuffing
1
0
1
DDoS
1
0
1
Insider Threat
1
0
1
Malware (Trojan)
1
0
1
Man-in-the-Middle
1
0
1
Phishing
0
1
1
Ransomware
0
1
1
SQL Injection
1
0
1
Supply Chain
0
1
1
Zero-day Exploit
0
1
1
Total
7
5
12

6) PROC SGPLOT examples:

ods graphics / reset imagename='RiskBar' width=800 height=400;

proc sgplot data=work.cyber_attacks_risk;

  vbar Risk_Cat / datalabel;

  xaxis label="Risk Category";

  yaxis label="Count";

  title "COUNT OF EVENTS BY RISK CATEGORY";

run;

OUTPUT:

The SGPlot Procedure


ods graphics / reset imagename='LossByAttack' width=900 height=450;

proc sgplot data=work.cyber_attacks_risk;

  hbar Attack_Type / response=Financial_Loss stat=sum datalabel;

  xaxis label="Total Financial Loss (K USD)";

  title "TOTAL FINANCIAL LOSS BY ATTACK TYPE";

run;

OUTPUT:

The SGPlot Procedure


ods graphics / reset imagename='RiskVsDetect' width=800 height=400;

proc sgplot data=work.cyber_attacks_risk;

  scatter x=Detection_Time_Days y=Risk_Score / datalabel=Attack_Type markerattrs=(symbol=CircleFilled);

  reg x=Detection_Time_Days y=Risk_Score;

  xaxis label="Detection Time (days)";

  yaxis label="Risk Score";

  title "RISK SCORE VS DETECTION TIME";

run;

OUTPUT:

The SGPlot Procedure


7) Examples of additional derived datasets

proc sql;

  create table work.monthly_by_attack as

  select intnx('month', Occurrence_Date, 0, 'beginning') as Month_Start format=date9.,

         Attack_Type,

         sum(Financial_Loss) as Total_Loss_KUSD,

         sum(Frequency) as Total_Freq,

         mean(Risk_Score) as Avg_Risk_Score

  from work.cyber_attacks_risk

  group by calculated Month_Start, Attack_Type

  order by calculated Month_Start, Attack_Type;

quit;

proc print data=work.monthly_by_attack noobs;

  title "MONTHLY AGGREGATION BY ATTACK TYPE";

run;

OUTPUT:

MONTHLY AGGREGATION BY ATTACK TYPE

Month_StartAttack_TypeTotal_Loss_KUSDTotal_FreqAvg_Risk_Score
01JAN2025Adware/Spyware80409.8051
01JAN2025Business Email Compromise2200930.4075
01JAN2025Phishing150012028.6235
01JAN2025SQL Injection4003015.0851
01FEB2025Credential Stuffing6008518.0477
01FEB2025Man-in-the-Middle150124.3499
01FEB2025Ransomware75001837.4212
01FEB2025Supply Chain12000238.5242
01MAR2025DDoS2504510.4931
01MAR2025Insider Threat300621.2818
01MAR2025Malware (Trojan)12002518.7081
01MAR2025Zero-day Exploit9000138.4010




To Visit My Previous Different Types Of Oils Dataset:Click Here
To Visit My Previous Different Types Of Series 2025 Dataset:Click Here
To Visit My Previous Analyzing Yoga Asanas Worldwide Dataset:Click Here
To Visit My Previous Analyzing Indian Languages Dataset:Click Here  



Follow Us On : 


 


--- FOLLOW OUR BLOG FOR MORE INFORMATION.

--->PLEASE DO COMMENTS AND SHARE OUR BLOG.


No comments:

Post a Comment