CRYPTOCURRENCY MARKET ANALYSIS AND RISK MODELLING USING PROC SQL | PROC MEANS | PROC REG | PROC UNIVARIATE | PROC SGPLOT | PROC CORR | MACROS | DATE FUNCTIONS (INTCK | INTNX)
options nocenter;
1.CRYPTOCURRENCIES DATASET CREATION
data work.crypto_raw;
length Coin_Name $20;
informat Launch_Date date9.;
format Launch_Date Last_Updated date9.;
input Coin_Name $ Market_Cap Price Volatility Transactions Energy_Usage
Launch_Date :date9.;
/* Date derivations */
Last_Updated = intnx('month', Launch_Date, 6, 'same');
Days_Since_Launch = intck('day', Launch_Date, today());
datalines;
Bitcoin 850 42000 4.5 350000 707 03JAN2009
Ethereum 380 2300 5.2 900000 62 30JUL2015
BinanceCoin 90 310 3.8 1200000 12 14JUL2017
Cardano 45 0.55 6.1 250000 6 29SEP2017
Solana 65 95 7.5 1800000 5 16MAR2020
Ripple 55 0.75 5.9 1400000 2 02FEB2013
Polkadot 35 7.5 6.8 600000 8 26AUG2020
Dogecoin 20 0.15 9.2 450000 15 06DEC2013
Litecoin 30 85 4.1 300000 18 13OCT2011
Avalanche 28 38 6.4 500000 7 21SEP2020
Chainlink 25 14 5.7 420000 9 19SEP2017
Polygon 18 0.9 6.9 800000 4 10OCT2017
;
run;
proc print data=work.crypto_raw;
run;
OUTPUT:
| Obs | Coin_Name | Launch_Date | Last_Updated | Market_Cap | Price | Volatility | Transactions | Energy_Usage | Days_Since_Launch |
|---|---|---|---|---|---|---|---|---|---|
| 1 | Bitcoin | 03JAN2009 | 03JUL2009 | 850 | 42000.00 | 4.5 | 350000 | 707 | 6190 |
| 2 | Ethereum | 30JUL2015 | 30JAN2016 | 380 | 2300.00 | 5.2 | 900000 | 62 | 3791 |
| 3 | BinanceCoin | 14JUL2017 | 14JAN2018 | 90 | 310.00 | 3.8 | 1200000 | 12 | 3076 |
| 4 | Cardano | 29SEP2017 | 29MAR2018 | 45 | 0.55 | 6.1 | 250000 | 6 | 2999 |
| 5 | Solana | 16MAR2020 | 16SEP2020 | 65 | 95.00 | 7.5 | 1800000 | 5 | 2100 |
| 6 | Ripple | 02FEB2013 | 02AUG2013 | 55 | 0.75 | 5.9 | 1400000 | 2 | 4699 |
| 7 | Polkadot | 26AUG2020 | 26FEB2021 | 35 | 7.50 | 6.8 | 600000 | 8 | 1937 |
| 8 | Dogecoin | 06DEC2013 | 06JUN2014 | 20 | 0.15 | 9.2 | 450000 | 15 | 4392 |
| 9 | Litecoin | 13OCT2011 | 13APR2012 | 30 | 85.00 | 4.1 | 300000 | 18 | 5177 |
| 10 | Avalanche | 21SEP2020 | 21MAR2021 | 28 | 38.00 | 6.4 | 500000 | 7 | 1911 |
| 11 | Chainlink | 19SEP2017 | 19MAR2018 | 25 | 14.00 | 5.7 | 420000 | 9 | 3009 |
| 12 | Polygon | 10OCT2017 | 10APR2018 | 18 | 0.90 | 6.9 | 800000 | 4 | 2988 |
2.PROC SQL – DATA EXTRACTION & FILTERING
proc sql;
create table crypto_highcap as
select Coin_Name,Market_Cap,Price,Volatility,Transactions,Energy_Usage,Launch_Date,
Last_Updated
from crypto_raw
where Market_Cap > 50
order by Market_Cap desc;
quit;
proc print data=crypto_highcap;
run;
OUTPUT:
| Obs | Coin_Name | Market_Cap | Price | Volatility | Transactions | Energy_Usage | Launch_Date | Last_Updated |
|---|---|---|---|---|---|---|---|---|
| 1 | Bitcoin | 850 | 42000.00 | 4.5 | 350000 | 707 | 03JAN2009 | 03JUL2009 |
| 2 | Ethereum | 380 | 2300.00 | 5.2 | 900000 | 62 | 30JUL2015 | 30JAN2016 |
| 3 | BinanceCoin | 90 | 310.00 | 3.8 | 1200000 | 12 | 14JUL2017 | 14JAN2018 |
| 4 | Solana | 65 | 95.00 | 7.5 | 1800000 | 5 | 16MAR2020 | 16SEP2020 |
| 5 | Ripple | 55 | 0.75 | 5.9 | 1400000 | 2 | 02FEB2013 | 02AUG2013 |
3.PROC MEANS – DESCRIPTIVE STATISTICS
proc means data=crypto_raw mean min max std;
var Market_Cap Price Volatility Transactions Energy_Usage Days_Since_Launch;
run;
OUTPUT:
The MEANS Procedure
| Variable | Mean | Minimum | Maximum | Std Dev |
|---|---|---|---|---|
Market_Cap Price Volatility Transactions Energy_Usage Days_Since_Launch | 136.7500000 3737.65 6.0083333 747500.00 71.2500000 3522.42 | 18.0000000 0.1500000 3.8000000 250000.00 2.0000000 1911.00 | 850.0000000 42000.00 9.2000000 1800000.00 707.0000000 6190.00 | 245.7134787 12067.10 1.5240248 490716.08 200.8487106 1355.87 |
4.PROC UNIVARIATE – DISTRIBUTION ANALYSIS
proc univariate data=crypto_raw;
var Volatility Energy_Usage;
histogram Volatility Energy_Usage;
run;
OUTPUT:
The UNIVARIATE Procedure
Variable: Volatility
| Moments | |||
|---|---|---|---|
| N | 12 | Sum Weights | 12 |
| Mean | 6.00833333 | Sum Observations | 72.1 |
| Std Deviation | 1.52402478 | Variance | 2.32265152 |
| Skewness | 0.47424428 | Kurtosis | 0.41892236 |
| Uncorrected SS | 458.75 | Corrected SS | 25.5491667 |
| Coeff Variation | 25.3651835 | Std Error Mean | 0.43994806 |
| Basic Statistical Measures | |||
|---|---|---|---|
| Location | Variability | ||
| Mean | 6.008333 | Std Deviation | 1.52402 |
| Median | 6.000000 | Variance | 2.32265 |
| Mode | . | Range | 5.40000 |
| Interquartile Range | 2.00000 | ||
| Tests for Location: Mu0=0 | ||||
|---|---|---|---|---|
| Test | Statistic | p Value | ||
| Student's t | t | 13.65692 | Pr > |t| | <.0001 |
| Sign | M | 6 | Pr >= |M| | 0.0005 |
| Signed Rank | S | 39 | Pr >= |S| | 0.0005 |
| Quantiles (Definition 5) | |
|---|---|
| Level | Quantile |
| 100% Max | 9.20 |
| 99% | 9.20 |
| 95% | 9.20 |
| 90% | 7.50 |
| 75% Q3 | 6.85 |
| 50% Median | 6.00 |
| 25% Q1 | 4.85 |
| 10% | 4.10 |
| 5% | 3.80 |
| 1% | 3.80 |
| 0% Min | 3.80 |
| Extreme Observations | |||
|---|---|---|---|
| Lowest | Highest | ||
| Value | Obs | Value | Obs |
| 3.8 | 3 | 6.4 | 10 |
| 4.1 | 9 | 6.8 | 7 |
| 4.5 | 1 | 6.9 | 12 |
| 5.2 | 2 | 7.5 | 5 |
| 5.7 | 11 | 9.2 | 8 |
The UNIVARIATE Procedure
The UNIVARIATE Procedure
Variable: Energy_Usage
| Moments | |||
|---|---|---|---|
| N | 12 | Sum Weights | 12 |
| Mean | 71.25 | Sum Observations | 855 |
| Std Deviation | 200.848711 | Variance | 40340.2045 |
| Skewness | 3.42602819 | Kurtosis | 11.8003497 |
| Uncorrected SS | 504661 | Corrected SS | 443742.25 |
| Coeff Variation | 281.892927 | Std Error Mean | 57.9800286 |
| Basic Statistical Measures | |||
|---|---|---|---|
| Location | Variability | ||
| Mean | 71.25000 | Std Deviation | 200.84871 |
| Median | 8.50000 | Variance | 40340 |
| Mode | . | Range | 705.00000 |
| Interquartile Range | 11.00000 | ||
| Tests for Location: Mu0=0 | ||||
|---|---|---|---|---|
| Test | Statistic | p Value | ||
| Student's t | t | 1.228871 | Pr > |t| | 0.2448 |
| Sign | M | 6 | Pr >= |M| | 0.0005 |
| Signed Rank | S | 39 | Pr >= |S| | 0.0005 |
| Quantiles (Definition 5) | |
|---|---|
| Level | Quantile |
| 100% Max | 707.0 |
| 99% | 707.0 |
| 95% | 707.0 |
| 90% | 62.0 |
| 75% Q3 | 16.5 |
| 50% Median | 8.5 |
| 25% Q1 | 5.5 |
| 10% | 4.0 |
| 5% | 2.0 |
| 1% | 2.0 |
| 0% Min | 2.0 |
| Extreme Observations | |||
|---|---|---|---|
| Lowest | Highest | ||
| Value | Obs | Value | Obs |
| 2 | 6 | 12 | 3 |
| 4 | 12 | 15 | 8 |
| 5 | 5 | 18 | 9 |
| 6 | 4 | 62 | 2 |
| 7 | 10 | 707 | 1 |
The UNIVARIATE Procedure
5.PROC REG – REGRESSION MODELING
proc reg data=crypto_raw;
model Price = Market_Cap Volatility Energy_Usage;
run;
quit;
OUTPUT:
The REG Procedure
Model: MODEL1
Dependent Variable: Price
| Number of Observations Read | 12 |
|---|---|
| Number of Observations Used | 12 |
| Analysis of Variance | |||||
|---|---|---|---|---|---|
| Source | DF | Sum of Squares | Mean Square | F Value | Pr > F |
| Model | 3 | 1600742908 | 533580969 | 4183.16 | <.0001 |
| Error | 8 | 1020435 | 127554 | ||
| Corrected Total | 11 | 1601763344 | |||
| Root MSE | 357.14764 | R-Square | 0.9994 |
|---|---|---|---|
| Dependent Mean | 3737.65417 | Adj R-Sq | 0.9991 |
| Coeff Var | 9.55540 |
| Parameter Estimates | |||||
|---|---|---|---|---|---|
| Variable | DF | Parameter Estimate | Standard Error | t Value | Pr > |t| |
| Intercept | 1 | -358.66926 | 518.45460 | -0.69 | 0.5086 |
| Market_Cap | 1 | -2.74932 | 1.34947 | -2.04 | 0.0760 |
| Volatility | 1 | -5.15661 | 77.82243 | -0.07 | 0.9488 |
| Energy_Usage | 1 | 63.20387 | 1.60454 | 39.39 | <.0001 |
The REG Procedure
Model: MODEL1
Dependent Variable: Price
6.PROC SGPLOT – VISUALIZATION
proc sgplot data=crypto_raw;
scatter x=Market_Cap y=Price;
reg x=Market_Cap y=Price;
title "Market Capitalization vs Price";
run;
OUTPUT:
7.ADDITIONAL PROC – PROC CORR
proc corr data=crypto_raw;
var Market_Cap Price Volatility Transactions Energy_Usage;
run;
OUTPUT:
The CORR Procedure
| 5 Variables: | Market_Cap Price Volatility Transactions Energy_Usage |
|---|
| Simple Statistics | ||||||
|---|---|---|---|---|---|---|
| Variable | N | Mean | Std Dev | Sum | Minimum | Maximum |
| Market_Cap | 12 | 136.75000 | 245.71348 | 1641 | 18.00000 | 850.00000 |
| Price | 12 | 3738 | 12067 | 44852 | 0.15000 | 42000 |
| Volatility | 12 | 6.00833 | 1.52402 | 72.10000 | 3.80000 | 9.20000 |
| Transactions | 12 | 747500 | 490716 | 8970000 | 250000 | 1800000 |
| Energy_Usage | 12 | 71.25000 | 200.84871 | 855.00000 | 2.00000 | 707.00000 |
| Pearson Correlation Coefficients, N = 12 Prob > |r| under H0: Rho=0 | |||||
|---|---|---|---|---|---|
| Market_Cap | Price | Volatility | Transactions | Energy_Usage | |
| Market_Cap | 1.00000 | 0.93449 <.0001 | -0.39796 0.2001 | -0.14777 0.6467 | 0.94128 <.0001 |
| Price | 0.93449 <.0001 | 1.00000 | -0.32574 0.3015 | -0.24805 0.4370 | 0.99951 <.0001 |
| Volatility | -0.39796 0.2001 | -0.32574 0.3015 | 1.00000 | 0.09351 0.7725 | -0.33020 0.2945 |
| Transactions | -0.14777 0.6467 | -0.24805 0.4370 | 0.09351 0.7725 | 1.00000 | -0.25894 0.4164 |
| Energy_Usage | 0.94128 <.0001 | 0.99951 <.0001 | -0.33020 0.2945 | -0.25894 0.4164 | 1.00000 |
8.MACRO FOR RISK GROUPING
%macro risk_group(var=, out=);
if &var < 4 then &out = "Low_Risk";
else if &var < 6 then &out = "Medium_Risk";
else &out = "High_Risk";
%mend risk_group;
data crypto_with_risk;
set crypto_raw;
length Risk_Level $12;
%risk_group(var=Volatility, out=Risk_Level);
run;
proc print data=crypto_with_risk;
run;
OUTPUT:
| Obs | Coin_Name | Launch_Date | Last_Updated | Market_Cap | Price | Volatility | Transactions | Energy_Usage | Days_Since_Launch | Risk_Level |
|---|---|---|---|---|---|---|---|---|---|---|
| 1 | Bitcoin | 03JAN2009 | 03JUL2009 | 850 | 42000.00 | 4.5 | 350000 | 707 | 6190 | Medium_Risk |
| 2 | Ethereum | 30JUL2015 | 30JAN2016 | 380 | 2300.00 | 5.2 | 900000 | 62 | 3791 | Medium_Risk |
| 3 | BinanceCoin | 14JUL2017 | 14JAN2018 | 90 | 310.00 | 3.8 | 1200000 | 12 | 3076 | Low_Risk |
| 4 | Cardano | 29SEP2017 | 29MAR2018 | 45 | 0.55 | 6.1 | 250000 | 6 | 2999 | High_Risk |
| 5 | Solana | 16MAR2020 | 16SEP2020 | 65 | 95.00 | 7.5 | 1800000 | 5 | 2100 | High_Risk |
| 6 | Ripple | 02FEB2013 | 02AUG2013 | 55 | 0.75 | 5.9 | 1400000 | 2 | 4699 | Medium_Risk |
| 7 | Polkadot | 26AUG2020 | 26FEB2021 | 35 | 7.50 | 6.8 | 600000 | 8 | 1937 | High_Risk |
| 8 | Dogecoin | 06DEC2013 | 06JUN2014 | 20 | 0.15 | 9.2 | 450000 | 15 | 4392 | High_Risk |
| 9 | Litecoin | 13OCT2011 | 13APR2012 | 30 | 85.00 | 4.1 | 300000 | 18 | 5177 | Medium_Risk |
| 10 | Avalanche | 21SEP2020 | 21MAR2021 | 28 | 38.00 | 6.4 | 500000 | 7 | 1911 | High_Risk |
| 11 | Chainlink | 19SEP2017 | 19MAR2018 | 25 | 14.00 | 5.7 | 420000 | 9 | 3009 | Medium_Risk |
| 12 | Polygon | 10OCT2017 | 10APR2018 | 18 | 0.90 | 6.9 | 800000 | 4 | 2988 | High_Risk |
9.DATE FUNCTIONS DEMONSTRATION
data crypto_dates;
set crypto_raw;
Months_Since_Launch = intck('month', Launch_Date, today());
Next_Review_Date = intnx('month', today(), 3, 'same');
Launch_Year = year(Launch_Date);
run;
proc print data=crypto_dates;
run;
| Obs | Coin_Name | Launch_Date | Last_Updated | Market_Cap | Price | Volatility | Transactions | Energy_Usage | Days_Since_Launch | Months_Since_Launch | Next_Review_Date | Launch_Year |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | Bitcoin | 03JAN2009 | 03JUL2009 | 850 | 42000.00 | 4.5 | 350000 | 707 | 6190 | 203 | 24180 | 2009 |
| 2 | Ethereum | 30JUL2015 | 30JAN2016 | 380 | 2300.00 | 5.2 | 900000 | 62 | 3791 | 125 | 24180 | 2015 |
| 3 | BinanceCoin | 14JUL2017 | 14JAN2018 | 90 | 310.00 | 3.8 | 1200000 | 12 | 3076 | 101 | 24180 | 2017 |
| 4 | Cardano | 29SEP2017 | 29MAR2018 | 45 | 0.55 | 6.1 | 250000 | 6 | 2999 | 99 | 24180 | 2017 |
| 5 | Solana | 16MAR2020 | 16SEP2020 | 65 | 95.00 | 7.5 | 1800000 | 5 | 2100 | 69 | 24180 | 2020 |
| 6 | Ripple | 02FEB2013 | 02AUG2013 | 55 | 0.75 | 5.9 | 1400000 | 2 | 4699 | 154 | 24180 | 2013 |
| 7 | Polkadot | 26AUG2020 | 26FEB2021 | 35 | 7.50 | 6.8 | 600000 | 8 | 1937 | 64 | 24180 | 2020 |
| 8 | Dogecoin | 06DEC2013 | 06JUN2014 | 20 | 0.15 | 9.2 | 450000 | 15 | 4392 | 144 | 24180 | 2013 |
| 9 | Litecoin | 13OCT2011 | 13APR2012 | 30 | 85.00 | 4.1 | 300000 | 18 | 5177 | 170 | 24180 | 2011 |
| 10 | Avalanche | 21SEP2020 | 21MAR2021 | 28 | 38.00 | 6.4 | 500000 | 7 | 1911 | 63 | 24180 | 2020 |
| 11 | Chainlink | 19SEP2017 | 19MAR2018 | 25 | 14.00 | 5.7 | 420000 | 9 | 3009 | 99 | 24180 | 2017 |
| 12 | Polygon | 10OCT2017 | 10APR2018 | 18 | 0.90 | 6.9 | 800000 | 4 | 2988 | 98 | 24180 | 2017 |
5.CREATING NEW VARIABLE
data logic_speed;
set transport_enhanced;
Speed_Growth = Years_Operational / Speed; (WRONG)
Speed_Growth = Speed / Years_Operational ;
run;
run;
proc print data=logic_speed;
run;
/* Note: In practice above there is an Invalid in this code Find it,Correct it and Use it /*
No comments:
Post a Comment