A NEW EXAMPLE : DO | SELECT | PROC SORT | PROC MEANS | PROC FREQ | PROC REPORT | PROC SQL | PROC SGPLOT | PROC TABULATE | PROC EXPORT | PROC UNIVARIATE | PROC GCHART | PROC FORMAT | PROC TRANSPOSE | PROC PRINT | PROC CORR | PROC REG
/* Step 1: Creating a Pharmaceutical Sales Dataset */
DATA PharmaSales;
LENGTH Region $10 Product $20 SalesRep $15;
FORMAT SaleDate DATE9. Revenue DOLLAR10.2;
DO i = 1 TO 5000;
/* Assign random values for different attributes */
SELECT (CEIL(RAND("UNIFORM") * 4));
WHEN (1) Region = 'North';
WHEN (2) Region = 'South';
WHEN (3) Region = 'East';
OTHERWISE Region = 'West';
END;
SELECT (CEIL(RAND("UNIFORM") * 5));
WHEN (1) Product = 'Painkiller';
WHEN (2) Product = 'Antibiotic';
WHEN (3) Product = 'Antacid';
WHEN (4) Product = 'Antiviral';
OTHERWISE Product = 'Antifungal';
END;
SalesRep = CAT('Rep', PUT(CEIL(RAND("UNIFORM") * 50), 2.));
SaleDate = INTNX('DAY', '01JAN2024'D, FLOOR(RAND("UNIFORM") * 90));
UnitsSold = CEIL(RAND("UNIFORM") * 100);
PricePerUnit = ROUND(50 + RAND("UNIFORM") * 200, 0.01);
Revenue = UnitsSold * PricePerUnit;
OUTPUT;
END;
DROP i;
RUN;
PROC PRINT DATA=PharmaSales;
RUN;
Output:
| Obs | Region | Product | SalesRep | SaleDate | Revenue | UnitsSold | PricePerUnit |
|---|---|---|---|---|---|---|---|
| 1 | East | Antiviral | Rep13 | 15FEB2024 | $4,083.20 | 20 | 204.16 |
| 2 | South | Painkiller | Rep41 | 28MAR2024 | $2,705.81 | 37 | 73.13 |
| 3 | North | Painkiller | Rep45 | 17MAR2024 | $8,454.60 | 63 | 134.20 |
| 4 | East | Painkiller | Rep50 | 06MAR2024 | $14,518.14 | 78 | 186.13 |
| 5 | East | Antiviral | Rep25 | 20FEB2024 | $13,198.08 | 96 | 137.48 |
| 6 | North | Painkiller | Rep42 | 27JAN2024 | $999.54 | 9 | 111.06 |
| 7 | East | Antifungal | Rep17 | 17JAN2024 | $1,665.93 | 21 | 79.33 |
| 8 | East | Antacid | Rep36 | 19JAN2024 | $9,807.55 | 73 | 134.35 |
| 9 | West | Antibiotic | Rep 1 | 30JAN2024 | $7,102.50 | 75 | 94.70 |
| 10 | West | Antacid | Rep46 | 28MAR2024 | $2,331.34 | 22 | 105.97 |
| 11 | South | Antifungal | Rep31 | 21JAN2024 | $3,659.66 | 41 | 89.26 |
| 12 | North | Antibiotic | Rep48 | 12JAN2024 | $10,140.48 | 72 | 140.84 |
| 13 | South | Antifungal | Rep40 | 16FEB2024 | $18,027.96 | 76 | 237.21 |
| 14 | North | Painkiller | Rep44 | 14FEB2024 | $10,292.68 | 58 | 177.46 |
| 15 | West | Antiviral | Rep31 | 23FEB2024 | $6,316.18 | 34 | 185.77 |
/* Step 2: Sorting Data */
PROC SORT DATA=PharmaSales;
BY Region Product SaleDate;
RUN;
PROC PRINT DATA=PharmaSales;
RUN;
Output:
| Obs | Region | Product | SalesRep | SaleDate | Revenue | UnitsSold | PricePerUnit |
|---|---|---|---|---|---|---|---|
| 1 | East | Antacid | Rep 4 | 01JAN2024 | $12,426.00 | 76 | 163.50 |
| 2 | East | Antacid | Rep36 | 01JAN2024 | $16,466.10 | 70 | 235.23 |
| 3 | East | Antacid | Rep41 | 02JAN2024 | $2,375.67 | 11 | 215.97 |
| 4 | East | Antacid | Rep31 | 02JAN2024 | $11,127.84 | 48 | 231.83 |
| 5 | East | Antacid | Rep15 | 02JAN2024 | $9,700.56 | 54 | 179.64 |
| 6 | East | Antacid | Rep23 | 03JAN2024 | $8,348.54 | 46 | 181.49 |
| 7 | East | Antacid | Rep22 | 03JAN2024 | $4,703.00 | 50 | 94.06 |
| 8 | East | Antacid | Rep14 | 03JAN2024 | $377.92 | 2 | 188.96 |
| 9 | East | Antacid | Rep15 | 03JAN2024 | $16,551.29 | 79 | 209.51 |
| 10 | East | Antacid | Rep 1 | 03JAN2024 | $18,342.72 | 88 | 208.44 |
| 11 | East | Antacid | Rep17 | 03JAN2024 | $2,902.50 | 45 | 64.50 |
| 12 | East | Antacid | Rep 3 | 03JAN2024 | $4,250.55 | 43 | 98.85 |
| 13 | East | Antacid | Rep19 | 04JAN2024 | $16,507.92 | 78 | 211.64 |
| 14 | East | Antacid | Rep13 | 04JAN2024 | $2,138.15 | 35 | 61.09 |
| 15 | East | Antacid | Rep27 | 05JAN2024 | $5,492.19 | 51 | 107.69 |
/* Step 3: Descriptive Statistics */
PROC MEANS DATA=PharmaSales MEAN MEDIAN MIN MAX STD;
CLASS Region;
VAR Revenue UnitsSold;
RUN;
Output:
| Region | N Obs | Variable | Mean | Median | Minimum | Maximum | Std Dev | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| East | 1236 |
|
|
|
|
|
| ||||||||||||
| North | 1242 |
|
|
|
|
|
| ||||||||||||
| South | 1282 |
|
|
|
|
|
| ||||||||||||
| West | 1240 |
|
|
|
|
|
|
/* Step 4: Frequency Analysis */
PROC FREQ DATA=PharmaSales;
TABLES Region Product SalesRep / NOCUM;
RUN;
Output:
| Region | Frequency | Percent |
|---|---|---|
| East | 1236 | 24.72 |
| North | 1242 | 24.84 |
| South | 1282 | 25.64 |
| West | 1240 | 24.80 |
| Product | Frequency | Percent |
|---|---|---|
| Antacid | 1059 | 21.18 |
| Antibiotic | 916 | 18.32 |
| Antifungal | 979 | 19.58 |
| Antiviral | 1014 | 20.28 |
| Painkiller | 1032 | 20.64 |
| SalesRep | Frequency | Percent |
|---|---|---|
| Rep 1 | 97 | 1.94 |
| Rep 2 | 102 | 2.04 |
| Rep 3 | 108 | 2.16 |
| Rep 4 | 96 | 1.92 |
| Rep 5 | 87 | 1.74 |
| Rep 6 | 99 | 1.98 |
| Rep 7 | 113 | 2.26 |
| Rep 8 | 92 | 1.84 |
| Rep 9 | 89 | 1.78 |
| Rep10 | 104 | 2.08 |
/* Step 5: Creating a Summary Report */
PROC REPORT DATA=PharmaSales NOWD;
COLUMN Region Product UnitsSold Revenue;
DEFINE Region / GROUP;
DEFINE Product / GROUP;
DEFINE UnitsSold / ANALYSIS SUM FORMAT=COMMA10.;
DEFINE Revenue / ANALYSIS SUM FORMAT=DOLLAR12.2;
RUN;
Output:
| Region | Product | UnitsSold | Revenue |
|---|---|---|---|
| East | Antacid | 12,921 | $2058392.86 |
| Antibiotic | 11,063 | $1732170.26 | |
| Antifungal | 12,821 | $1884646.04 | |
| Antiviral | 13,939 | $2090965.07 | |
| Painkiller | 11,869 | $1768682.57 | |
| North | Antacid | 12,318 | $1818173.25 |
| Antibiotic | 11,288 | $1684153.30 | |
| Antifungal | 12,831 | $1838029.24 | |
| Antiviral | 12,201 | $1858553.86 | |
| Painkiller | 13,882 | $2032229.38 | |
| South | Antacid | 14,209 | $2169839.75 |
| Antibiotic | 11,466 | $1746899.04 | |
| Antifungal | 13,079 | $1944976.91 | |
| Antiviral | 13,178 | $2023474.02 | |
| Painkiller | 14,469 | $2050371.03 | |
| West | Antacid | 13,034 | $1981865.19 |
| Antibiotic | 11,341 | $1771929.76 | |
| Antifungal | 11,666 | $1752772.69 | |
| Antiviral | 11,862 | $1742710.39 | |
| Painkiller | 11,872 | $1718821.30 |
/* Step 6: Using SQL to Aggregate Data */
PROC SQL;
CREATE TABLE SalesSummary AS
SELECT Region, Product, SUM(UnitsSold) AS TotalUnits, SUM(Revenue) AS TotalRevenue
FROM PharmaSales
GROUP BY Region, Product;
QUIT;
PROC PRINT DATA=SalesSummary;
RUN;
Output:
| Obs | Region | Product | TotalUnits | TotalRevenue |
|---|---|---|---|---|
| 1 | East | Antacid | 12921 | 2058392.86 |
| 2 | East | Antibiotic | 11063 | 1732170.26 |
| 3 | East | Antifungal | 12821 | 1884646.04 |
| 4 | East | Antiviral | 13939 | 2090965.07 |
| 5 | East | Painkiller | 11869 | 1768682.57 |
| 6 | North | Antacid | 12318 | 1818173.25 |
| 7 | North | Antibiotic | 11288 | 1684153.30 |
| 8 | North | Antifungal | 12831 | 1838029.24 |
| 9 | North | Antiviral | 12201 | 1858553.86 |
| 10 | North | Painkiller | 13882 | 2032229.38 |
| 11 | South | Antacid | 14209 | 2169839.75 |
| 12 | South | Antibiotic | 11466 | 1746899.04 |
| 13 | South | Antifungal | 13079 | 1944976.91 |
| 14 | South | Antiviral | 13178 | 2023474.02 |
| 15 | South | Painkiller | 14469 | 2050371.03 |
| 16 | West | Antacid | 13034 | 1981865.19 |
| 17 | West | Antibiotic | 11341 | 1771929.76 |
| 18 | West | Antifungal | 11666 | 1752772.69 |
| 19 | West | Antiviral | 11862 | 1742710.39 |
| 20 | West | Painkiller | 11872 | 1718821.30 |
/* Step 7: Generating a Graph */
PROC SGPLOT DATA=PharmaSales;
VBAR Product / RESPONSE=Revenue STAT=SUM DATALABEL;
TITLE "Total Revenue by Product";
RUN;
Log:
NOTE: PROCEDURE SGPLOT used (Total process time):
real time 2.76 seconds
cpu time 0.42 seconds
NOTE: Listing image output written to SGPlot1.png.
NOTE: There were 5000 observations read from the data set WORK.PHARMASALES.
/* Step 8: Tabulating Data */
PROC TABULATE DATA=PharmaSales FORMAT=DOLLAR10.2;
CLASS Region Product;
VAR Revenue;
TABLE Region*Product, Revenue*(SUM MEAN);
RUN;
Output:
| Revenue | |||
|---|---|---|---|
| Sum | Mean | ||
| Region | Product | 2058392.86 | $7,767.52 |
| East | Antacid | ||
| Antibiotic | 1732170.26 | $7,767.58 | |
| Antifungal | 1884646.04 | $7,885.55 | |
| Antiviral | 2090965.07 | $7,548.61 | |
| Painkiller | 1768682.57 | $7,623.63 | |
| North | Antacid | 1818173.25 | $7,331.34 |
| Antibiotic | 1684153.30 | $7,620.60 | |
| Antifungal | 1838029.24 | $7,124.14 | |
| Antiviral | 1858553.86 | $7,711.84 | |
| Painkiller | 2032229.38 | $7,416.90 | |
| South | Antacid | 2169839.75 | $8,250.34 |
| Antibiotic | 1746899.04 | $7,595.21 | |
| Antifungal | 1944976.91 | $7,779.91 | |
| Antiviral | 2023474.02 | $7,935.19 | |
| Painkiller | 2050371.03 | $7,219.62 | |
| West | Antacid | 1981865.19 | $7,003.06 |
| Antibiotic | 1771929.76 | $7,322.02 | |
| Antifungal | 1752772.69 | $7,555.05 | |
| Antiviral | 1742710.39 | $7,231.16 | |
| Painkiller | 1718821.30 | $7,102.57 | |
/* Step 9: Exporting Data to a CSV File */
PROC EXPORT DATA=SalesSummary
OUTFILE="C:\\Users\\YourName\\Documents\\SalesSummary.csv"
DBMS=CSV REPLACE;
PUTNAMES=YES;
RUN;
Log:
NOTE: The file"C:\\Users\\YourName\\Documents\\SalesSummary.csv" is:
Filename=C:\\Users\\YourName\\Documents\\SalesSummary.csv,
RECFM=V,LRECL=32767,File Size (bytes)=0,
Last Modified=24 March 2025 08:59:36,
Create Time=24 March 2025 08:59:36
NOTE: 21 records were written to the file "C:\\Users\\YourName\\Documents\\SalesSummary.csv".
The minimum record length was 29.
The maximum record length was 38.
NOTE: There were 20 observations read from the data set WORK.SALESSUMMARY.
NOTE: DATA statement used (Total process time):
real time 0.35 seconds
cpu time 0.06 seconds
/* Step 10: Univariate Analysis for Revenue Distribution */
PROC UNIVARIATE DATA=PharmaSales;
VAR Revenue;
HISTOGRAM Revenue / NORMAL;
PROBPLOT Revenue;
RUN;
Output:
| Moments | |||
|---|---|---|---|
| N | 5000 | Sum Weights | 5000 |
| Mean | 7533.93118 | Sum Observations | 37669655.9 |
| Std Deviation | 5385.68296 | Variance | 29005580.9 |
| Skewness | 0.75511335 | Kurtosis | -0.1421338 |
| Uncorrected SS | 4.28799E11 | Corrected SS | 1.44999E11 |
| Coeff Variation | 71.4856936 | Std Error Mean | 76.1650588 |
| Parameters for Normal Distribution | ||
|---|---|---|
| Parameter | Symbol | Estimate |
| Mean | Mu | 7533.931 |
| Std Dev | Sigma | 5385.683 |
/* Step 11: Creating a Pie Chart for Product Sales */
PROC GCHART DATA=PharmaSales;
PIE Product / SUMVAR=Revenue VALUE=INSIDE SLICE=OUTSIDE;
TITLE "Revenue Distribution by Product";
RUN;
Log:
NOTE: 37748 bytes written to C:\Users\Lenovo\AppData\Local\Temp\SAS Temporary
Files\_TD7760_DESKTOP-QFAA4KV_\gchart.png.
/* Step 12: Using PROC FORMAT to Categorize Revenue */
PROC FORMAT;
VALUE RevenueGrp
LOW - 5000 = 'Low Revenue'
5001 - 15000 = 'Medium Revenue'
15001 - HIGH = 'High Revenue';
RUN;
DATA PharmaSalesFormatted;
SET PharmaSales;
RevenueCategory = PUT(Revenue, RevenueGrp.);
RUN;
PROC PRINT DATA=PharmaSalesFormatted;
RUN;
Output:
| Obs | Region | Product | SalesRep | SaleDate | Revenue | UnitsSold | PricePerUnit | RevenueCategory |
|---|---|---|---|---|---|---|---|---|
| 1 | East | Antacid | Rep 4 | 01JAN2024 | $12,426.00 | 76 | 163.50 | Medium Revenue |
| 2 | East | Antacid | Rep36 | 01JAN2024 | $16,466.10 | 70 | 235.23 | High Revenue |
| 3 | East | Antacid | Rep41 | 02JAN2024 | $2,375.67 | 11 | 215.97 | Low Revenue |
| 4 | East | Antacid | Rep31 | 02JAN2024 | $11,127.84 | 48 | 231.83 | Medium Revenue |
| 5 | East | Antacid | Rep15 | 02JAN2024 | $9,700.56 | 54 | 179.64 | Medium Revenue |
| 6 | East | Antacid | Rep23 | 03JAN2024 | $8,348.54 | 46 | 181.49 | Medium Revenue |
| 7 | East | Antacid | Rep22 | 03JAN2024 | $4,703.00 | 50 | 94.06 | Low Revenue |
| 8 | East | Antacid | Rep14 | 03JAN2024 | $377.92 | 2 | 188.96 | Low Revenue |
| 9 | East | Antacid | Rep15 | 03JAN2024 | $16,551.29 | 79 | 209.51 | High Revenue |
| 10 | East | Antacid | Rep 1 | 03JAN2024 | $18,342.72 | 88 | 208.44 | High Revenue |
| 11 | East | Antacid | Rep17 | 03JAN2024 | $2,902.50 | 45 | 64.50 | Low Revenue |
| 12 | East | Antacid | Rep 3 | 03JAN2024 | $4,250.55 | 43 | 98.85 | Low Revenue |
| 13 | East | Antacid | Rep19 | 04JAN2024 | $16,507.92 | 78 | 211.64 | High Revenue |
| 14 | East | Antacid | Rep13 | 04JAN2024 | $2,138.15 | 35 | 61.09 | Low Revenue |
| 15 | East | Antacid | Rep27 | 05JAN2024 | $5,492.19 | 51 | 107.69 | Medium Revenue |
/* Step 13: Creating a Cross-tabulation Report */
PROC TABULATE DATA=PharmaSalesFormatted;
CLASS Region RevenueCategory;
VAR Revenue;
TABLE Region, RevenueCategory*Revenue*(N SUM MEAN);
RUN;
Output:
| RevenueCategory | |||||||||
|---|---|---|---|---|---|---|---|---|---|
| High Revenue | Low Revenue | Medium Revenue | |||||||
| Revenue | Revenue | Revenue | |||||||
| N | Sum | Mean | N | Sum | Mean | N | Sum | Mean | |
| Region | 150 | 2749216.04 | 18328.11 | 458 | 1164505.28 | 2542.59 | 628 | 5621135.48 | 8950.85 |
| East | |||||||||
| North | 129 | 2374670.33 | 18408.30 | 495 | 1243509.53 | 2512.14 | 618 | 5612959.17 | 9082.46 |
| South | 149 | 2711658.71 | 18199.05 | 479 | 1251274.52 | 2612.26 | 654 | 5972627.52 | 9132.46 |
| West | 135 | 2407497.95 | 17833.32 | 526 | 1283081.03 | 2439.32 | 579 | 5277520.35 | 9114.89 |
/* Step 14: Using PROC TRANSPOSE to Pivot Data */
PROC TRANSPOSE DATA=SalesSummary OUT=SalesTransposed;
BY Region;
VAR TotalUnits TotalRevenue;
ID Product;
RUN;
PROC PRINT DATA=SalesTransposed;
RUN;
Output:
| Obs | Region | _NAME_ | Antacid | Antibiotic | Antifungal | Antiviral | Painkiller |
|---|---|---|---|---|---|---|---|
| 1 | East | TotalUnits | 12921.00 | 11063.00 | 12821.00 | 13939.00 | 11869.00 |
| 2 | East | TotalRevenue | 2058392.86 | 1732170.26 | 1884646.04 | 2090965.07 | 1768682.57 |
| 3 | North | TotalUnits | 12318.00 | 11288.00 | 12831.00 | 12201.00 | 13882.00 |
| 4 | North | TotalRevenue | 1818173.25 | 1684153.30 | 1838029.24 | 1858553.86 | 2032229.38 |
| 5 | South | TotalUnits | 14209.00 | 11466.00 | 13079.00 | 13178.00 | 14469.00 |
| 6 | South | TotalRevenue | 2169839.75 | 1746899.04 | 1944976.91 | 2023474.02 | 2050371.03 |
| 7 | West | TotalUnits | 13034.00 | 11341.00 | 11666.00 | 11862.00 | 11872.00 |
| 8 | West | TotalRevenue | 1981865.19 | 1771929.76 | 1752772.69 | 1742710.39 | 1718821.30 |
/* Step 15: Generating a Line Plot for Revenue Trends */
PROC SGPLOT DATA=PharmaSales;
SERIES X=SaleDate Y=Revenue / GROUP=Region;
TITLE "Revenue Trends Over Time";
RUN;
Log:
NOTE: PROCEDURE SGPLOT used (Total process time):
real time 0.90 seconds
cpu time 0.14 seconds
NOTE: Marker and line antialiasing has been disabled because the threshold has been reached. You can
set ANTIALIASMAX=5000 in the ODS GRAPHICS statement to restore antialiasing.
NOTE: Marker and line antialiasing has been disabled because the threshold has been reached. You can
set ANTIALIASMAX=5000 in the ODS GRAPHICS statement to restore antialiasing.
NOTE: Listing image output written to SGPlot3.png.
NOTE: There were 5000 observations read from the data set WORK.PHARMASALES.
/* Step 16: Creating a Macro for Automated Analysis */
%MACRO AnalyzeSales(region);
PROC MEANS DATA=PharmaSales MEAN MIN MAX;
WHERE Region = "®ion";
VAR Revenue UnitsSold;
RUN;
%MEND AnalyzeSales;
%AnalyzeSales(North);
Output:
| Variable | Mean | Minimum | Maximum | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
|
|
%AnalyzeSales(South);
Output:
| Variable | Mean | Minimum | Maximum | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
|
|
%AnalyzeSales(East);
Output:
| Variable | Mean | Minimum | Maximum | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
|
|
%AnalyzeSales(West);
Output:
| Variable | Mean | Minimum | Maximum | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
|
|
/* Step 17: Checking for Missing Values */
PROC MEANS DATA=PharmaSales N NMISS;
VAR Revenue UnitsSold;
RUN;
Output:
| Variable | N | N Miss | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
|
/* Step 18: Creating a Boxplot to Analyze Revenue Distribution */
PROC SGPLOT DATA=PharmaSales;
VBOX Revenue / CATEGORY=Product;
TITLE "Revenue Distribution by Product";
RUN;
Log:
NOTE: PROCEDURE SGPLOT used (Total process time):
real time 0.43 seconds
cpu time 0.04 seconds
NOTE: Listing image output written to SGPlot5.png.
NOTE: There were 5000 observations read from the data set WORK.PHARMASALES.
/* Step 19: Merging Data with External Dataset (Simulated) */
DATA ExternalData;
INPUT Region $ MarketShare;
DATALINES;
North 25
South 30
East 20
West 25
;
RUN;
DATA PharmaSalesMerged;
MERGE PharmaSales (IN=a) ExternalData (IN=b);
BY Region;
IF a AND b;
RUN;
Output:
/* Step 20: Final Summary Report */
PROC PRINT DATA=PharmaSalesMerged;
TITLE "Final Merged Sales Report";
RUN;
Output:
| Obs | Region | Product | SalesRep | SaleDate | Revenue | UnitsSold | PricePerUnit | MarketShare |
|---|---|---|---|---|---|---|---|---|
| 1 | North | Antacid | Rep17 | 01JAN2024 | $21,668.88 | 94 | 230.52 | 25 |
| 2 | North | Antacid | Rep12 | 01JAN2024 | $1,999.80 | 11 | 181.80 | 25 |
| 3 | North | Antacid | Rep 6 | 01JAN2024 | $1,918.54 | 26 | 73.79 | 25 |
| 4 | North | Antacid | Rep14 | 02JAN2024 | $3,275.89 | 23 | 142.43 | 25 |
| 5 | North | Antacid | Rep10 | 03JAN2024 | $12,016.40 | 88 | 136.55 | 25 |
| 6 | North | Antacid | Rep27 | 04JAN2024 | $318.34 | 2 | 159.17 | 25 |
| 7 | North | Antacid | Rep34 | 04JAN2024 | $4,489.10 | 22 | 204.05 | 25 |
| 8 | North | Antacid | Rep11 | 04JAN2024 | $8,442.84 | 69 | 122.36 | 25 |
| 9 | North | Antacid | Rep36 | 04JAN2024 | $602.00 | 5 | 120.40 | 25 |
| 10 | North | Antacid | Rep38 | 04JAN2024 | $7,311.70 | 85 | 86.02 | 25 |
| 11 | North | Antacid | Rep49 | 05JAN2024 | $6,221.76 | 32 | 194.43 | 25 |
| 12 | North | Antacid | Rep35 | 05JAN2024 | $13,522.80 | 60 | 225.38 | 25 |
| 13 | North | Antacid | Rep11 | 05JAN2024 | $17,137.84 | 89 | 192.56 | 25 |
| 14 | North | Antacid | Rep38 | 06JAN2024 | $7,952.49 | 63 | 126.23 | 25 |
| 15 | North | Antacid | Rep25 | 06JAN2024 | $10,245.42 | 54 | 189.73 | 25 |
/* Step 21: Additional Statistical Analysis */
PROC CORR DATA=PharmaSales;
VAR UnitsSold Revenue;
TITLE "Correlation Between Units Sold and Revenue";
RUN;
Output:
| Correlation Between Units Sold and Revenue |
| 2 Variables: | UnitsSold Revenue |
|---|
| Simple Statistics | ||||||
|---|---|---|---|---|---|---|
| Variable | N | Mean | Std Dev | Sum | Minimum | Maximum |
| UnitsSold | 5000 | 50.26180 | 28.58422 | 251309 | 1.00000 | 100.00000 |
| Revenue | 5000 | 7534 | 5386 | 37669656 | 52.60000 | 24523 |
| Pearson Correlation
Coefficients, N = 5000 Prob > |r| under H0: Rho=0 | ||||||
|---|---|---|---|---|---|---|
| UnitsSold | Revenue | |||||
| UnitsSold |
|
| ||||
| Revenue |
|
| ||||
PROC REG DATA=PharmaSales;
MODEL Revenue = UnitsSold PricePerUnit;
TITLE "Regression Analysis of Revenue";
RUN;
Output:
Regression Analysis of Revenue
| Number of Observations Read | 5000 |
|---|---|
| Number of Observations Used | 5000 |
| Analysis of Variance | |||||
|---|---|---|---|---|---|
| Source | DF | Sum of Squares |
Mean Square |
F Value | Pr > F |
| Model | 2 | 1.316928E11 | 65846375739 | 24728.0 | <.0001 |
| Error | 4997 | 13306147624 | 2662827 | ||
| Corrected Total | 4999 | 1.449989E11 | |||
| Root MSE | 1631.81715 | R-Square | 0.9082 |
|---|---|---|---|
| Dependent Mean | 7533.93118 | Adj R-Sq | 0.9082 |
| Coeff Var | 21.65957 |
| Parameter Estimates | |||||
|---|---|---|---|---|---|
| Variable | DF | Parameter Estimate |
Standard Error |
t Value | Pr > |t| |
| Intercept | 1 | -7556.41606 | 76.92294 | -98.23 | <.0001 |
| UnitsSold | 1 | 150.53167 | 0.80755 | 186.41 | <.0001 |
| PricePerUnit | 1 | 50.00734 | 0.40160 | 124.52 | <.0001 |
/* Step 22: Generating Boxplot for Units Sold by Region */
PROC SGPLOT DATA=PharmaSales;
VBOX UnitsSold / CATEGORY=Region;
TITLE "Units Sold Distribution by Region";
RUN;
Log:
NOTE: PROCEDURE SGPLOT used (Total process time):
real time 0.40 seconds
cpu time 0.04 seconds
NOTE: Listing image output written to SGPlot7.png.
NOTE: There were 5000 observations read from the data set WORK.PHARMASALES.
Note: Here Out Of 5000 Only 15 Observations Are Given.
No comments:
Post a Comment