-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path04_DataWranglling_OnlineRetailUCI_HomePage.sql
132 lines (103 loc) · 5.29 KB
/
04_DataWranglling_OnlineRetailUCI_HomePage.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
/*EDA - Raw DB*/
-- Retrieve the database
SELECT *
FROM online_retail_UCI_DB..online_retail_main
-- 01 -- Add one empty column name row_state into online_retail_raw table
ALTER TABLE online_retail_UCI_DB..online_retail_raw
ADD row_state NVARCHAR (50)
-- 02 - Create raw view (for HomePage Dashboard - Data source Overview)
CREATE VIEW raw_view_w_RowNum AS
SELECT *,
LEFT(DATENAME(Month,InvoiceDate),3) AS Month_3,
DATENAME(weekday, InvoiceDate) AS dow,
DATEPART(hour, InvoiceDate) AS "hour",
Quantity*UnitPrice AS total_values,
ROW_NUMBER() OVER (PARTITION BY InvoiceNo, StockCode, "Description", Quantity, InvoiceDate, UnitPrice, CustomerID, Country ORDER BY (SELECT NULL)) AS RowNum
FROM online_retail_UCI_DB..online_retail_raw
SELECT *
INTO new_table
FROM raw_view_w_RowNum
UPDATE new_table
SET row_state = 'duplicated rows'
WHERE RowNum > 1
SELECT *
FROM new_table
WHERE row_state IS NULL
UPDATE new_table
SET row_state = 'sale rows'
WHERE RowNum =1 AND
(Quantity > 0 AND UnitPrice > 0 AND StockCode <> 'B')
OR (UnitPrice = 0 AND InvoiceNo NOT LIKE 'C%' AND InvoiceNo NOT LIKE 'A%'
AND "Description" IS NOT NULL AND Quantity > 0 AND CustomerID IS NOT NULL)
UPDATE new_table
SET row_state = 'cancellation rows'
WHERE Quantity < 0 AND InvoiceNo LIKE 'C%' AND RowNum = 1
UPDATE new_table
SET row_state = 'test rows'
WHERE UnitPrice = 0 AND InvoiceNo NOT LIKE 'C%' AND CustomerID IS NULL AND RowNum = 1
UPDATE new_table
SET row_state = 'vague rows'
WHERE InvoiceNo LIKE 'A%'
CREATE VIEW raw_view AS
SELECT *
FROM new_table
SELECT ROUND(100*COUNT(CASE WHEN "row_state" = 'sale rows' THEN "row_state" END)/COUNT( "row_state"),3) AS "Sale rows %",
ROUND(100*COUNT(CASE WHEN "row_state" = 'cancellation rows' THEN "row_state" END)/COUNT( "row_state"),3) AS "Cancellations rows %",
ROUND(100*COUNT(CASE WHEN "row_state" = 'duplicated rows' THEN "row_state" END)/COUNT( "row_state"),3) AS "Duplicated rows %",
ROUND(100*COUNT(CASE WHEN "row_state" = 'test rows' THEN "row_state" END)/COUNT( "row_state"),3) AS "Test rows %",
ROUND(100*COUNT(CASE WHEN "row_state" = 'vague rows' THEN "row_state" END)/COUNT( "row_state"),3) AS "Vague rows %"
FROM new_table
SELECT row_state, COUNT(row_state) AS "count"
FROM raw_view
GROUP BY row_state
-- 03 - Create Test_view
CREATE VIEW test_view AS
SELECT *,
LEFT(DATENAME(Month,InvoiceDate),3) AS Month_3,
DATENAME(weekday, InvoiceDate) AS dow,
DATEPART(hour, InvoiceDate) AS "hour",
Quantity*UnitPrice AS total_values
FROM online_retail_UCI_DB..online_retail_main
WHERE UnitPrice = 0 AND InvoiceNo NOT LIKE 'C%' AND CustomerID IS NULL
-- 04 - Create sale view
CREATE VIEW sale_view AS
SELECT * ,
LEFT(DATENAME(Month,InvoiceDate),3) AS Month_3,
DATENAME(weekday, InvoiceDate) AS dow,
DATEPART(hour, InvoiceDate) AS "hour",
Quantity*UnitPrice AS total_sales
FROM online_retail_UCI_DB..online_retail_main
WHERE (Quantity > 0 AND UnitPrice > 0 AND StockCode <> 'B')
OR (UnitPrice = 0 AND InvoiceNo NOT LIKE 'C%' AND InvoiceNo NOT LIKE 'A%'
AND "Description" IS NOT NULL AND Quantity > 0 AND CustomerID IS NOT NULL)
-- Result: 524,877 associated rows. / 524,917
/* The charateristic of each column in sale_analysis view:
InvoiceNo: starting without letter C and A, nvarchar
StockCode: not NULL, nvarchar
Description: Product Name, nvarchar
Quantity: positive value, int
InvoiceDate: not NULL, datetime
UnitPrice: positive value
CustomerID: nvarchar, contains NULL and not NULL values
Country: nvarchar */
-- 05 - Create cancellation view
CREATE VIEW cancellation_view AS
SELECT *,
LEFT(DATENAME(Month,InvoiceDate),3) AS Month_3,
DATENAME(weekday, InvoiceDate) AS dow,
DATEPART(hour, InvoiceDate) AS "hour",
ABS(Quantity)*UnitPrice AS total_cancelled_value
FROM online_retail_UCI_DB..online_retail_main
WHERE Quantity < 0 AND InvoiceNo LIKE 'C%'\
SELECT *
FROM
-- Result: 9,251 associated rows.
/* The charateristic of each column in sale_analysis view:
InvoiceNo: only starting with letter C, nvarchar
StockCode: not NULL, nvarchar
Description: Product Name, nvarchar
Quantity: negative value, int
InvoiceDate: not NULL, datetime
UnitPrice: positive value
CustomerID: nvarchar, contains NULL and not NULL values
Country: nvarchar */