-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate_custom_data_equijoin_result_ref.sh
executable file
·130 lines (112 loc) · 5.29 KB
/
generate_custom_data_equijoin_result_ref.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#!/bin/bash
#############################################################################
#
# Description - this script is responsible for generating the correct equijoin
# result tables AFTER the "./generate_custom_data.py" script
# finishes generating the input customer and purchases tables.
# This way the main C equijoin program has the correct equijoin
# results it needs to verify that it performed the equijoins
# correctly.
#
# Author - Ted Li
#
#
#############################################################################
## GLOBAL VARIABLES DEFINITION SECTION ##
# the "true" command always returns the value programs
# return when they exit successfully, so store that
# value for use later when exiting this program.
EXIT_SUCCESS="$(true && echo "$?")"
# the "false" command always returns the value programs
# return when they exit with a general error, so store that
# value for use later when handling errors
EXIT_FAILURE="$(true && echo "$?")"
# Filepath to the customer data table to be equijoined with
# purchases table
CUSTOMER_TABLE_FILE="./data/custom_customer_data.csv"
# Filepath to purchases table
PURCHASES_TABLE_FILE="./data/custom_purchases_data.csv"
# Filepath to correct equijoin result table containing data only about inactive customers
read CORRECT_EQUIJOIN_TABLE_INACTIVE_CUSTOMERS <<- EOF
./data/custom_results/custom_correct_join_result_inactive_customers.csv
EOF
# Filepath to correct equijoin result table containing data only about active customers
read CORRECT_EQUIJOIN_TABLE_ACTIVE_CUSTOMERS <<- EOF
./data/custom_results/custom_correct_join_result_active_customers.csv
EOF
# Table header for both equijoin result tables; the value of this variable
# is what gets written to each file storing the correct equijoin result
# when sqlite3 doesn't write anything to the file because the equijoin
# result is an empty table.
read EQUIJOIN_RESULT_TABLE_HEADER <<- EOF
epochTimePurchased,customerID,customerName,purchaseEAN13,purchaseQuantity
EOF
## END GLOBAL VARIABLES DEFINITION SECTION ##
## BEGIN MAIN PROGRAM ##
# If sqlite3 isn't installed, then ask user to try running
# this script after installing sqlite3.
if [[ -z $(which sqlite3) ]]; then
printf "\nPlease try running this script again AFTER "
printf "installing sqlite3 on your machine.\n\n"
exit "$EXIT_FAILURE"
fi
# Notify user of next action
printf "\nGenerating correct equijoin results using sqlite3 for main\n"
printf " C program to use for checking correctness of the C program's\n"
printf " equijoin results; this may take some time.\n"
printf "\nImporting '%s' as \"customer_data\" table and \n" "$CUSTOMER_TABLE_FILE"
printf " importing '%s' as \"purchases_data\" table...\n" "$PURCHASES_TABLE_FILE"
# Generate correct equijoin results by joining customer data table
# and purchases data table on customerID column. The results
# consists of two separate csv files where the first file is a
# table consisting only of active customers and the other is a
# table consisting only of inactive customers; sort the results
# by timestamp of each purchase in each resulting table.
sqlite3 -batch -csv -header <<- EOF
.output $CORRECT_EQUIJOIN_TABLE_ACTIVE_CUSTOMERS
.import $CUSTOMER_TABLE_FILE customer_data
.import $PURCHASES_TABLE_FILE purchases_data
SELECT
purchases_data.epochTimePurchased,
customer_data.customerID,
customer_data.customerName,
purchases_data.purchaseEAN13,
purchases_data.purchaseQuantity
FROM purchases_data JOIN customer_data
ON customer_data.customerID = purchases_data.customerID
AND customer_data.isActiveCustomer = 'Y'
ORDER BY purchases_data.epochTimePurchased;
.output $CORRECT_EQUIJOIN_TABLE_INACTIVE_CUSTOMERS
SELECT
purchases_data.epochTimePurchased,
customer_data.customerID,
customer_data.customerName,
purchases_data.purchaseEAN13,
purchases_data.purchaseQuantity
FROM purchases_data JOIN customer_data
ON customer_data.customerID = purchases_data.customerID
AND customer_data.isActiveCustomer = 'N'
ORDER BY purchases_data.epochTimePurchased;
.quit
EOF
# Next two if statements handle edge cases where either of the two
# equijoin results is an empty table. sqlite3 doesn't print anything
# to each file storing the equijoin result table if the corresponding
# equijoin result is an empty table.
#
# As each equijoin results file is simply an empty file if sqlite3
# outputs an empty table to the file, just overwrite the file
# with the actual correct equijoin result for these edge cases.
if [[ $(stat --printf="%s" "$CORRECT_EQUIJOIN_TABLE_ACTIVE_CUSTOMERS") -eq 0 ]]; then
printf "%s\n" "$EQUIJOIN_RESULT_TABLE_HEADER" > "$CORRECT_EQUIJOIN_TABLE_ACTIVE_CUSTOMERS"
fi
if [[ $(stat --printf="%s" "$CORRECT_EQUIJOIN_TABLE_INACTIVE_CUSTOMERS") -eq 0 ]]; then
printf "%s\n" "$EQUIJOIN_RESULT_TABLE_HEADER" > "$CORRECT_EQUIJOIN_TABLE_INACTIVE_CUSTOMERS"
fi
# Notify user where correct equijoin results have been written to
printf "\nCorrect equijoin result table for ACTIVE CUSTOMERS ONLY\n"
printf " has been written to '%s'.\n" "$CORRECT_EQUIJOIN_TABLE_ACTIVE_CUSTOMERS"
printf "\nCorrect equijoin result table for INACTIVE CUSTOMERS ONLY\n"
printf " has been written to '%s'.\n\n" "$CORRECT_EQUIJOIN_TABLE_INACTIVE_CUSTOMERS"
exit "$EXIT_SUCCESS"
## END MAIN PROGRAM ##