-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdb.py
131 lines (107 loc) · 4.61 KB
/
db.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import constants
import pandas as pd
import datetime
from sqlalchemy import MetaData, Table
def trim(days_ago):
    """Delete rows from the data table older than *days_ago* days.

    Parameters
    ----------
    days_ago : int or float
        Age threshold in days; rows whose TIME is earlier than
        now - days_ago are removed.
    """
    cutoff = datetime.datetime.now() - datetime.timedelta(days=days_ago)
    # Build the statement once so the printed SQL matches what is executed
    # (previously the unformatted template with '{}' was printed).
    delete = "DELETE FROM {} WHERE TIME < '{}';".format(
        constants.data_table, cutoff.isoformat()
    )
    print(delete)
    constants.postgres_engine.execute(delete)
def delete_all():
    """Remove every row from the data, counts, and locations tables.

    The tables themselves are kept; use drop_all() to remove them entirely.
    """
    # NOTE: the original also computed datetime.now() here and never used it;
    # that dead assignment has been removed.
    delete = 'DELETE FROM {};'
    print(delete.format(constants.data_table))
    for table in (constants.data_table,
                  constants.counts_table,
                  constants.locations_table):
        constants.postgres_engine.execute(delete.format(table))
def drop_all():
    """Drop the data, counts, and locations tables if they exist."""
    engine = constants.postgres_engine
    metadata = MetaData(bind=engine, reflect=True)
    for table_name in (constants.data_table,
                       constants.counts_table,
                       constants.locations_table):
        # checkfirst=True makes the drop a no-op when the table is absent.
        Table(table_name, metadata).drop(engine, checkfirst=True)
def get_between_days_ago(ago1, ago2):
    """Fetch rows from the data table whose TIME lies between two day
    offsets relative to now, ordered by TIME.

    Parameters
    ----------
    ago1, ago2 : int or float
        Day offsets from now; order does not matter.

    Returns
    -------
    pandas.DataFrame or None
        None when the two offsets are equal (an empty window).
    """
    if ago1 == ago2:
        return
    if ago1 > ago2:
        ago1, ago2 = ago2, ago1
    # Use a single reference timestamp so both window endpoints are
    # consistent (previously datetime.now() was called twice).
    now = datetime.datetime.now()
    start = now - datetime.timedelta(days=ago2)   # older bound
    end = now - datetime.timedelta(days=ago1)     # newer bound
    selection = ("SELECT * from {} WHERE TIME BETWEEN '"
                 + start.isoformat() + "' AND '" + end.isoformat()
                 + "' ORDER BY TIME")
    stored_df = pd.read_sql(
        selection.format(constants.data_table), constants.postgres_engine
    )
    return stored_df
def get_data(platform):
    """Load rows from the data table, optionally filtered by platform code.

    Parameters
    ----------
    platform : str or None
        When not None, restrict to rows whose PLATFORM_CODE equals it.

    Returns
    -------
    pandas.DataFrame
        Rows ordered by TIME. The table is periodically refreshed by a
        separate Celery process in tasks.py.
    """
    selection = 'SELECT * from {}'.format(constants.data_table)
    params = None
    if platform is not None:
        # Bind the value as a query parameter instead of splicing it into
        # the SQL string (the original concatenation was injection-prone).
        # %(name)s is the psycopg2 "pyformat" paramstyle — TODO confirm the
        # driver behind constants.postgres_engine.
        selection += ' WHERE PLATFORM_CODE=%(platform)s'
        params = {'platform': str(platform)}
    selection += ' ORDER BY TIME;'
    updated_df = pd.read_sql(selection, constants.postgres_engine,
                             params=params)
    return updated_df
def test_selection():
    """Run a small sanity query: the ten earliest ARGO rows from the data
    table, ordered by TIME."""
    query = ("SELECT * from {}"
             " WHERE PLATFORM_TYPE='ARGO'"
             " ORDER BY TIME LIMIT 10;").format(constants.data_table)
    return pd.read_sql(query, constants.postgres_engine)
def exists():
    """Return True when the data table can be queried, False otherwise.

    Any exception raised by the probe query is printed and treated as
    "table does not exist".
    """
    try:
        df = test_selection()
    except Exception as e:
        print(e)
        return False
    return df is not None
def get_nobs(grouping):
    """Count non-null observations per variable, grouped by *grouping*.

    Builds one COUNT(var) column for every surface and depth variable,
    groups by the given column, and appends a 'total' column summing the
    numeric counts across each row.
    """
    count_cols = ", ".join(
        "COUNT({0}) as {0}".format(var)
        for var_group in (constants.surface_variables,
                          constants.depth_variables)
        for var in var_group
    )
    nobs_df = pd.read_sql(
        "SELECT {}, {} FROM {} GROUP BY {};".format(
            grouping, count_cols, constants.data_table, grouping),
        constants.postgres_engine
    )
    nobs_df['total'] = nobs_df.sum(numeric_only=True, axis=1)
    return nobs_df
def get_platform_counts(grouping):
    """Count rows in the locations table per value of *grouping*.

    Parameters
    ----------
    grouping : str
        Column name to group and order by.

    Returns
    -------
    pandas.DataFrame
        Two columns: the grouping column and 'count', ordered by the
        grouping column.
    """
    # NOTE: the original also built a {grouping: count} dict here that was
    # never used or returned; that dead code has been removed.
    platform_count = pd.read_sql(
        "SELECT {}, count(*) FROM {} GROUP BY {} ORDER BY {}".format(
            grouping, constants.locations_table, grouping, grouping),
        constants.postgres_engine
    )
    return platform_count
def get_range(column):
    """Return a one-row DataFrame with min_<column> and max_<column>
    computed over the data table."""
    query = "SELECT min({0}) AS min_{0}, MAX({0}) as max_{0} FROM {1}".format(
        column, constants.data_table)
    return pd.read_sql(query, constants.postgres_engine)
def get_counts():
    """Load the entire counts table as a DataFrame.

    The table is periodically refreshed by a separate Celery process in
    tasks.py; this is a plain read-through.
    """
    query = "SELECT * FROM {};".format(constants.counts_table)
    return pd.read_sql(query, constants.postgres_engine)
def get_locations():
    """Load the entire locations table as a DataFrame.

    The table is periodically refreshed by a separate Celery process in
    tasks.py; this is a plain read-through.
    """
    query = "SELECT * FROM {};".format(constants.locations_table)
    return pd.read_sql(query, constants.postgres_engine)