-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathacquire.py
64 lines (53 loc) · 2.26 KB
/
acquire.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import pandas as pd
import numpy as np
import os
import env
# In[ ]:
# creating a connection to connect to the Codeup Student Database
def get_connection(db, user=env.user, host=env.host, password=env.password):
return f'mysql+pymysql://{user}:{password}@{host}/{db}'
# In[ ]:
def get_zillow_data():
'''This function will connect to the Codeup Student Database. It will then cache a local copy to the computer to use for later
in the form of a CSV file. If you want to reproduce the results, you will need your own env.py file and database credentials.'''
filename = "zillow_db.csv"
if os.path.isfile(filename):
return pd.read_csv(filename)
else:
# read the SQL query into a dataframe
df = pd.read_sql('''
SELECT prop.*,
pred.logerror,
pred.transactiondate,
air.airconditioningdesc,
arch.architecturalstyledesc,
build.buildingclassdesc,
heat.heatingorsystemdesc,
landuse.propertylandusedesc,
story.storydesc,
construct.typeconstructiondesc
FROM properties_2017 prop
INNER JOIN (SELECT parcelid,
logerror,
Max(transactiondate) transactiondate
FROM predictions_2017
GROUP BY parcelid, logerror) pred
USING (parcelid)
LEFT JOIN airconditioningtype air USING (airconditioningtypeid)
LEFT JOIN architecturalstyletype arch USING (architecturalstyletypeid)
LEFT JOIN buildingclasstype build USING (buildingclasstypeid)
LEFT JOIN heatingorsystemtype heat USING (heatingorsystemtypeid)
LEFT JOIN propertylandusetype landuse USING (propertylandusetypeid)
LEFT JOIN storytype story USING (storytypeid)
LEFT JOIN typeconstructiontype construct USING (typeconstructiontypeid)
WHERE prop.latitude IS NOT NULL
AND prop.longitude IS NOT NULL
AND (propertylandusetypeid IN (261, 262, 263, 264, 268, 273, 274, 276, 279));
''' , get_connection('zillow'))
# Write that dataframe to disk for later. Called "caching" the data for later.
df.to_csv('zillow_db.csv')
# Return the dataframe to the calling code
return df