-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclean_loans_data.py
executable file
·63 lines (46 loc) · 1.4 KB
/
clean_loans_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/local/bin/python
import csv
# Default path of the raw loans CSV; used as the default `infilename` of main().
INFILENAME = 'loansData.csv'
# Default path of the cleaned CSV; used as the default `outfilename` of main().
OUTFILENAME = 'loansData_clean.csv'
def clean_loan_length(ll):
    """Translate a loan-term label into its length in months.

    Only the labels "36 months" and "60 months" are recognised; any other
    value raises ``KeyError``.
    """
    months_by_label = {"36 months": 36, "60 months": 60}
    return months_by_label[ll]
def clean_employment_length(el):
    """Translate an employment-length label into a whole number of years.

    "< 1 year" and "n/a" both become 0, and "10+ years" becomes 10.
    A label that is not in the table raises ``KeyError``.
    """
    years_by_label = {
        "n/a": 0,
        "< 1 year": 0,
        "1 year": 1,
        "2 years": 2,
        "3 years": 3,
        "4 years": 4,
        "5 years": 5,
        "6 years": 6,
        "7 years": 7,
        "8 years": 8,
        "9 years": 9,
        "10+ years": 10,
    }
    return years_by_label[el]
def clean_fico_range(fico_range):
    """Collapse a "low-high" FICO range string to the midpoint of the range.

    The string must split on "-" into exactly two integer parts; anything
    else raises ``ValueError``.

    Note: the halving uses ``/``, so on Python 3 the result is a float
    (e.g. 737.0), while on Python 2 it is an int.
    """
    lower_text, upper_text = fico_range.split("-")
    lower = int(lower_text)
    upper = int(upper_text)
    half_width = (upper - lower) / 2
    return half_width + lower
def clean_percent(p):
    """Return *p* with every "%" character removed.

    The result is still a string; no numeric conversion is performed.
    """
    percent_sign = "%"
    nothing = ""
    return p.replace(percent_sign, nothing)
def clean_line(line):
    """Clean the loan-specific columns of one CSV row and return the row.

    *line* is a mapping keyed by column name. The five columns listed below
    are overwritten in place with their cleaned values; every other column
    is left untouched. The same mapping object is returned.
    """
    # (column name, cleaner) pairs, applied in this order.
    column_cleaners = (
        ("FICO.Range", clean_fico_range),
        ("Interest.Rate", clean_percent),
        ("Debt.To.Income.Ratio", clean_percent),
        ("Employment.Length", clean_employment_length),
        ("Loan.Length", clean_loan_length),
    )
    for column, cleaner in column_cleaners:
        line[column] = cleaner(line[column])
    return line
def main(infilename=INFILENAME, outfilename=OUTFILENAME):
    """Read the raw loans CSV, clean every row, and write the cleaned CSV.

    Args:
        infilename: Path of the CSV file to read.
        outfilename: Path of the CSV file to write; it is created, or
            overwritten if it already exists.
    """
    # Read from the input and write to the output (the two names were
    # previously swapped, which truncated the raw data file). The context
    # managers close both files even if a row fails to clean.
    with open(infilename) as infile, open(outfilename, "w") as outfile:
        reader = csv.DictReader(infile)
        if not reader.fieldnames:
            # Empty input: there is no header and no rows to write.
            return
        writer = csv.DictWriter(outfile, reader.fieldnames)
        # DictWriter does not emit the column names on its own; write them
        # so the cleaned file keeps the same header as the input.
        writer.writeheader()
        for aline in reader:
            writer.writerow(clean_line(aline))


if __name__ == "__main__":
    main()