-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix issues related to repo restructure
- Fixed all imports
- Fixed test and coverage settings in `pyproject.toml`
- Removed all Python path magic in `__init__.py` files
- Moved data files into the repo, and used `importlib` to load files by package name instead of by path. This is more portable, especially once we turn this into a distributable package.
- Refactored `global_data` to load data only once, at module load time
- Loading branch information
Showing 16 changed files with 39 additions and 87 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +0,0 @@ | ||
import os | ||
import sys | ||
|
||
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||
sys.path.append(ROOT_DIR) | ||
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
File renamed without changes.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,36 +1,20 @@ | ||
import os | ||
import sys | ||
import csv | ||
from importlib.resources import files | ||
|
||
import pandas as pd | ||
|
||
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) # noqa: E402 | ||
sys.path.append(ROOT_DIR) # noqa: E402 | ||
# global variable for NAICS codes | ||
naics_codes: dict[str,str] = {} | ||
naics_file_path = files('regtech_data_validator.data.naics').joinpath('2022_codes.csv') | ||
|
||
from config import CENSUS_PROCESSED_CSV_PATH, NAICS_CSV_PATH # noqa: E402 | ||
with naics_file_path.open('r') as f: | ||
for row in csv.DictReader(f): | ||
naics_codes[row['code']] = row['title'] | ||
|
||
naics_codes = {} | ||
|
||
# global variable for geoids | ||
census_geoids = {} | ||
# global variable for Census GEOIDs | ||
census_geoids: set[str] = set() | ||
census_file_path = files('regtech_data_validator.data.census').joinpath('Census2022.processed.csv') | ||
|
||
|
||
def read_naics_codes(csv_path: str = NAICS_CSV_PATH): | ||
""" | ||
read NAICS CSV file with this format: (code, description) | ||
and populate global value: naics_codes | ||
""" | ||
naics_codes.clear() | ||
df = pd.read_csv(csv_path, dtype=str, na_filter=False) | ||
for _, row in df.iterrows(): | ||
naics_codes.update({row.iloc[0]: row.iloc[1]}) | ||
|
||
|
||
def read_geoids(csv_path: str = CENSUS_PROCESSED_CSV_PATH): | ||
""" | ||
read geoids CSV file with this format: (code) | ||
and populate global value: census_geoids | ||
""" | ||
census_geoids.clear() | ||
df = pd.read_csv(csv_path, dtype=str, na_filter=False) | ||
for _, row in df.iterrows(): | ||
census_geoids.update({row.iloc[0]: None}) | ||
with census_file_path.open('r') as f: | ||
for row in csv.DictReader(f): | ||
census_geoids.add(row['geoid']) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +0,0 @@ | ||
import os | ||
import sys | ||
|
||
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | ||
|
||
sys.path.append(os.path.join(ROOT_DIR, "validator")) | ||
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,25 +1,11 @@ | ||
import pytest | ||
|
||
from validator import global_data | ||
from regtech_data_validator import global_data | ||
|
||
|
||
class TestGlobalData: | ||
def test_valid_naics_codes(self): | ||
global_data.read_naics_codes() | ||
assert len(global_data.naics_codes) == 96 | ||
|
||
def test_valid_geoids(self): | ||
global_data.read_geoids() | ||
assert len(global_data.census_geoids) == 87275 | ||
|
||
def test_invalid_naics_file(self): | ||
failed_fpath = "./data/naics/processed/2022_codes.csv1" | ||
with pytest.raises(Exception) as exc: | ||
global_data.read_naics_codes(failed_fpath) | ||
assert exc.type == FileNotFoundError | ||
|
||
def test_invalid_geoids_file(self): | ||
failed_fpath = "./data/census/processed/Census2022.processed.csv2" | ||
with pytest.raises(Exception) as exc: | ||
global_data.read_geoids(failed_fpath) | ||
assert exc.type == FileNotFoundError |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters