Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
willf committed Jan 10, 2025
1 parent d1ddeb2 commit b1e497a
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 9 deletions.
1 change: 0 additions & 1 deletion delete_repos.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,3 @@ gh repo delete edgi-govdata-archiving/CDC_EIJ_DATA_53_Washington
gh repo delete edgi-govdata-archiving/CDC_EIJ_DATA_54_West_Virginia
gh repo delete edgi-govdata-archiving/CDC_EIJ_DATA_55_Wisconsin
gh repo delete edgi-govdata-archiving/CDC_EIJ_DATA_56_Wyoming
gh repo delete edgi-govdata-archiving/temp
8 changes: 8 additions & 0 deletions remove_git_directories.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
#
# Remove the .git entry from each directory given on the command line.
#
# Usage: remove_git_directories.sh DIR [DIR ...]
#
# Arguments that are not existing directories are skipped without a message.

for arg in "$@"; do
    if [ -d "$arg" ]; then
        echo "If $arg has a .git directory, it's being deleted now"
        # "--" ends option parsing so a path beginning with "-" is never
        # interpreted as an rm option; -f makes a missing .git a no-op.
        rm -rf -- "$arg/.git"
    fi
done
40 changes: 32 additions & 8 deletions scrape_cdc_county_level_reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -3551,7 +3551,7 @@ def generate_url(path):
return f"https://eji.cdc.gov/{path}"


def download(url, path):
def do_download(url, path):
logger.info(f"Downloading {url} to {path}")
response = requests.get(url)
if response.status_code != 200:
Expand All @@ -3568,6 +3568,11 @@ def download(url, path):
file.write(response.content)


def do_spreadsheet(url):
    """Emit *url* on standard output.

    Placeholder for spreadsheet output: at present the URL is only printed,
    one per call, and nothing is written to a spreadsheet.
    """
    print(url)


@click.command()
@click.option(
"--log-level",
Expand All @@ -3581,7 +3586,22 @@ def download(url, path):
help="Set the target destination for downloads (default: .)",
type=click.Path(),
)
def main(log_level, target):
@click.option(
"--spreadsheet",
default=True,
type=bool,
is_flag=True,
show_default=True,
help="Output the data to a spreadsheet",
)
@click.option(
"--download",
default=False,
type=bool,
is_flag=True,
help="Download the files",
)
def main(log_level, target, spreadsheet, download):
global logger
logger = setup_logger(getattr(logging, log_level.upper(), logging.INFO))
logger.info("Scraping CDE County Level Reports")
Expand All @@ -3592,13 +3612,17 @@ def main(log_level, target):
f"Processing State: {state} County: {county} State FIPS: {state_fips} County FIPS: {county_fips}"
)
url_path = generate_url_path(state_fips, state, county_fips, county)
# continue if the file already exists
path = pathlib.Path(target) / url_path
if path.exists():
logger.info(f"Skipping {url_path} as it already exists")
continue
url = generate_url(url_path)
download(url, path)
# if download:
# # continue if the file already exists
# path = pathlib.Path(target) / url_path
# if path.exists():
# logger.info(f"Skipping {url_path} as it already exists")
# continue
# else:
# do_download(url, path)
if spreadsheet:
do_spreadsheet(url)


if __name__ == "__main__":
Expand Down

0 comments on commit b1e497a

Please sign in to comment.