Skip to content

Commit

Permalink
reorg
Browse files Browse the repository at this point in the history
  • Loading branch information
mahf708 authored Nov 23, 2024
1 parent df1e53a commit 68a0b41
Show file tree
Hide file tree
Showing 2 changed files with 110 additions and 11 deletions.
15 changes: 4 additions & 11 deletions e3sm-diags/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,9 @@ ENV TZ=America/Los_Angeles
ENV LANGUAGE=en_US:en \
LANG=en_US.UTF-8

RUN wget https://github.com/E3SM-Project/e3sm_diags/archive/refs/heads/main.zip
RUN unzip main.zip
RUN cd e3sm_diags-main
RUN python3 -m pip install .
RUN python3 -m tests.integration.download_data
RUN mkdir -p /e3sm_diags_downloaded_data
RUN mv tests/integration/integration_test_data /e3sm_diags_downloaded_data/integration_test_data
RUN mv tests/integration/expected /e3sm_diags_downloaded_data/expected
RUN mv tests/integration/integration_test_data_20230523 /e3sm_diags_downloaded_data/integration_test_data_20230523
RUN cd ..
RUN rm -rf main.zip e3sm_diags-main
RUN mkdir -p /e3sm_diags_downloaded_data/tests/integration

COPY download_files.py /app/download_files.py
RUN python3 /app/download_files.py

ENTRYPOINT ["/bin/bash", "--rcfile", "/etc/profile", "-l"]
106 changes: 106 additions & 0 deletions e3sm-diags/download_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import os
import re
import urllib.request
from typing import List

# The three names imported here are reassigned immediately below, so the
# import contributes no values. It is kept (optionally) for compatibility, but
# must not be fatal: the Dockerfile copies this script on its own to
# /app/download_files.py and runs it there, where the e3sm_diags ``tests``
# package may not be importable.
try:
    from tests.integration.config import (  # noqa: F401
        TEST_DATA_DIR,
        TEST_IMAGES_DIR,
        TEST_ROOT_PATH,
    )
except ImportError:
    pass

# Local directory that receives the downloaded data.
TEST_ROOT_PATH = "/e3sm_diags_downloaded_data/tests/integration/"
# Sub-directory names; each is used both as the last URL component to fetch
# and as the directory name created under TEST_ROOT_PATH.
TEST_DATA_DIR = "integration_test_data"
TEST_IMAGES_DIR = "integration_test_images"


# https://stackoverflow.com/questions/49113616/how-to-download-file-using-python
def retrieve_file(url, file_path):
    """Download ``url`` to ``file_path``, creating parent directories first.

    Args:
        url: Location to fetch; passed unchanged to
            ``urllib.request.urlretrieve``.
        file_path: Local destination path for the downloaded content.

    Returns:
        ``file_path``, exactly as given.
    """
    parent_dir = os.path.dirname(file_path)
    # https://stackoverflow.com/questions/12517451/automatically-creating-directories-with-file-output
    # A bare file name has an empty directory component, and
    # ``os.makedirs("")`` would raise, so only create it when present.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    print("Downloading {} to {}".format(url, file_path))
    urllib.request.urlretrieve(url, file_path)
    return file_path


def _join_url(base, name):
    """Join a URL and a relative component with a single forward slash."""
    # URLs always use "/", so do not rely on ``os.path.join``, whose
    # separator depends on the host platform.
    return "{}/{}".format(base.rstrip("/"), name)


def _extract_links(html_lines):
    """Return the downloadable ``href`` targets found in ``html_lines``.

    Every ``href`` on every line is considered. Parent-directory links
    (containing ``../``), links starting with ``/`` and links containing
    ``?`` (sorting links) are skipped.
    """
    links: List[str] = []
    for line in html_lines:
        for match_object in re.finditer(r'href=[\'"]?([^\'" >]+)', line):
            link = match_object.group(1)
            # Ignore parent directory and sorting links
            if "../" in link or link.startswith("/") or "?" in link:
                continue
            print("Found a link: {}".format(link))
            links.append(link)
    return links


def download_files(url_prefix, url_suffix, directory_prefix=None):
    """Recursively download everything linked from ``url_prefix/url_suffix``.

    The page at that URL is saved to a temporary ``<local dir>.html`` file,
    scanned for ``href`` links, and then removed. Links ending in ``/`` are
    treated as sub-directories and processed recursively; every other link
    is downloaded as a file via ``retrieve_file``.

    Args:
        url_prefix: URL of the parent directory.
        url_suffix: Name of the directory to download. It is appended to
            ``url_prefix`` and also used as the local directory name.
        directory_prefix: Optional local directory under which
            ``url_suffix`` is created. When falsy, ``url_suffix`` is used
            as the local path as-is.
    """
    print(f"Downloading {url_suffix}")
    print("url_prefix={}".format(url_prefix))
    print("url_suffix={}".format(url_suffix))
    print("(local) directory_prefix={}".format(directory_prefix))

    url = _join_url(url_prefix, url_suffix)

    # Local directory mirroring ``url``; also the prefix handed to recursion.
    if directory_prefix:
        local_directory = os.path.join(directory_prefix, url_suffix)
    else:
        local_directory = url_suffix

    links_file_path = "{}.html".format(local_directory)
    print(
        "Downloading files from {}; checking for links on {}".format(
            url, links_file_path
        )
    )
    html_path = retrieve_file(url, links_file_path)

    try:
        with open(html_path, "r", encoding="utf-8") as html:
            links = _extract_links(html)
    finally:
        # The listing page is only scratch data; remove it even when
        # reading or parsing fails.
        if os.path.exists(links_file_path):
            os.remove(links_file_path)

    # Links ending in "/" are sub-directories; every other link is
    # downloaded as a file, whatever its extension.
    directories = [link for link in links if link.endswith("/")]
    files = [link for link in links if not link.endswith("/")]

    print("\n###Downloading files")
    for f in files:
        retrieve_file(_join_url(url, f), os.path.join(local_directory, f))

    print("\n###Downloading directories")
    for d in directories:
        download_files(url, d.rstrip("/"), directory_prefix=local_directory)


def download():
    """Download the integration test data and the expected images.

    Both directories are fetched with ``download_files`` and stored under
    ``TEST_ROOT_PATH``.
    """
    download_files(
        "https://web.lcrc.anl.gov/public/e3sm/e3sm_diags_test_data/integration",
        TEST_DATA_DIR,
        directory_prefix=TEST_ROOT_PATH,
    )
    download_files(
        "https://web.lcrc.anl.gov/public/e3sm/e3sm_diags_test_data/integration/expected",
        TEST_IMAGES_DIR,
        directory_prefix=TEST_ROOT_PATH,
    )
    # Report both downloaded directories and where they were placed.
    print(f"Downloaded {TEST_DATA_DIR} and {TEST_IMAGES_DIR} to {TEST_ROOT_PATH}")


# Run the download only when executed as a script (the Dockerfile invokes it
# with ``python3 /app/download_files.py``), not when imported.
if __name__ == "__main__":
    download()

0 comments on commit 68a0b41

Please sign in to comment.