From e397b6be6234e56e23bcc50f97d25dd154ba2ccf Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Thu, 6 May 2021 04:00:58 +0200 Subject: [PATCH] Finalize testing for `gcs` filesystem (#1400) * filesystem with the same config should share the same random prefix * `fs` texture returns `uri` instead of each `{filesystem}_fs` * fix matching fields for gcs testbench * add more tests for `gcs` filesystem --- tests/test_filesystem.py | 79 +++++++++++-------- .../test_gcloud/testbench/testbench_utils.py | 43 ++++++---- 2 files changed, 74 insertions(+), 48 deletions(-) diff --git a/tests/test_filesystem.py b/tests/test_filesystem.py index 548359541..8d96de1af 100644 --- a/tests/test_filesystem.py +++ b/tests/test_filesystem.py @@ -29,7 +29,7 @@ ROOT_PREFIX = f"tf-io-root-{int(time.time())}/" # This is the number of attributes each filesystem should return in `*_fs`. -NUM_ATR_FS = 7 +NUM_ATR_FS = 6 S3_URI = "s3" AZ_URI = "az" @@ -127,7 +127,7 @@ def mkdirs(path): path += "/" write(path, b"") - yield S3_URI, path_to, read, write, mkdirs, posixpath.join, (client, bucket_name) + yield path_to, read, write, mkdirs, posixpath.join, (client, bucket_name) monkeypatch.undo() @@ -182,7 +182,7 @@ def mkdirs(path): if path[-1] == "/": write(path, b"") - yield AZ_URI, path_to, read, write, mkdirs, posixpath.join, ( + yield path_to, read, write, mkdirs, posixpath.join, ( client, container_name, account, @@ -196,13 +196,13 @@ def az_dsn_fs(az_fs): yield [None] * NUM_ATR_FS return - uri, _, read, write, mkdirs, join, fs_internal = az_fs + _, read, write, mkdirs, join, fs_internal = az_fs _, container_name, account = fs_internal def path_to_dsn(*args): return f"{AZ_URI}://{account}.blob.core.windows.net/{container_name}/{posixpath.join(ROOT_PREFIX, *args)}" - yield uri, path_to_dsn, read, write, mkdirs, join, fs_internal + yield path_to_dsn, read, write, mkdirs, join, fs_internal @pytest.fixture(scope="module") @@ -223,10 +223,9 @@ def write(*_): def mkdirs(_): pass - yield HTTPS_URI, path_to, read, write, mkdirs, posixpath.join, None + yield path_to, read, write, mkdirs, posixpath.join, None -# TODO(vnvo2409): some tests with `gcs` are falling. @pytest.fixture(scope="module") def gcs_fs(): if should_skip(GCS_URI): @@ -275,29 +274,41 @@ def mkdirs(path): path += "/" write(path, b"") - yield GCS_URI, path_to, read, write, mkdirs, posixpath.join, None + yield path_to, read, write, mkdirs, posixpath.join, None monkeypatch.undo() @pytest.fixture def fs(request, s3_fs, az_fs, az_dsn_fs, https_fs, gcs_fs): - uri, path_to, read, write, mkdirs, join, internal = [None] * NUM_ATR_FS - should_skip(request.param, check_only=False) + path_to, read, write, mkdirs, join, internal = [None] * NUM_ATR_FS + test_fs_uri = request.param + real_uri = test_fs_uri + should_skip(test_fs_uri, check_only=False) + + if test_fs_uri == S3_URI: + path_to, read, write, mkdirs, join, internal = s3_fs + elif test_fs_uri == AZ_URI: + path_to, read, write, mkdirs, join, internal = az_fs + elif test_fs_uri == AZ_DSN_URI: + real_uri = AZ_URI + path_to, read, write, mkdirs, join, internal = az_dsn_fs + elif test_fs_uri == HTTPS_URI: + path_to, read, write, mkdirs, join, internal = https_fs + elif test_fs_uri == GCS_URI: + path_to, read, write, mkdirs, join, internal = gcs_fs + + path_to_rand = None + test_patchs = request.getfixturevalue("patchs") + if (test_fs_uri, test_patchs) in fs.path_to_rand_cache: + path_to_rand = fs.path_to_rand_cache[(test_fs_uri, test_patchs)] + else: + path_to_rand = functools.partial(path_to, str(random.getrandbits(32))) + mkdirs(path_to_rand("")) + fs.path_to_rand_cache[(test_fs_uri, test_patchs)] = path_to_rand + yield real_uri, path_to_rand, read, write, mkdirs, join, internal - if request.param == S3_URI: - uri, path_to, read, write, mkdirs, join, internal = s3_fs - elif request.param == AZ_URI: - uri, path_to, read, write, mkdirs, join, internal = az_fs - elif request.param == AZ_DSN_URI: - uri, path_to, read, write, mkdirs, join, internal = az_dsn_fs - elif request.param == HTTPS_URI: - uri, path_to, read, write, mkdirs, join, internal = https_fs - elif request.param == GCS_URI: - uri, path_to, read, write, mkdirs, join, internal = gcs_fs - path_to_rand = functools.partial(path_to, str(random.getrandbits(32))) - mkdirs(path_to_rand("")) - yield uri, path_to_rand, read, write, mkdirs, join, internal +fs.path_to_rand_cache = {} @pytest.mark.parametrize( @@ -328,7 +339,9 @@ def test_io_read_file(fs, patchs, monkeypatch): @pytest.mark.parametrize( - "fs, patchs", [(S3_URI, None), (AZ_URI, None), (AZ_DSN_URI, None)], indirect=["fs"] + "fs, patchs", + [(S3_URI, None), (AZ_URI, None), (AZ_DSN_URI, None), (GCS_URI, None)], + indirect=["fs"], ) def test_io_write_file(fs, patchs, monkeypatch): _, path_to, read, _, _, _, _ = fs @@ -459,7 +472,7 @@ def test_dataset_from_remote_filename(fs, patchs, monkeypatch): @pytest.mark.parametrize( - "fs, patchs", [(S3_URI, None), (AZ_URI, None)], indirect=["fs"] + "fs, patchs", [(S3_URI, None), (AZ_URI, None), (GCS_URI, None)], indirect=["fs"] ) def test_gfile_GFile_writable(fs, patchs, monkeypatch): uri, path_to, read, _, _, _, _ = fs @@ -488,7 +501,7 @@ def test_gfile_GFile_writable(fs, patchs, monkeypatch): @pytest.mark.parametrize( - "fs, patchs", [(S3_URI, None), (AZ_URI, None)], indirect=["fs"] + "fs, patchs", [(S3_URI, None), (AZ_URI, None), (GCS_URI, None)], indirect=["fs"] ) def test_gfile_isdir(fs, patchs, monkeypatch): _, path_to, _, write, mkdirs, join, _ = fs @@ -506,10 +519,10 @@ def test_gfile_isdir(fs, patchs, monkeypatch): @pytest.mark.parametrize( - "fs, patchs", [(S3_URI, None), (AZ_URI, None)], indirect=["fs"] + "fs, patchs", [(S3_URI, None), (AZ_URI, None), (GCS_URI, None)], indirect=["fs"] ) def test_gfile_listdir(fs, patchs, monkeypatch): - _, path_to, _, write, mkdirs, join, _ = fs + uri, path_to, _, write, mkdirs, join, _ = fs mock_patchs(monkeypatch, patchs) root_path = "test_gfile_listdir" @@ -519,6 +532,10 @@ def test_gfile_listdir(fs, patchs, monkeypatch): num_childs = 5 childrens = [None] * num_childs childrens[0] = join(dname, "subdir") + # TODO(vnvo2409): `gs` filesystem requires `/` at the end of directory's path. + # Consider if we could change the behavior for matching the other filesystems. + if uri == GCS_URI: + childrens[0] += "/" mkdirs(childrens[0]) body = b"123456789" @@ -532,7 +549,7 @@ def test_gfile_listdir(fs, patchs, monkeypatch): @pytest.mark.parametrize( - "fs, patchs", [(S3_URI, None), (AZ_URI, None)], indirect=["fs"] + "fs, patchs", [(S3_URI, None), (AZ_URI, None), (GCS_URI, None)], indirect=["fs"] ) def test_gfile_makedirs(fs, patchs, monkeypatch): _, path_to, _, write, _, join, _ = fs @@ -569,7 +586,7 @@ def test_gfile_remove(fs, patchs, monkeypatch): @pytest.mark.parametrize( - "fs, patchs", [(S3_URI, None), (AZ_URI, None)], indirect=["fs"] + "fs, patchs", [(S3_URI, None), (AZ_URI, None), (GCS_URI, None)], indirect=["fs"] ) def test_gfile_rmtree(fs, patchs, monkeypatch): _, path_to, _, write, mkdirs, join, _ = fs @@ -646,7 +663,7 @@ def test_gfile_rename(fs, patchs, monkeypatch): @pytest.mark.parametrize( - "fs, patchs", [(S3_URI, None), (AZ_URI, None)], indirect=["fs"] + "fs, patchs", [(S3_URI, None), (AZ_URI, None), (GCS_URI, None)], indirect=["fs"] ) def test_gfile_glob(fs, patchs, monkeypatch): _, path_to, _, write, _, join, _ = fs diff --git a/tests/test_gcloud/testbench/testbench_utils.py b/tests/test_gcloud/testbench/testbench_utils.py index 8928273c8..8290fc661 100644 --- a/tests/test_gcloud/testbench/testbench_utils.py +++ b/tests/test_gcloud/testbench/testbench_utils.py @@ -18,6 +18,9 @@ import hashlib import json import random +import re + +field_match = re.compile(r"(?:(\w+)\((\w+(?:,\w+)*)\))|(\w+)") def filter_fields_from_response(fields, response): @@ -33,23 +36,29 @@ def filter_fields_from_response(fields, response): if fields is None: return json.dumps(response) tmp = {} - for key in fields.split(","): - key.replace(" ", "") - parentheses_idx = key.find("(") - if parentheses_idx != -1: - main_key = key[:parentheses_idx] - child_key = key[parentheses_idx + 1 : -1] - if main_key in response: - children = response[main_key] - if isinstance(children, list): - tmp_list = [] - for value in children: - tmp_list.append(value[child_key]) - tmp[main_key] = tmp_list - elif isinstance(children, dict): - tmp[main_key] = children[child_key] - elif key in response: - tmp[key] = response[key] + fields.replace(" ", "") + for keys in field_match.findall(fields): + if keys[2]: + if keys[2] not in response: + continue + tmp[keys[2]] = response[keys[2]] + else: + if keys[0] not in response: + continue + childrens = response[keys[0]] + if isinstance(childrens, list): + tmp_list = [] + for children in childrens: + child = {} + for child_key in keys[1].split(","): + child[child_key] = children[child_key] + tmp_list.append(child) + tmp[keys[0]] = tmp_list + elif isinstance(childrens, dict): + child = {} + for child_key in keys[1].split(","): + child[child_key] = children[child_key] + tmp[keys[0]] = child return json.dumps(tmp)