Skip to content

Commit

Permalink
fix: display country list when get_name has no parameters (#28)
Browse files Browse the repository at this point in the history
  • Loading branch information
12rambau authored Aug 14, 2023
2 parents 5c08e2d + 531c6ba commit b83089e
Show file tree
Hide file tree
Showing 5 changed files with 349 additions and 52 deletions.
9 changes: 9 additions & 0 deletions docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,15 @@ For example to get the name and codes of all the departments in France you can r

pygaul.get_names(admin="1270", content_level=2, complete=True)

.. note::

You can also get the list of all the country names by omitting both the ``admin`` and ``name`` parameters. If no level is provided, the table only shows country names, but the other parameters remain available.

.. code-block:: python

    pygaul.get_names()
Suggestion
----------

Expand Down
105 changes: 58 additions & 47 deletions pygaul/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,59 +43,70 @@ def get_names(
# sanitary check on parameters
if name and admin:
raise ValueError('"name" and "id" cannot be set at the same time.')
elif not name and not admin:
raise ValueError('at least "name" or "admin" need to be set.')

# set the id we look for and tell the function if its a name or an admin
is_name = True if name else False
id = name if name else admin

# read the data and find if the element exist
# if a name or admin number is set, we need to filter the dataset accordingly
# if not we will simply consider the world dataset
df = pd.read_parquet(__gaul_data__)
column = "ADM{}_NAME" if is_name else "ADM{}_CODE"
is_in = (
df.filter([column.format(i) for i in range(3)])
.apply(lambda col: col.str.lower())
.isin([id.lower()])
)

if not is_in.any().any():
# find the 5 closest names/id
columns = [df[column.format(i)].dropna().str.lower().values for i in range(3)]
ids = np.unique(np.concatenate(columns))
close_ids = get_close_matches(id.lower(), ids, n=5)
if is_name is True:
close_ids = [i.capitalize() for i in close_ids]
else:
close_ids = [i.upper() for i in close_ids]
raise ValueError(
f'The requested "{id}" is not part of FAO GAUL 2015. The closest matches are: {", ".join(close_ids)}.'
if name or admin:
# set the id we look for and tell the function if its a name or an admin
is_name = True if name else False
id = name if name else admin

# read the data and find if the element exist
column = "ADM{}_NAME" if is_name else "ADM{}_CODE"
is_in = (
df.filter([column.format(i) for i in range(3)])
.apply(lambda col: col.str.lower())
.isin([id.lower()])
)

# Get the code of the associated country of the identifed area and the associated level
line = is_in[~((~is_in).all(axis=1))].idxmax(1)
level = line.iloc[0][3]

# load the max_level available in the requested area
sub_df = df[df[column.format(level)].str.fullmatch(id, case=False)]
max_level = next(
i for i in reversed(range(3)) if (sub_df[f"ADM{i}_NAME"] != "").any()
)

# get the request level from user
if content_level == -1:
content_level = level
elif content_level < int(level):
warnings.warn(
f"The requested level ({content_level}) is higher than the area ({level}). Fallback to {level}."
if not is_in.any().any():
# find the 5 closest names/id
columns = [
df[column.format(i)].dropna().str.lower().values for i in range(3)
]
ids = np.unique(np.concatenate(columns))
close_ids = get_close_matches(id.lower(), ids, n=5)
if is_name is True:
close_ids = [i.capitalize() for i in close_ids]
else:
close_ids = [i.upper() for i in close_ids]
raise ValueError(
f'The requested "{id}" is not part of FAO GAUL 2015. The closest '
f'matches are: {", ".join(close_ids)}.'
)

# Get the code of the associated country of the identifed area and the associated level
line = is_in[~((~is_in).all(axis=1))].idxmax(1)
level = line.iloc[0][3]

# load the max_level available in the requested area
sub_df = df[df[column.format(level)].str.fullmatch(id, case=False)]
max_level = next(
i for i in reversed(range(3)) if (sub_df[f"ADM{i}_NAME"] != "").any()
)
content_level = level

if int(content_level) > max_level:
warnings.warn(
f"The requested level ({content_level}) is higher than the max level in this country ({max_level}). Fallback to {max_level}."
)
content_level = max_level
# get the request level from user
content_level, level = int(content_level), int(level)
if content_level == -1:
content_level = level
elif content_level < level:
warnings.warn(
f"The requested level ({content_level}) is higher than the area ({level}). "
f"Fallback to {level}."
)
content_level = level

if content_level > max_level:
warnings.warn(
f"The requested level ({content_level}) is higher than the max level "
f"in this country ({max_level}). Fallback to {max_level}."
)
content_level = max_level

else: # no admin and no name
sub_df = df
content_level = 0 if content_level == -1 else content_level

# get the columns name corresponding to the requested level
columns = [f"ADM{content_level}_NAME", f"ADM{content_level}_CODE"]
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -122,4 +122,4 @@ warn_redundant_casts = true
using = "PEP631:test;dev;doc"

[tool.codespell]
skip = "./pygaul/data/gaul_continent.json"
skip = "./pygaul/data/gaul_continent.json,**/*.csv"
8 changes: 4 additions & 4 deletions tests/test_get_name.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
import pygaul


def test_empty():
"""Empty request."""
with pytest.raises(Exception):
pygaul.get_names()
def test_empty(dataframe_regression):
"""Empty request, should list the countries."""
df = pygaul.get_names()
dataframe_regression.check(df)


def test_duplicate_input():
Expand Down
Loading

0 comments on commit b83089e

Please sign in to comment.