
Commit

ux: Provide helpful link to documentation when error due to missing API token (#1364)
eddiebergman authored Oct 17, 2024
1 parent 40f5ea2 commit c5a3c9e
Showing 7 changed files with 168 additions and 87 deletions.
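
For context, a minimal sketch of the user-facing behavior this commit adds. It assumes no API key is configured; the delete call is just one illustrative trigger of an authenticated endpoint:

import openml
from openml.exceptions import OpenMLNotAuthorizedError

openml.config.apikey = ""  # simulate a missing API token

try:
    openml.datasets.delete_dataset(42)  # deletion requires authentication
except OpenMLNotAuthorizedError as err:
    # The message now ends with a pointer to the authentication tutorial
    # instead of a bare server error code.
    print(err)
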
33 changes: 30 additions & 3 deletions openml/_api_calls.py
@@ -24,6 +24,7 @@
from .__version__ import __version__
from .exceptions import (
OpenMLHashException,
OpenMLNotAuthorizedError,
OpenMLServerError,
OpenMLServerException,
OpenMLServerNoResult,
@@ -36,6 +37,8 @@
FILE_ELEMENTS_TYPE = Dict[str, Union[str, Tuple[str, str]]]
DATABASE_CONNECTION_ERRCODE = 107

API_TOKEN_HELP_LINK = "https://openml.github.io/openml-python/main/examples/20_basic/introduction_tutorial.html#authentication" # noqa: S105


def _robot_delay(n: int) -> float:
wait = (1 / (1 + math.exp(-(n * 0.5 - 4)))) * 60
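
As a quick sanity check of the logistic backoff above (unchanged context lines, not part of this commit), the wait starts near one second and saturates toward a minute:

import math

def robot_delay(n: int) -> float:
    # Same formula as _robot_delay: a logistic curve scaled to 60 seconds.
    return (1 / (1 + math.exp(-(n * 0.5 - 4)))) * 60

assert round(robot_delay(0), 1) == 1.1  # first attempt waits about a second
assert robot_delay(8) == 30.0           # midpoint of the curve
assert robot_delay(30) > 59.9           # later attempts approach the 60s cap
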
@@ -456,21 +459,28 @@ def __parse_server_exception(
url: str,
file_elements: FILE_ELEMENTS_TYPE | None,
) -> OpenMLServerError:
if response.status_code == 414:
if response.status_code == requests.codes.URI_TOO_LONG:
raise OpenMLServerError(f"URI too long! ({url})")

# OpenML has a sophisticated error system where information about failures
# is provided in the response body itself.
# First, we need to parse it out.
try:
server_exception = xmltodict.parse(response.text)
except xml.parsers.expat.ExpatError as e:
raise e
except Exception as e:
# OpenML has a sophisticated error system
# where information about failures is provided. Try to parse this.
# If we failed to parse it out, then something has gone wrong in the body
# the server sent back and there is little extra information we can capture.
raise OpenMLServerError(
f"Unexpected server error when calling {url}. Please contact the developers!\n"
f"Status code: {response.status_code}\n{response.text}",
) from e

# Now we can parse out the specific error codes that we return. These
# are in addition to the typical HTTP error codes, but encode more
# specific information. You can find these codes here:
# https://github.com/openml/OpenML/blob/develop/openml_OS/views/pages/api_new/v1/xml/pre.php
server_error = server_exception["oml:error"]
code = int(server_error["oml:code"])
message = server_error["oml:message"]
@@ -496,4 +506,21 @@ def __parse_server_exception(
)
else:
full_message = f"{message} - {additional_information}"

if code in [
102, # flow/exists post
137, # dataset post
350, # dataset/42 delete
310, # flow/<something> post
320, # flow/42 delete
400, # run/42 delete
460, # task/42 delete
]:
msg = (
f"The API call {url} requires authentication via an API key.\nPlease configure "
"OpenML-Python to use your API as described in this example:"
"\nhttps://openml.github.io/openml-python/main/examples/20_basic/introduction_tutorial.html#authentication"
)
return OpenMLNotAuthorizedError(message=msg)

return OpenMLServerException(code=code, message=full_message, url=url)
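
To make the parsing above concrete, here is a minimal sketch of the kind of error body being handled. The exact payload is an assumption; only the oml:error/oml:code/oml:message structure and code 137 are taken from the code above:

import xmltodict

body = (
    '<oml:error xmlns:oml="http://openml.org/openml">'
    "<oml:code>137</oml:code>"
    "<oml:message>Please provide API key</oml:message>"  # hypothetical message text
    "</oml:error>"
)

server_error = xmltodict.parse(body)["oml:error"]
code = int(server_error["oml:code"])
assert code == 137  # one of the codes now mapped to OpenMLNotAuthorizedError
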
21 changes: 17 additions & 4 deletions openml/config.py
@@ -10,9 +10,10 @@
import platform
import shutil
import warnings
from contextlib import contextmanager
from io import StringIO
from pathlib import Path
from typing import Any, cast
from typing import Any, Iterator, cast
from typing_extensions import Literal, TypedDict
from urllib.parse import urlparse

@@ -174,11 +175,11 @@ def get_server_base_url() -> str:
apikey: str = _defaults["apikey"]
show_progress: bool = _defaults["show_progress"]
# The current cache directory (without the server name)
_root_cache_directory = Path(_defaults["cachedir"])
_root_cache_directory: Path = Path(_defaults["cachedir"])
avoid_duplicate_runs = _defaults["avoid_duplicate_runs"]

retry_policy = _defaults["retry_policy"]
connection_n_retries = _defaults["connection_n_retries"]
retry_policy: Literal["human", "robot"] = _defaults["retry_policy"]
connection_n_retries: int = _defaults["connection_n_retries"]


def set_retry_policy(value: Literal["human", "robot"], n_retries: int | None = None) -> None:
@@ -497,6 +498,18 @@ def set_root_cache_directory(root_cache_directory: str | Path) -> None:
stop_using_configuration_for_example = ConfigurationForExamples.stop_using_configuration_for_example


@contextmanager
def overwrite_config_context(config: dict[str, Any]) -> Iterator[_Config]:
"""A context manager to temporarily override variables in the configuration."""
existing_config = get_config_as_dict()
merged_config = {**existing_config, **config}

_setup(merged_config) # type: ignore
yield merged_config # type: ignore

_setup(existing_config)


__all__ = [
"get_cache_directory",
"set_root_cache_directory",
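
A usage sketch for the new context manager, mirroring how the tests below use it. Note that, as written, the original configuration is restored after yield without a try/finally, so an exception raised inside the block skips the restore:

import openml

with openml.config.overwrite_config_context({"apikey": None}):
    # Calls in here see the merged configuration; every key that is
    # not overridden keeps its existing value.
    ...

# On normal exit, the previous configuration is back in effect.
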
2 changes: 1 addition & 1 deletion openml/utils.py
@@ -234,7 +234,7 @@ def _delete_entity(entity_type: str, entity_id: int) -> bool:
" please open an issue at: https://github.com/openml/openml/issues/new"
),
) from e
raise
raise e


@overload
76 changes: 56 additions & 20 deletions tests/conftest.py
@@ -23,6 +23,7 @@
# License: BSD 3-Clause
from __future__ import annotations

from collections.abc import Iterator
import logging
import os
import shutil
@@ -195,55 +196,90 @@ def pytest_addoption(parser):
def _expected_static_cache_state(root_dir: Path) -> list[Path]:
_c_root_dir = root_dir / "org" / "openml" / "test"
res_paths = [root_dir, _c_root_dir]

for _d in ["datasets", "tasks", "runs", "setups"]:
res_paths.append(_c_root_dir / _d)

for _id in ["-1","2"]:
for _id in ["-1", "2"]:
tmp_p = _c_root_dir / "datasets" / _id
res_paths.extend([
tmp_p / "dataset.arff",
tmp_p / "features.xml",
tmp_p / "qualities.xml",
tmp_p / "description.xml",
])
res_paths.extend(
[
tmp_p / "dataset.arff",
tmp_p / "features.xml",
tmp_p / "qualities.xml",
tmp_p / "description.xml",
]
)

res_paths.append(_c_root_dir / "datasets" / "30" / "dataset_30.pq")
res_paths.append(_c_root_dir / "runs" / "1" / "description.xml")
res_paths.append(_c_root_dir / "setups" / "1" / "description.xml")

for _id in ["1", "3", "1882"]:
tmp_p = _c_root_dir / "tasks" / _id
res_paths.extend([
tmp_p / "datasplits.arff",
tmp_p / "task.xml",
])

res_paths.extend(
[
tmp_p / "datasplits.arff",
tmp_p / "task.xml",
]
)

return res_paths


def assert_static_test_cache_correct(root_dir: Path) -> None:
for p in _expected_static_cache_state(root_dir):
assert p.exists(), f"Expected path {p} does not exist"
assert p.exists(), f"Expected path {p} exists"


@pytest.fixture(scope="class")
def long_version(request):
request.cls.long_version = request.config.getoption("--long")


@pytest.fixture()
@pytest.fixture(scope="session")
def test_files_directory() -> Path:
return Path(__file__).parent / "files"


@pytest.fixture()
@pytest.fixture(scope="session")
def test_api_key() -> str:
return "c0c42819af31e706efe1f4b88c23c6c1"


@pytest.fixture(autouse=True)
def verify_cache_state(test_files_directory) -> None:
@pytest.fixture(autouse=True, scope="function")
def verify_cache_state(test_files_directory) -> Iterator[None]:
assert_static_test_cache_correct(test_files_directory)
yield
assert_static_test_cache_correct(test_files_directory)


@pytest.fixture(autouse=True, scope="session")
def as_robot() -> Iterator[None]:
policy = openml.config.retry_policy
n_retries = openml.config.connection_n_retries
openml.config.set_retry_policy("robot", n_retries=20)
yield
openml.config.set_retry_policy(policy, n_retries)


@pytest.fixture(autouse=True, scope="session")
def with_test_server():
openml.config.start_using_configuration_for_example()
yield
openml.config.stop_using_configuration_for_example()


@pytest.fixture(autouse=True)
def with_test_cache(test_files_directory, request):
if not test_files_directory.exists():
raise ValueError(
f"Cannot find test cache dir, expected it to be {test_files_directory!s}!",
)
_root_cache_directory = openml.config._root_cache_directory
tmp_cache = test_files_directory / request.node.name
openml.config.set_root_cache_directory(tmp_cache)
yield
openml.config.set_root_cache_directory(_root_cache_directory)
if tmp_cache.exists():
shutil.rmtree(tmp_cache)
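
Design note: hoisting these fixtures out of tests/test_utils/test_utils.py into tests/conftest.py makes them apply to every test in the suite, and widening as_robot and with_test_server to scope="session" runs their setup and teardown once per test run instead of once per test.
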
66 changes: 39 additions & 27 deletions tests/test_evaluations/test_evaluations_example.py
@@ -3,35 +3,47 @@

import unittest

from openml.config import overwrite_config_context


class TestEvaluationsExample(unittest.TestCase):
def test_example_python_paper(self):
# Example script which will appear in the upcoming OpenML-Python paper
# This test ensures that the example will keep running!

import matplotlib.pyplot as plt
import numpy as np

import openml

df = openml.evaluations.list_evaluations_setups(
"predictive_accuracy",
flows=[8353],
tasks=[6],
output_format="dataframe",
parameters_in_separate_columns=True,
) # Choose an SVM flow, for example 8353, and a task.

hp_names = ["sklearn.svm.classes.SVC(16)_C", "sklearn.svm.classes.SVC(16)_gamma"]
df[hp_names] = df[hp_names].astype(float).apply(np.log)
C, gamma, score = df[hp_names[0]], df[hp_names[1]], df["value"]

cntr = plt.tricontourf(C, gamma, score, levels=12, cmap="RdBu_r")
plt.colorbar(cntr, label="accuracy")
plt.xlim((min(C), max(C)))
plt.ylim((min(gamma), max(gamma)))
plt.xlabel("C (log10)", size=16)
plt.ylabel("gamma (log10)", size=16)
plt.title("SVM performance landscape", size=20)

plt.tight_layout()
with overwrite_config_context(
{
"server": "https://www.openml.org/api/v1/xml",
"apikey": None,
}
):
import matplotlib.pyplot as plt
import numpy as np

import openml

df = openml.evaluations.list_evaluations_setups(
"predictive_accuracy",
flows=[8353],
tasks=[6],
output_format="dataframe",
parameters_in_separate_columns=True,
) # Choose an SVM flow, for example 8353, and a task.

assert len(df) > 0, (
"No evaluations found for flow 8353 on task 6; it could "
"be that this task is not available on the test server."
)

hp_names = ["sklearn.svm.classes.SVC(16)_C", "sklearn.svm.classes.SVC(16)_gamma"]
df[hp_names] = df[hp_names].astype(float).apply(np.log)
C, gamma, score = df[hp_names[0]], df[hp_names[1]], df["value"]

cntr = plt.tricontourf(C, gamma, score, levels=12, cmap="RdBu_r")
plt.colorbar(cntr, label="accuracy")
plt.xlim((min(C), max(C)))
plt.ylim((min(gamma), max(gamma)))
plt.xlabel("C (log10)", size=16)
plt.ylabel("gamma (log10)", size=16)
plt.title("SVM performance landscape", size=20)

plt.tight_layout()
26 changes: 25 additions & 1 deletion tests/test_openml/test_api_calls.py
@@ -9,8 +9,9 @@
import pytest

import openml
from openml.config import ConfigurationForExamples
import openml.testing
from openml._api_calls import _download_minio_bucket
from openml._api_calls import _download_minio_bucket, API_TOKEN_HELP_LINK


class TestConfig(openml.testing.TestBase):
@@ -99,3 +100,26 @@ def test_download_minio_failure(mock_minio, tmp_path: Path) -> None:

with pytest.raises(ValueError):
_download_minio_bucket(source=some_url, destination=tmp_path)


@pytest.mark.parametrize(
"endpoint, method",
[
# https://github.com/openml/OpenML/blob/develop/openml_OS/views/pages/api_new/v1/xml/pre.php
("flow/exists", "post"), # 102
("dataset", "post"), # 137
("dataset/42", "delete"), # 350
# ("flow/owned", "post"), # 310 - Couldn't find what would trigger this
("flow/42", "delete"), # 320
("run/42", "delete"), # 400
("task/42", "delete"), # 460
],
)
def test_authentication_endpoints_requiring_api_key_show_relevant_help_link(
endpoint: str,
method: str,
) -> None:
# We need to temporarily disable the API key to test the error message
with openml.config.overwrite_config_context({"apikey": None}):
with pytest.raises(openml.exceptions.OpenMLNotAuthorizedError, match=API_TOKEN_HELP_LINK):
openml._api_calls._perform_api_call(call=endpoint, request_method=method, data=None)
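
One small caveat on the match= argument above: pytest.raises treats it as a regular expression (via re.search), and the help link contains '.' metacharacters. They happen to match the literal text anyway, but escaping would state the intent explicitly:

import re

with pytest.raises(openml.exceptions.OpenMLNotAuthorizedError, match=re.escape(API_TOKEN_HELP_LINK)):
    openml._api_calls._perform_api_call(call=endpoint, request_method=method, data=None)
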
31 changes: 0 additions & 31 deletions tests/test_utils/test_utils.py
@@ -8,37 +8,6 @@
from openml.testing import _check_dataset


@pytest.fixture(autouse=True)
def as_robot():
policy = openml.config.retry_policy
n_retries = openml.config.connection_n_retries
openml.config.set_retry_policy("robot", n_retries=20)
yield
openml.config.set_retry_policy(policy, n_retries)


@pytest.fixture(autouse=True)
def with_test_server():
openml.config.start_using_configuration_for_example()
yield
openml.config.stop_using_configuration_for_example()


@pytest.fixture(autouse=True)
def with_test_cache(test_files_directory, request):
if not test_files_directory.exists():
raise ValueError(
f"Cannot find test cache dir, expected it to be {test_files_directory!s}!",
)
_root_cache_directory = openml.config._root_cache_directory
tmp_cache = test_files_directory / request.node.name
openml.config.set_root_cache_directory(tmp_cache)
yield
openml.config.set_root_cache_directory(_root_cache_directory)
if tmp_cache.exists():
shutil.rmtree(tmp_cache)


@pytest.fixture()
def min_number_tasks_on_test_server() -> int:
"""After a reset at least 1068 tasks are on the test server"""
