Update extension.py, flow.py, task.py and functions.py #1238

Closed · wants to merge 7 commits
13 changes: 13 additions & 0 deletions openml/extensions/sklearn/extension.py
@@ -2109,6 +2109,19 @@ def instantiate_model_from_hpo_class(
return base_estimator

def _extract_trace_data(self, model, rep_no, fold_no):
"""Extracts data from a machine learning model's cross-validation results and creates an ARFF (Attribute-Relation File Format) trace.
Parameters
----------
model : Any
A fitted hyperparameter optimization model.
rep_no : int
The repetition number.
fold_no : int
The fold number.
Returns
-------
list
A list of ARFF trace entries, one per evaluated hyperparameter setting.
"""
arff_tracecontent = []
for itt_no in range(0, len(model.cv_results_["mean_test_score"])):
# we use the string values for True and False, as it is defined in
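The body of the loop is collapsed in the diff. As a rough illustration only, here is a minimal sketch of what extracting trace rows from scikit-learn's cv_results_ typically looks like; the helper name and the exact columns are assumptions, not the code in this PR:

def _sketch_extract_trace_data(model, rep_no, fold_no):
    # Hypothetical sketch: build one trace row per evaluated hyperparameter setting.
    arff_tracecontent = []
    for itt_no in range(len(model.cv_results_["mean_test_score"])):
        # string booleans mark whether this setting was the best one found
        selected = "true" if itt_no == model.best_index_ else "false"
        row = [rep_no, fold_no, itt_no,
               model.cv_results_["mean_test_score"][itt_no], selected]
        # append one column per hyperparameter of the search
        for key in model.cv_results_:
            if key.startswith("param_"):
                row.append(str(model.cv_results_[key][itt_no]))
        arff_tracecontent.append(row)
    return arff_tracecontent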
28 changes: 28 additions & 0 deletions openml/flows/flow.py
@@ -523,6 +523,19 @@ def get_subflow(self, structure):


def _copy_server_fields(source_flow, target_flow):
""" Recursively copies the fields added by the server from the `source_flow` to the `target_flow`.

Parameters
----------
source_flow : OpenMLFlow
The flow to copy the fields from.
target_flow : OpenMLFlow
The flow to copy the fields to.

Returns
-------
None
"""
fields_added_by_the_server = ["flow_id", "uploader", "version", "upload_date"]
for field in fields_added_by_the_server:
setattr(target_flow, field, getattr(source_flow, field))
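The recursive part referred to in the summary line is collapsed in the diff. A minimal sketch of how the recursion over subflows might look, assuming components maps component names to child OpenMLFlow objects:

def _copy_server_fields_sketch(source_flow, target_flow):
    # Hypothetical sketch: copy the server-assigned fields at every level of a
    # composite flow, then descend into the matching subflows.
    for field in ["flow_id", "uploader", "version", "upload_date"]:
        setattr(target_flow, field, getattr(source_flow, field))
    for name, sub_source in source_flow.components.items():
        _copy_server_fields_sketch(sub_source, target_flow.components[name])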
@@ -533,5 +546,20 @@ def _copy_server_fields(source_flow, target_flow):


def _add_if_nonempty(dic, key, value):
""" Adds a key-value pair to a dictionary if the value is not None.

Parameters
----------
dic : dict
The dictionary to add the key-value pair to.
key : hashable
The key under which to store the value.
value : Any
The value to store; nothing is added if it is None.

Returns
-------
None
"""
if value is not None:
dic[key] = value
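A quick usage illustration (hypothetical values, not taken from the PR):

description = {}
_add_if_nonempty(description, "external_version", "sklearn==1.3.0")
_add_if_nonempty(description, "custom_name", None)  # skipped, value is None
assert description == {"external_version": "sklearn==1.3.0"}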
14 changes: 14 additions & 0 deletions openml/flows/functions.py
@@ -332,6 +332,20 @@ def get_flow_id(


def __list_flows(api_call: str, output_format: str = "dict") -> Union[Dict, pd.DataFrame]:
""" Retrieve information about flows from OpenML API and parse it to a dictionary or a Pandas DataFrame.

Parameters
----------
api_call : str
The API call (URL suffix) used to retrieve the flow listing.
output_format : str
The output format: 'dict' for a dictionary or 'dataframe' for a pandas DataFrame.

Returns
-------
Union[Dict, pd.DataFrame]
The flows information in the specified output format.
"""

xml_string = openml._api_calls._perform_api_call(api_call, "get")
flows_dict = xmltodict.parse(xml_string, force_list=("oml:flow",))
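The rest of the function is collapsed. A rough sketch, under the assumption that each oml:flow element carries oml:id, oml:name and oml:version fields, of how the parsed dictionary might be turned into the requested output format:

import pandas as pd

def _parse_flows_sketch(flows_dict, output_format="dict"):
    # Hypothetical sketch: collect per-flow fields keyed by flow id and
    # optionally convert the result to a DataFrame.
    flows = {}
    for flow_ in flows_dict["oml:flows"]["oml:flow"]:
        fid = int(flow_["oml:id"])
        flows[fid] = {"id": fid, "name": flow_["oml:name"], "version": flow_["oml:version"]}
    if output_format == "dataframe":
        return pd.DataFrame.from_dict(flows, orient="index")
    return flows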
29 changes: 24 additions & 5 deletions openml/runs/run.py
@@ -31,36 +31,55 @@ class OpenMLRun(OpenMLBase):
Parameters
----------
task_id: int
The ID of the OpenML task associated with the run.
flow_id: int
The ID of the OpenML flow associated with the run.
dataset_id: int
The ID of the OpenML dataset used for the run.
setup_string: str
The setup string of the run.
output_files: Dict[str, str]
Specifies where each related file can be found.
setup_id: int
The ID of the setup used for the run.
tags: List[str]
The tags associated with the run.
uploader: int
User ID of the uploader.
uploader_name: str
The name of the person who uploaded the run.
evaluations: Dict
The evaluations of the run.
fold_evaluations: Dict
The evaluations of the run for each fold.
sample_evaluations: Dict
The evaluations of the run for each sample.
data_content: List[List]
The predictions generated from executing this run.
trace: OpenMLRunTrace
The trace containing information on internal model evaluations of this run.
model: object
The untrained model that was evaluated in the run.
task_type: str
The type of the OpenML task associated with the run.
task_evaluation_measure: str
The evaluation measure used for the task.
flow_name: str
The name of the OpenML flow associated with the run.
parameter_settings: List[OrderedDict]
The parameter settings used for the run.
predictions_url: str
The URL of the predictions file.
task: OpenMLTask
The OpenML task associated with the run.
flow: OpenMLFlow
The OpenML flow associated with the run.
run_id: int
The ID of the run.
description_text: str, optional
Description text to add to the predictions file. If left None, it is set to the time the ARFF file is generated.
run_details: str, optional (default=None)
Description of the run stored in the run meta-data.
"""

def __init__(
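For context (not part of this PR), an OpenMLRun is usually produced by the high-level API rather than constructed directly; a minimal sketch, assuming a scikit-learn classifier and an example task id:

import openml
from sklearn.tree import DecisionTreeClassifier

task = openml.tasks.get_task(31)  # task id 31 is only an example
run = openml.runs.run_model_on_task(DecisionTreeClassifier(), task)
print(run.task_id, run.flow_name)  # fields documented above
# run.publish()  # would upload the run and its predictions to the server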
10 changes: 10 additions & 0 deletions openml/tasks/task.py
@@ -36,6 +36,8 @@ class OpenMLTask(OpenMLBase):

Parameters
----------
task_id : Optional[int]
Refers to the unique identifier of a task.
task_type_id : TaskType
Refers to the type of task.
task_type : str
@@ -44,6 +46,14 @@
Refers to the data.
estimation_procedure_id: int
Refers to the type of estimates used.
estimation_procedure_type : Optional[str]
Refers to the type of estimation procedure used for the task.
estimation_parameters : Optional[Dict[str, str]]
Estimation parameters used for the task.
evaluation_measure : Optional[str]
Refers to the evaluation measure.
data_splits_url : Optional[str]
Refers to the URL of the data splits used for the task.
"""

def __init__(
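For context (not part of this PR), a short sketch of how the documented fields surface when a task is fetched through the public API (the task id is only an example):

import openml

task = openml.tasks.get_task(59)
print(task.task_id, task.task_type, task.evaluation_measure)  # fields documented above
dataset = task.get_dataset()  # the dataset the task refers to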