From 3d79e6e31044357ff1936aa9c4e5a02638f8abdd Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Wed, 16 Aug 2023 18:51:30 +0530
Subject: [PATCH 01/48] Update extension.py

---
 openml/extensions/sklearn/extension.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py
index 82d202e9c..98c0a2e6d 100644
--- a/openml/extensions/sklearn/extension.py
+++ b/openml/extensions/sklearn/extension.py
@@ -2101,6 +2101,20 @@ def instantiate_model_from_hpo_class(
         return base_estimator
 
     def _extract_trace_data(self, model, rep_no, fold_no):
+        """Extracts data from a machine learning model's cross-validation results and creates an ARFF (Attribute-Relation File Format) trace.
+        
+            Parameters
+            ----------
+            model : Any
+                A fitted hyperparameter optimization model.
+            rep_no : int
+                The repetition number.
+            fold_no : int
+                The fold number.
+            Returns
+            -------
+            A list of ARFF tracecontent.
+        """
         arff_tracecontent = []
         for itt_no in range(0, len(model.cv_results_["mean_test_score"])):
             # we use the string values for True and False, as it is defined in

From 2c4519ed80c816044b4b0163eefea217a34548b3 Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Wed, 16 Aug 2023 18:54:20 +0530
Subject: [PATCH 02/48] Update task.py

---
 openml/tasks/task.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/openml/tasks/task.py b/openml/tasks/task.py
index 36e0ada1c..f8783c785 100644
--- a/openml/tasks/task.py
+++ b/openml/tasks/task.py
@@ -36,6 +36,8 @@ class OpenMLTask(OpenMLBase):
 
     Parameters
     ----------
+    task_id : Union[int, None]
+        Refers to the unique identifier of a task.
     task_type_id : TaskType
         Refers to the type of task.
     task_type : str
@@ -44,6 +46,14 @@ class OpenMLTask(OpenMLBase):
         Refers to the data.
     estimation_procedure_id: int
         Refers to the type of estimates used.
+    estimation_procedure_type : Optional[str] 
+        Refers to the type of estimation procedure used for the task.
+    estimation_parameters : Optional[Dict[str, str]]
+        Estimation parameters used for the task.
+    evaluation_measure : Optional[str]
+        Refers to the evaluation measure.
+    data_splits_url : Optional[str]
+        Refers to the URL of the data splits used for the task.
     """
 
     def __init__(

From 7ddbdf4fc45faeb15422061e9dbadf5f4c05f7d2 Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Wed, 16 Aug 2023 18:56:22 +0530
Subject: [PATCH 03/48] Update flow.py

---
 openml/flows/flow.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/openml/flows/flow.py b/openml/flows/flow.py
index b9752e77c..f1c5935e8 100644
--- a/openml/flows/flow.py
+++ b/openml/flows/flow.py
@@ -523,6 +523,18 @@ def get_subflow(self, structure):
 
 
 def _copy_server_fields(source_flow, target_flow):
+    """ Recursively copies the fields added by the server from the `source_flow` to the `target_flow`.
+    
+    Parameters
+    ----------
+    source_flow : OpenMLFlow
+        To copy the fields from.
+    target_flow : OpenMLFlow
+        To copy the fields to.
+    Returns
+    -------
+    None
+    """
     fields_added_by_the_server = ["flow_id", "uploader", "version", "upload_date"]
     for field in fields_added_by_the_server:
         setattr(target_flow, field, getattr(source_flow, field))
@@ -533,5 +545,19 @@ def _copy_server_fields(source_flow, target_flow):
 
 
 def _add_if_nonempty(dic, key, value):
+    """ Adds a key-value pair to a dictionary if the value is not None.
+     
+    Parameters
+    ----------
+    dic: dict
+        To add the key-value pair to.
+    key: hashable
+        To add to the dictionary.
+    value: Any
+        To add to the dictionary.
+    Returns
+    -------
+    None
+    """
     if value is not None:
         dic[key] = value

From 7d5a04fc460aff1bc142d314dd51fa907440d3c9 Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Wed, 16 Aug 2023 18:59:17 +0530
Subject: [PATCH 04/48] Update functions.py

---
 openml/flows/functions.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/openml/flows/functions.py b/openml/flows/functions.py
index c4faded0a..11b26d367 100644
--- a/openml/flows/functions.py
+++ b/openml/flows/functions.py
@@ -337,6 +337,18 @@ def get_flow_id(
 
 
 def __list_flows(api_call: str, output_format: str = "dict") -> Union[Dict, pd.DataFrame]:
+    """
+    Retrieve information about flows from OpenML API and parse it to a dictionary or a Pandas DataFrame.
+    Parameters
+    ----------
+    api_call: str
+        Retrieves the information about flows.
+    output_format: str in {"dict", "dataframe"}
+        The output format.
+    Returns
+    -------
+        The flows information in the specified output format.
+    """
     xml_string = openml._api_calls._perform_api_call(api_call, "get")
     flows_dict = xmltodict.parse(xml_string, force_list=("oml:flow",))
 

From 58ef19d8ef2dcfb3252a24dd5bbfda563be7c59a Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Wed, 16 Aug 2023 19:07:43 +0530
Subject: [PATCH 05/48] Update functions.py

---
 openml/runs/functions.py | 60 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/openml/runs/functions.py b/openml/runs/functions.py
index ee582dbb7..5aaca77d7 100644
--- a/openml/runs/functions.py
+++ b/openml/runs/functions.py
@@ -128,6 +128,16 @@ def run_model_on_task(
     flow = extension.model_to_flow(model)
 
     def get_task_and_type_conversion(task: Union[int, str, OpenMLTask]) -> OpenMLTask:
+        """ Retrieve an OpenMLTask object from either an integer or string ID, or directly from an OpenMLTask object.
+        Parameters
+        ----------
+        task : Union[int, str, OpenMLTask]
+            The task ID or the OpenMLTask object.
+        Returns
+        -------
+        OpenMLTask
+            The OpenMLTask object.
+        """
         if isinstance(task, (int, str)):
             return get_task(int(task))
         else:
@@ -451,6 +461,27 @@ def _run_task_get_arffcontent(
     "OrderedDict[str, OrderedDict]",
     "OrderedDict[str, OrderedDict]",
 ]:
+    """ Runs the hyperparameter optimization on the given task and returns the arfftrace content.
+    Parameters
+    ----------
+    model : Any
+        The model that is to be evaluated.
+    task : OpenMLTask
+        The OpenMLTask to evaluate.
+    extension : Extension
+        The OpenML extension object.
+    add_local_measures : bool
+        Whether to compute additional local evaluation measures.
+    dataset_format : str
+        The format in which to download the dataset.
+    n_jobs : int
+        Number of jobs to run in parallel. If None, use 1 core by default. If -1, use all available cores.
+        
+    Returns
+    -------
+    Tuple[List[List], Optional[OpenMLRunTrace], OrderedDict[str, OrderedDict], OrderedDict[str, OrderedDict]]
+        A tuple containing the arfftrace content, the OpenML run trace, the global and local evaluation measures.
+    """
     arff_datacontent = []  # type: List[List]
     traces = []  # type: List[OpenMLRunTrace]
     # stores fold-based evaluation measures. In case of a sample based task,
@@ -636,6 +667,35 @@ def _run_task_get_arffcontent_parallel_helper(
     Optional[OpenMLRunTrace],
     "OrderedDict[str, float]",
 ]:
+    """ Helper function that runs a single model on a single task fold sample.
+    
+    Parameters
+    ----------
+    extension : Extension
+        An OpenML extension instance.
+    fold_no : int
+        The fold number to be run.
+    model : Any
+        The model that is to be evaluated.
+    rep_no : int
+        Repetition number to be run.
+    sample_no : int
+        Sample number to be run.
+    task : OpenMLTask
+        The task object from OpenML.
+    dataset_format : str
+        The dataset format to be used.
+    configuration : Dict
+        Hyperparameters to configure the model.
+        
+    Returns
+    -------
+    Tuple[np.ndarray, Optional[pd.DataFrame], np.ndarray, Optional[pd.DataFrame],
+           Optional[OpenMLRunTrace], OrderedDict[str, float]]
+    A tuple containing the predictions, probability estimates (if applicable), 
+    actual target values, actual target value probabilities (if applicable), 
+    the trace object of the OpenML run (if applicable), and a dictionary of local measures for this particular fold.
+    """
     # Sets up the OpenML instantiated in the child process to match that of the parent's
     # if configuration=None, loads the default
     config._setup(configuration)

From 63a20bbbd06996bc722e9f7e9eab8999f8ab792b Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Wed, 16 Aug 2023 19:12:11 +0530
Subject: [PATCH 06/48] Update trace.py

---
 openml/runs/trace.py | 50 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/openml/runs/trace.py b/openml/runs/trace.py
index f6b038a55..85fd55334 100644
--- a/openml/runs/trace.py
+++ b/openml/runs/trace.py
@@ -33,7 +33,17 @@ class OpenMLRunTrace(object):
 
     """
 
-    def __init__(self, run_id, trace_iterations):
+    def __init__(self, run_id: int, trace_iterations: List[List]):
+        """
+        Object to hold the trace content of a run.
+        
+        Parameters
+        ----------
+        run_id : int
+            Id for which the trace content is to be stored.
+        trace_iterations : List[List]
+            The trace content obtained by running a flow on a task.
+        """
         self.run_id = run_id
         self.trace_iterations = trace_iterations
 
@@ -228,6 +238,24 @@ def trace_from_arff(cls, arff_obj):
 
     @classmethod
     def _trace_from_arff_struct(cls, attributes, content, error_message):
+        """ Generate a trace dictionary from ARFF structure.
+        
+        Parameters
+        ----------
+        cls : type
+            The trace object to be created.
+        attributes : List[Tuple[str, str]]
+            Attribute descriptions.
+        content : List[List[Union[int, float, str]]]
+            List of instances.
+        error_message : str
+            Error message to raise if `setup_string` is in `attributes`.
+            
+        Returns
+        -------
+        OrderedDict
+            A dictionary representing the trace.
+        """
         trace = OrderedDict()
         attribute_idx = {att[0]: idx for idx, att in enumerate(attributes)}
 
@@ -345,6 +373,26 @@ def trace_from_xml(cls, xml):
 
     @classmethod
     def merge_traces(cls, traces: List["OpenMLRunTrace"]) -> "OpenMLRunTrace":
+        """Merge multiple traces into a single trace.
+        
+        Parameters
+        ----------
+        cls : type
+            Type of the trace object to be created.
+        traces : List[OpenMLRunTrace]
+            List of traces to merge.
+            
+        Returns
+        -------
+        OpenMLRunTrace
+            A trace object representing the merged traces.
+            
+        Raises
+        ------
+        ValueError
+            If the parameters in the iterations of the traces being merged are not equal.
+            If a key (repeat, fold, iteration) is encountered twice while merging the traces.
+        """
         merged_trace = (
             OrderedDict()
         )  # type: OrderedDict[Tuple[int, int, int], OpenMLTraceIteration]  # noqa E501

From 0197de31527429e5bff58cb0267a072d94cde97a Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Wed, 16 Aug 2023 19:16:22 +0530
Subject: [PATCH 07/48] Update functions.py

---
 openml/setups/functions.py | 39 +++++++++++++++++++++++++++++++++++---
 1 file changed, 36 insertions(+), 3 deletions(-)

diff --git a/openml/setups/functions.py b/openml/setups/functions.py
index 52969fb8c..069c3c38e 100644
--- a/openml/setups/functions.py
+++ b/openml/setups/functions.py
@@ -60,8 +60,24 @@ def setup_exists(flow) -> int:
     return setup_id if setup_id > 0 else False
 
 
-def _get_cached_setup(setup_id):
-    """Load a run from the cache."""
+def _get_cached_setup(setup_id: int):
+    """Load a setup from the cache.
+    
+    Parameters
+    ----------
+    setup_id : int
+        ID of the setup to be loaded.
+        
+    Returns
+    -------
+    OpenMLSetup
+        The loaded setup object.
+        
+    Raises
+    ------
+    OpenMLCacheException
+        If the setup file for the given setup ID is not cached.
+    """
     cache_dir = config.get_cache_directory()
     setup_cache_dir = os.path.join(cache_dir, "setups", str(setup_id))
     try:
@@ -271,7 +287,21 @@ def initialize_model(setup_id: int) -> Any:
     return model
 
 
-def _to_dict(flow_id, openml_parameter_settings):
+def _to_dict(flow_id: int, openml_parameter_settings):
+    """ Convert a flow ID and a list of OpenML parameter settings to a dictionary representation that can be serialized to XML.
+    
+    Parameters
+    ----------
+    flow_id : int
+        ID of the flow.
+    openml_parameter_settings : List[OpenMLParameter]
+        A list of OpenML parameter settings.
+        
+    Returns
+    -------
+    OrderedDict
+        A dictionary representation of the flow ID and parameter settings.
+    """
     # for convenience, this function (ab)uses the run object.
     xml = OrderedDict()
     xml["oml:run"] = OrderedDict()
@@ -319,6 +349,9 @@ def _create_setup_from_xml(result_dict, output_format="object"):
 
 
 def _create_setup_parameter_from_xml(result_dict, output_format="object"):
+    """
+        Create an OpenMLParameter object or a dictionary from an API xml result.
+    """
     if output_format == "object":
         return OpenMLParameter(
             input_id=int(result_dict["oml:id"]),

From 1385155e76e23ae39e611953c6d7853e66bef05a Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Wed, 16 Aug 2023 19:20:57 +0530
Subject: [PATCH 08/48] Update functions.py

---
 openml/study/functions.py | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/openml/study/functions.py b/openml/study/functions.py
index 7b72a31eb..d84f12580 100644
--- a/openml/study/functions.py
+++ b/openml/study/functions.py
@@ -107,6 +107,20 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy:
             tags.append(current_tag)
 
     def get_nested_ids_from_result_dict(key: str, subkey: str) -> Optional[List]:
+        """ Extracts a list of nested IDs from a result dictionary.
+        
+        Parameters
+        ----------
+        key : str
+            Nested OpenML IDs.
+        subkey : str
+            The subkey contains the nested OpenML IDs.
+            
+        Returns
+        -------
+        Optional[List]
+            A list of nested OpenML IDs, or None if the key is not present in the dictionary.
+        """
         if result_dict.get(key) is not None:
             return [int(oml_id) for oml_id in result_dict[key][subkey]]
         return None
@@ -591,6 +605,20 @@ def _list_studies(output_format="dict", **kwargs) -> Union[Dict, pd.DataFrame]:
 
 
 def __list_studies(api_call, output_format="object") -> Union[Dict, pd.DataFrame]:
+    """ Retrieves the list of OpenML studies and returns it in a dictionary or a Pandas DataFrame.
+    
+    Parameters
+    ----------
+    api_call : str
+        The API call for retrieving the list of OpenML studies.
+    output_format : str in {"object", "dataframe"}
+        Format of the output, either 'object' for a dictionary or 'dataframe' for a Pandas DataFrame.
+        
+    Returns
+    -------
+    Union[Dict, pd.DataFrame]
+        A dictionary or Pandas DataFrame of OpenML studies, depending on the value of 'output_format'.
+    """
     xml_string = openml._api_calls._perform_api_call(api_call, "get")
     study_dict = xmltodict.parse(xml_string, force_list=("oml:study",))
 

From 43ed1522eecd2b6328907a7650d5368a8fb48948 Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Wed, 16 Aug 2023 19:22:57 +0530
Subject: [PATCH 09/48] Update functions.py

---
 openml/tasks/functions.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py
index 00a8e822d..e0efb9c5e 100644
--- a/openml/tasks/functions.py
+++ b/openml/tasks/functions.py
@@ -230,6 +230,28 @@ def _list_tasks(task_type=None, output_format="dict", **kwargs):
 
 
 def __list_tasks(api_call, output_format="dict"):
+    """ Returns a dictionary or a Pandas DataFrame with information about OpenML tasks.
+    
+    Parameters
+    ----------
+    api_call : str
+        The API call specifying which tasks to return.
+    output_format : str in {"dict", "dataframe"}
+        Output format for the returned object.
+        
+    Returns
+    -------
+    Union[Dict, pd.DataFrame]
+        A dictionary or a Pandas DataFrame with information about OpenML tasks.
+        
+    Raises
+    ------
+    ValueError
+        If the XML returned by the OpenML API does not contain 'oml:tasks', '@xmlns:oml', or has an incorrect value for
+        '@xmlns:oml'.
+    KeyError
+        If an invalid key is found in the XML for a task.
+    """
     xml_string = openml._api_calls._perform_api_call(api_call, "get")
     tasks_dict = xmltodict.parse(xml_string, force_list=("oml:task", "oml:input"))
     # Minimalistic check if the XML is useful

From 6c5f9d4f3ecbe825f8ff722fb635108826d87b5f Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Wed, 16 Aug 2023 19:26:03 +0530
Subject: [PATCH 10/48] Update split.py

---
 openml/tasks/split.py | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/openml/tasks/split.py b/openml/tasks/split.py
index e47c6040a..bea7f9390 100644
--- a/openml/tasks/split.py
+++ b/openml/tasks/split.py
@@ -136,9 +136,48 @@ def _from_arff_file(cls, filename: str) -> "OpenMLSplit":
         return cls(name, "", repetitions)
 
     def from_dataset(self, X, Y, folds, repeats):
+        """ Generates a new OpenML dataset object from input data and cross-validation settings.
+        
+        Parameters
+        ----------
+        X : array-like or sparse matrix
+            The input feature matrix.
+        Y : array-like
+            The target variable values.
+        folds : int
+            Number of cross-validation folds to generate.
+        repeats : int
+            Number of times to repeat the cross-validation process.
+            
+        Raises
+        ------
+        NotImplementedError
+            This method is not implemented yet.
+        """
         raise NotImplementedError()
 
     def get(self, repeat=0, fold=0, sample=0):
+        """Returns the specified data split from the OpenMLSplit object.
+        
+        Parameters
+        ----------
+        repeat : int
+            Index of the repeat to retrieve.
+        fold : int
+            Index of the fold to retrieve.
+        sample : int
+            Index of the sample to retrieve.
+            
+        Returns
+        -------
+        numpy.ndarray
+            The data split for the specified repeat, fold, and sample.
+            
+        Raises
+        ------
+        ValueError
+            If the specified repeat, fold, or sample is not known.
+        """
         if repeat not in self.split:
             raise ValueError("Repeat %s not known" % str(repeat))
         if fold not in self.split[repeat]:

From e4db820a1a19373f5b3e1d4dc06defbe911691b8 Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Wed, 16 Aug 2023 19:58:36 +0530
Subject: [PATCH 11/48] Update task.py

---
 openml/tasks/task.py | 146 +++++++++++++++++++++++++++++++++++++------
 1 file changed, 128 insertions(+), 18 deletions(-)

diff --git a/openml/tasks/task.py b/openml/tasks/task.py
index f8783c785..26b275572 100644
--- a/openml/tasks/task.py
+++ b/openml/tasks/task.py
@@ -36,24 +36,24 @@ class OpenMLTask(OpenMLBase):
 
     Parameters
     ----------
-    task_id : Union[int, None]
-        Refers to the unique identifier of a task.
-    task_type_id : TaskType
-        Refers to the type of task.
-    task_type : str
-        Refers to the task.
+    task_id: Union[int, None]
+        Refers to the unique identifier of OpenML task.
+    task_type_id: TaskType
+        Refers to the type of OpenML task.
+    task_type: str
+        Refers to the OpenML task.
     data_set_id: int
         Refers to the data.
     estimation_procedure_id: int
         Refers to the type of estimates used.
-    estimation_procedure_type : Optional[str] 
-        Refers to the type of estimation procedure used for the task.
-    estimation_parameters : Optional[Dict[str, str]]
-        Estimation parameters used for the task.
-    evaluation_measure : Optional[str]
+    estimation_procedure_type: str, default=None 
+        Refers to the type of estimation procedure used for the OpenML task.
+    estimation_parameters: Dict[str, str], default=None
+        Estimation parameters used for the OpenML task.
+    evaluation_measure: str, default=None
         Refers to the evaluation measure.
-    data_splits_url : Optional[str]
-        Refers to the URL of the data splits used for the task.
+    data_splits_url: str, default=None
+        Refers to the URL of the data splits used for the OpenML task.
     """
 
     def __init__(
@@ -216,8 +216,26 @@ class OpenMLSupervisedTask(OpenMLTask, ABC):
 
     Parameters
     ----------
+    task_type_id : TaskType
+        ID of the task type.
+    task_type : str
+        Name of the task type.
+    data_set_id : int
+        ID of the OpenML dataset associated with the task.
     target_name : str
         Name of the target feature (the class variable).
+    estimation_procedure_id : int, default=None
+        ID of the estimation procedure for the task.
+    estimation_procedure_type : str, default=None
+        Type of the estimation procedure for the task.
+    estimation_parameters : dict, default=None
+        Estimation parameters for the task.
+    evaluation_measure : str, default=None
+        Name of the evaluation measure for the task.
+    data_splits_url : str, default=None
+        URL of the data splits for the task.
+    task_id: Union[int, None]
+        Refers to the unique identifier of task.
     """
 
     def __init__(
@@ -319,8 +337,30 @@ class OpenMLClassificationTask(OpenMLSupervisedTask):
 
     Parameters
     ----------
-    class_labels : List of str (optional)
-    cost_matrix: array (optional)
+    task_type_id : TaskType
+        ID of the Classification task type.
+    task_type : str
+        Name of the Classification task type.
+    data_set_id : int
+        ID of the OpenML dataset associated with the Classification task.
+    target_name : str
+        Name of the target variable.
+    estimation_procedure_id : int, default=None
+        ID of the estimation procedure for the Classification task.
+    estimation_procedure_type : str, default=None
+        Type of the estimation procedure.
+    estimation_parameters : dict, default=None
+        Estimation parameters for the Classification task.
+    evaluation_measure : str, default=None
+        Name of the evaluation measure.
+    data_splits_url : str, default=None
+        URL of the data splits for the Classification task.
+    task_id : Union[int, None]
+        ID of the Classification task (if it already exists on OpenML).
+    class_labels : List of str, default=None
+        A list of class labels (for classification tasks).
+    cost_matrix : array, default=None
+        A cost matrix (for classification tasks).
     """
 
     def __init__(
@@ -358,7 +398,31 @@ def __init__(
 
 
 class OpenMLRegressionTask(OpenMLSupervisedTask):
-    """OpenML Regression object."""
+    """OpenML Regression object.
+    
+    Parameters
+    ----------
+    task_type_id : TaskType
+        Task type ID of the OpenML Regression task.
+    task_type : str
+        Task type of the OpenML Regression task.
+    data_set_id : int
+        ID of the OpenML dataset.
+    target_name : str
+        Name of the target feature used in the Regression task.
+    estimation_procedure_id : int, default=None
+        ID of the OpenML estimation procedure.
+    estimation_procedure_type : str, default=None
+        Type of the OpenML estimation procedure.
+    estimation_parameters : dict, default=None
+        Parameters used by the OpenML estimation procedure.
+    data_splits_url : str, default=None
+        URL of the OpenML data splits for the Regression task.
+    task_id : Union[int, None]
+        ID of the OpenML Regression task.
+    evaluation_measure : str, default=None
+        Evaluation measure used in the Regression task.
+    """
 
     def __init__(
         self,
@@ -392,7 +456,25 @@ class OpenMLClusteringTask(OpenMLTask):
 
     Parameters
     ----------
-    target_name : str (optional)
+    task_type_id : TaskType
+        Task type ID of the OpenML clustering task.
+    task_type : str
+        Task type of the OpenML clustering task.
+    data_set_id : int
+        ID of the OpenML dataset used in the clustering task.
+    estimation_procedure_id : int, default=None
+        ID of the OpenML estimation procedure.
+    task_id : Union[int, None]
+        ID of the OpenML clustering task.
+    estimation_procedure_type : str, default=None
+        Type of the OpenML estimation procedure used in the clustering task.
+    estimation_parameters : dict, default=None
+        Parameters used by the OpenML estimation procedure.
+    data_splits_url : str, default=None
+        URL of the OpenML data splits for the clustering task.
+    evaluation_measure : str, default=None
+        Evaluation measure used in the clustering task.
+    target_name : str, default=None
         Name of the target feature (class) that is not part of the
         feature set for the clustering task.
     """
@@ -469,7 +551,35 @@ def _to_dict(self) -> "OrderedDict[str, OrderedDict]":
 
 
 class OpenMLLearningCurveTask(OpenMLClassificationTask):
-    """OpenML Learning Curve object."""
+    """OpenML Learning Curve object.
+
+    Parameters
+    ----------
+    task_type_id : TaskType
+        ID of the Learning Curve task.
+    task_type : str
+        Name of the Learning Curve task.
+    data_set_id : int
+        ID of the dataset that this task is associated with.
+    target_name : str
+        Name of the target feature in the dataset.
+    estimation_procedure_id : int, default=None
+        ID of the estimation procedure to use for evaluating models.
+    estimation_procedure_type : str, default=None
+        Type of the estimation procedure.
+    estimation_parameters : dict, default=None
+        Additional parameters for the estimation procedure.
+    data_splits_url : str, default=None
+        URL of the file containing the data splits for Learning Curve task.
+    task_id : Union[int, None]
+        ID of the Learning Curve task.
+    evaluation_measure : str, default=None
+        Name of the evaluation measure to use for evaluating models.
+    class_labels : list of str, default=None
+        Class labels for Learning Curve tasks.
+    cost_matrix : numpy array, default=None
+        Cost matrix for Learning Curve tasks.
+    """
 
     def __init__(
         self,

From 994854103176f392e52e18a48cc425c07dc0c21c Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Fri, 18 Aug 2023 16:37:40 +0530
Subject: [PATCH 12/48] Update openml/flows/flow.py

Co-authored-by: Lennart Purucker <contact@lennart-purucker.com>
---
 openml/flows/flow.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openml/flows/flow.py b/openml/flows/flow.py
index f1c5935e8..70ecaa3a3 100644
--- a/openml/flows/flow.py
+++ b/openml/flows/flow.py
@@ -523,7 +523,7 @@ def get_subflow(self, structure):
 
 
 def _copy_server_fields(source_flow, target_flow):
-    """ Recursively copies the fields added by the server from the `source_flow` to the `target_flow`.
+    """Recursively copies the fields added by the server from the `source_flow` to the `target_flow`.
     
     Parameters
     ----------

From 1f7979344b9d2271e72044b7b2a4c139c3356264 Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Fri, 18 Aug 2023 16:40:57 +0530
Subject: [PATCH 13/48] Update openml/flows/flow.py

Co-authored-by: Lennart Purucker <contact@lennart-purucker.com>
---
 openml/flows/flow.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openml/flows/flow.py b/openml/flows/flow.py
index 70ecaa3a3..42e5948b7 100644
--- a/openml/flows/flow.py
+++ b/openml/flows/flow.py
@@ -545,7 +545,7 @@ def _copy_server_fields(source_flow, target_flow):
 
 
 def _add_if_nonempty(dic, key, value):
-    """ Adds a key-value pair to a dictionary if the value is not None.
+    """Adds a key-value pair to a dictionary if the value is not None.
      
     Parameters
     ----------

From bb0077c12f02a7255d3c32bc5c01fce5080895e2 Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Fri, 18 Aug 2023 16:41:09 +0530
Subject: [PATCH 14/48] Update openml/flows/functions.py

Co-authored-by: Lennart Purucker <contact@lennart-purucker.com>
---
 openml/flows/functions.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/openml/flows/functions.py b/openml/flows/functions.py
index 11b26d367..3bb044f58 100644
--- a/openml/flows/functions.py
+++ b/openml/flows/functions.py
@@ -337,8 +337,8 @@ def get_flow_id(
 
 
 def __list_flows(api_call: str, output_format: str = "dict") -> Union[Dict, pd.DataFrame]:
-    """
-    Retrieve information about flows from OpenML API and parse it to a dictionary or a Pandas DataFrame.
+    """Retrieve information about flows from OpenML API and parse it to a dictionary or a Pandas DataFrame.
+    
     Parameters
     ----------
     api_call: str

From e986733d8f3d12cccb0f5a47fe44860c317f51f3 Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Fri, 18 Aug 2023 16:41:20 +0530
Subject: [PATCH 15/48] Update openml/extensions/sklearn/extension.py

Co-authored-by: Lennart Purucker <contact@lennart-purucker.com>
---
 openml/extensions/sklearn/extension.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py
index 98c0a2e6d..2da49eb72 100644
--- a/openml/extensions/sklearn/extension.py
+++ b/openml/extensions/sklearn/extension.py
@@ -2103,17 +2103,17 @@ def instantiate_model_from_hpo_class(
     def _extract_trace_data(self, model, rep_no, fold_no):
         """Extracts data from a machine learning model's cross-validation results and creates an ARFF (Attribute-Relation File Format) trace.
         
-            Parameters
-            ----------
-            model : Any
-                A fitted hyperparameter optimization model.
-            rep_no : int
-                The repetition number.
-            fold_no : int
-                The fold number.
-            Returns
-            -------
-            A list of ARFF tracecontent.
+        Parameters
+        ----------
+        model : Any
+            A fitted hyperparameter optimization model.
+        rep_no : int
+            The repetition number.
+        fold_no : int
+            The fold number.
+        Returns
+        -------
+        A list of ARFF tracecontent.
         """
         arff_tracecontent = []
         for itt_no in range(0, len(model.cv_results_["mean_test_score"])):

From 426e5fbc99094a103a0b52148455d50c25f510b4 Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Fri, 18 Aug 2023 16:41:32 +0530
Subject: [PATCH 16/48] Update openml/flows/functions.py

Co-authored-by: Lennart Purucker <contact@lennart-purucker.com>
---
 openml/flows/functions.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/openml/flows/functions.py b/openml/flows/functions.py
index 3bb044f58..a84238f36 100644
--- a/openml/flows/functions.py
+++ b/openml/flows/functions.py
@@ -346,6 +346,7 @@ def __list_flows(api_call: str, output_format: str = "dict") -> Union[Dict, pd.D
     output_format: str in {"dict", "dataframe"}
         The output format.
     Returns
+    
     -------
         The flows information in the specified output format.
     """

From 581d47ae0e593b2ab57021677603ff56d631c6d8 Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Fri, 18 Aug 2023 16:41:45 +0530
Subject: [PATCH 17/48] Update openml/runs/functions.py

Co-authored-by: Lennart Purucker <contact@lennart-purucker.com>
---
 openml/runs/functions.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/openml/runs/functions.py b/openml/runs/functions.py
index 5aaca77d7..b3adcecdf 100644
--- a/openml/runs/functions.py
+++ b/openml/runs/functions.py
@@ -128,11 +128,13 @@ def run_model_on_task(
     flow = extension.model_to_flow(model)
 
     def get_task_and_type_conversion(task: Union[int, str, OpenMLTask]) -> OpenMLTask:
-        """ Retrieve an OpenMLTask object from either an integer or string ID, or directly from an OpenMLTask object.
+        """Retrieve an OpenMLTask object from either an integer or string ID, or directly from an OpenMLTask object.
+        
         Parameters
         ----------
         task : Union[int, str, OpenMLTask]
             The task ID or the OpenMLTask object.
+        
         Returns
         -------
         OpenMLTask

From 31d6e039fc638cf8d5f62346e275ff06003c016f Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Fri, 18 Aug 2023 16:42:03 +0530
Subject: [PATCH 18/48] Update openml/runs/functions.py

Co-authored-by: Lennart Purucker <contact@lennart-purucker.com>
---
 openml/runs/functions.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/openml/runs/functions.py b/openml/runs/functions.py
index b3adcecdf..23a01838c 100644
--- a/openml/runs/functions.py
+++ b/openml/runs/functions.py
@@ -463,7 +463,8 @@ def _run_task_get_arffcontent(
     "OrderedDict[str, OrderedDict]",
     "OrderedDict[str, OrderedDict]",
 ]:
-    """ Runs the hyperparameter optimization on the given task and returns the arfftrace content.
+    """Runs the hyperparameter optimization on the given task and returns the arfftrace content.
+    
     Parameters
     ----------
     model : Any

From 4dfe6fe24ee73510b86473d2dad867145eba52c7 Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Fri, 18 Aug 2023 16:42:18 +0530
Subject: [PATCH 19/48] Update openml/runs/functions.py

Co-authored-by: Lennart Purucker <contact@lennart-purucker.com>
---
 openml/runs/functions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openml/runs/functions.py b/openml/runs/functions.py
index 23a01838c..645663662 100644
--- a/openml/runs/functions.py
+++ b/openml/runs/functions.py
@@ -670,7 +670,7 @@ def _run_task_get_arffcontent_parallel_helper(
     Optional[OpenMLRunTrace],
     "OrderedDict[str, float]",
 ]:
-    """ Helper function that runs a single model on a single task fold sample.
+    """Helper function that runs a single model on a single task fold sample.
     
     Parameters
     ----------

From 3c29b1d5ef06b136099680b7c4800857fa4d6e4f Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Fri, 18 Aug 2023 16:42:33 +0530
Subject: [PATCH 20/48] Update openml/runs/trace.py

Co-authored-by: Lennart Purucker <contact@lennart-purucker.com>
---
 openml/runs/trace.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/openml/runs/trace.py b/openml/runs/trace.py
index 85fd55334..5a0992f82 100644
--- a/openml/runs/trace.py
+++ b/openml/runs/trace.py
@@ -34,8 +34,7 @@ class OpenMLRunTrace(object):
     """
 
     def __init__(self, run_id: int, trace_iterations: List[List]):
-        """
-        Object to hold the trace content of a run.
+        """Object to hold the trace content of a run.
         
         Parameters
         ----------

From bf07329882597541c517367bae81e40e44ca7af2 Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Fri, 18 Aug 2023 16:42:45 +0530
Subject: [PATCH 21/48] Update openml/runs/trace.py

Co-authored-by: Lennart Purucker <contact@lennart-purucker.com>
---
 openml/runs/trace.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openml/runs/trace.py b/openml/runs/trace.py
index 5a0992f82..28171900f 100644
--- a/openml/runs/trace.py
+++ b/openml/runs/trace.py
@@ -237,7 +237,7 @@ def trace_from_arff(cls, arff_obj):
 
     @classmethod
     def _trace_from_arff_struct(cls, attributes, content, error_message):
-        """ Generate a trace dictionary from ARFF structure.
+        """Generate a trace dictionary from ARFF structure.
         
         Parameters
         ----------

From 711303a4729645d6883cf2d6ed18862dae94e4cc Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Fri, 18 Aug 2023 16:42:57 +0530
Subject: [PATCH 22/48] Update openml/setups/functions.py

Co-authored-by: Lennart Purucker <contact@lennart-purucker.com>
---
 openml/setups/functions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openml/setups/functions.py b/openml/setups/functions.py
index 069c3c38e..76cce7014 100644
--- a/openml/setups/functions.py
+++ b/openml/setups/functions.py
@@ -288,7 +288,7 @@ def initialize_model(setup_id: int) -> Any:
 
 
 def _to_dict(flow_id: int, openml_parameter_settings):
-    """ Convert a flow ID and a list of OpenML parameter settings to a dictionary representation that can be serialized to XML.
+    """Convert a flow ID and a list of OpenML parameter settings to a dictionary representation that can be serialized to XML.
     
     Parameters
     ----------

From ad8aec23f70fa6b2b54c2cf2bb194e4fae6a35bf Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Fri, 18 Aug 2023 16:43:09 +0530
Subject: [PATCH 23/48] Update openml/study/functions.py

Co-authored-by: Lennart Purucker <contact@lennart-purucker.com>
---
 openml/study/functions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openml/study/functions.py b/openml/study/functions.py
index d84f12580..461b96ba5 100644
--- a/openml/study/functions.py
+++ b/openml/study/functions.py
@@ -107,7 +107,7 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy:
             tags.append(current_tag)
 
     def get_nested_ids_from_result_dict(key: str, subkey: str) -> Optional[List]:
-        """ Extracts a list of nested IDs from a result dictionary.
+        """Extracts a list of nested IDs from a result dictionary.
         
         Parameters
         ----------

From 2150d64e94a77dae4a30ad5938753465528419ba Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Fri, 18 Aug 2023 16:43:20 +0530
Subject: [PATCH 24/48] Update openml/study/functions.py

Co-authored-by: Lennart Purucker <contact@lennart-purucker.com>
---
 openml/study/functions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openml/study/functions.py b/openml/study/functions.py
index 461b96ba5..cf9587a45 100644
--- a/openml/study/functions.py
+++ b/openml/study/functions.py
@@ -605,7 +605,7 @@ def _list_studies(output_format="dict", **kwargs) -> Union[Dict, pd.DataFrame]:
 
 
 def __list_studies(api_call, output_format="object") -> Union[Dict, pd.DataFrame]:
-    """ Retrieves the list of OpenML studies and returns it in a dictionary or a Pandas DataFrame.
+    """Retrieves the list of OpenML studies and returns it in a dictionary or a Pandas DataFrame.
     
     Parameters
     ----------

From f9fee1eccec7355f19ad483ab4b456b9f7fbfc97 Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Fri, 18 Aug 2023 16:43:28 +0530
Subject: [PATCH 25/48] Update openml/tasks/functions.py

Co-authored-by: Lennart Purucker <contact@lennart-purucker.com>
---
 openml/tasks/functions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py
index e0efb9c5e..2a327ddb0 100644
--- a/openml/tasks/functions.py
+++ b/openml/tasks/functions.py
@@ -230,7 +230,7 @@ def _list_tasks(task_type=None, output_format="dict", **kwargs):
 
 
 def __list_tasks(api_call, output_format="dict"):
-    """ Returns a dictionary or a Pandas DataFrame with information about OpenML tasks.
+    """Returns a dictionary or a Pandas DataFrame with information about OpenML tasks.
     
     Parameters
     ----------

From 3776710ac36c074e3b3940e58bc5e984b152babf Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Fri, 18 Aug 2023 16:43:37 +0530
Subject: [PATCH 26/48] Update openml/tasks/split.py

Co-authored-by: Lennart Purucker <contact@lennart-purucker.com>
---
 openml/tasks/split.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openml/tasks/split.py b/openml/tasks/split.py
index bea7f9390..655126d3a 100644
--- a/openml/tasks/split.py
+++ b/openml/tasks/split.py
@@ -136,7 +136,7 @@ def _from_arff_file(cls, filename: str) -> "OpenMLSplit":
         return cls(name, "", repetitions)
 
     def from_dataset(self, X, Y, folds, repeats):
-        """ Generates a new OpenML dataset object from input data and cross-validation settings.
+        """Generates a new OpenML dataset object from input data and cross-validation settings.
         
         Parameters
         ----------

From 091335da207be1c36ef31dd8dcb57fa1be346148 Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Fri, 18 Aug 2023 16:43:43 +0530
Subject: [PATCH 27/48] Update openml/tasks/split.py

Co-authored-by: Lennart Purucker <contact@lennart-purucker.com>
---
 openml/tasks/split.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openml/tasks/split.py b/openml/tasks/split.py
index 655126d3a..2df3aad72 100644
--- a/openml/tasks/split.py
+++ b/openml/tasks/split.py
@@ -157,7 +157,7 @@ def from_dataset(self, X, Y, folds, repeats):
         raise NotImplementedError()
 
     def get(self, repeat=0, fold=0, sample=0):
-        """ Returns the specified data split from the CrossValidationSplit object.
+        """Returns the specified data split from the CrossValidationSplit object.
         
         Parameters
         ----------

From 2f9b2ad80e13b4e930e75d5877fb8207817b850b Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Mon, 21 Aug 2023 10:11:47 +0530
Subject: [PATCH 28/48] Update openml/tasks/task.py


From 571951f18a67c6ec58f3649bec5acf4f22b56458 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 21 Aug 2023 04:42:21 +0000
Subject: [PATCH 29/48] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 openml/extensions/sklearn/extension.py |  2 +-
 openml/flows/flow.py                   |  4 ++--
 openml/flows/functions.py              |  4 ++--
 openml/runs/functions.py               | 16 ++++++++--------
 openml/runs/trace.py                   | 12 ++++++------
 openml/setups/functions.py             | 12 ++++++------
 openml/study/functions.py              |  8 ++++----
 openml/tasks/functions.py              |  6 +++---
 openml/tasks/split.py                  | 10 +++++-----
 9 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py
index 2da49eb72..d27b29293 100644
--- a/openml/extensions/sklearn/extension.py
+++ b/openml/extensions/sklearn/extension.py
@@ -2102,7 +2102,7 @@ def instantiate_model_from_hpo_class(
 
     def _extract_trace_data(self, model, rep_no, fold_no):
         """Extracts data from a machine learning model's cross-validation results and creates an ARFF (Attribute-Relation File Format) trace.
-        
+
         Parameters
         ----------
         model : Any
diff --git a/openml/flows/flow.py b/openml/flows/flow.py
index 42e5948b7..6b011679d 100644
--- a/openml/flows/flow.py
+++ b/openml/flows/flow.py
@@ -524,7 +524,7 @@ def get_subflow(self, structure):
 
 def _copy_server_fields(source_flow, target_flow):
     """Recursively copies the fields added by the server from the `source_flow` to the `target_flow`.
-    
+
     Parameters
     ----------
     source_flow : OpenMLFlow
@@ -546,7 +546,7 @@ def _copy_server_fields(source_flow, target_flow):
 
 def _add_if_nonempty(dic, key, value):
     """Adds a key-value pair to a dictionary if the value is not None.
-     
+
     Parameters
     ----------
     dic: dict
diff --git a/openml/flows/functions.py b/openml/flows/functions.py
index a84238f36..0c3efd39c 100644
--- a/openml/flows/functions.py
+++ b/openml/flows/functions.py
@@ -338,7 +338,7 @@ def get_flow_id(
 
 def __list_flows(api_call: str, output_format: str = "dict") -> Union[Dict, pd.DataFrame]:
     """Retrieve information about flows from OpenML API and parse it to a dictionary or a Pandas DataFrame.
-    
+
     Parameters
     ----------
     api_call: str
@@ -346,7 +346,7 @@ def __list_flows(api_call: str, output_format: str = "dict") -> Union[Dict, pd.D
     output_format: str in {"dict", "dataframe"}
         The output format.
     Returns
-    
+
     -------
         The flows information in the specified output format.
     """
diff --git a/openml/runs/functions.py b/openml/runs/functions.py
index 645663662..0acbb1fdd 100644
--- a/openml/runs/functions.py
+++ b/openml/runs/functions.py
@@ -129,12 +129,12 @@ def run_model_on_task(
 
     def get_task_and_type_conversion(task: Union[int, str, OpenMLTask]) -> OpenMLTask:
         """Retrieve an OpenMLTask object from either an integer or string ID, or directly from an OpenMLTask object.
-        
+
         Parameters
         ----------
         task : Union[int, str, OpenMLTask]
             The task ID or the OpenMLTask object.
-        
+
         Returns
         -------
         OpenMLTask
@@ -464,7 +464,7 @@ def _run_task_get_arffcontent(
     "OrderedDict[str, OrderedDict]",
 ]:
     """Runs the hyperparameter optimization on the given task and returns the arfftrace content.
-    
+
     Parameters
     ----------
     model : Any
@@ -479,7 +479,7 @@ def _run_task_get_arffcontent(
         The format in which to download the dataset.
     n_jobs : int
         Number of jobs to run in parallel. If None, use 1 core by default. If -1, use all available cores.
-        
+
     Returns
     -------
     Tuple[List[List], Optional[OpenMLRunTrace], OrderedDict[str, OrderedDict], OrderedDict[str, OrderedDict]]
@@ -671,7 +671,7 @@ def _run_task_get_arffcontent_parallel_helper(
     "OrderedDict[str, float]",
 ]:
     """Helper function that runs a single model on a single task fold sample.
-    
+
     Parameters
     ----------
     extension : Extension
@@ -690,13 +690,13 @@ def _run_task_get_arffcontent_parallel_helper(
         The dataset format to be used.
     configuration : Dict
         Hyperparameters to configure the model.
-        
+
     Returns
     -------
     Tuple[np.ndarray, Optional[pd.DataFrame], np.ndarray, Optional[pd.DataFrame],
            Optional[OpenMLRunTrace], OrderedDict[str, float]]
-    A tuple containing the predictions, probability estimates (if applicable), 
-    actual target values, actual target value probabilities (if applicable), 
+    A tuple containing the predictions, probability estimates (if applicable),
+    actual target values, actual target value probabilities (if applicable),
     the trace object of the OpenML run (if applicable), and a dictionary of local measures for this particular fold.
     """
     # Sets up the OpenML instantiated in the child process to match that of the parent's
diff --git a/openml/runs/trace.py b/openml/runs/trace.py
index 28171900f..1f3808255 100644
--- a/openml/runs/trace.py
+++ b/openml/runs/trace.py
@@ -35,7 +35,7 @@ class OpenMLRunTrace(object):
 
     def __init__(self, run_id: int, trace_iterations: List[List]):
         """Object to hold the trace content of a run.
-        
+
         Parameters
         ----------
         run_id : int
@@ -238,7 +238,7 @@ def trace_from_arff(cls, arff_obj):
     @classmethod
     def _trace_from_arff_struct(cls, attributes, content, error_message):
         """Generate a trace dictionary from ARFF structure.
-        
+
         Parameters
         ----------
         cls : type
@@ -249,7 +249,7 @@ def _trace_from_arff_struct(cls, attributes, content, error_message):
             List of instances.
         error_message : str
             Error message to raise if `setup_string` is in `attributes`.
-            
+
         Returns
         -------
         OrderedDict
@@ -373,19 +373,19 @@ def trace_from_xml(cls, xml):
     @classmethod
     def merge_traces(cls, traces: List["OpenMLRunTrace"]) -> "OpenMLRunTrace":
         """Merge multiple traces into a single trace.
-        
+
         Parameters
         ----------
         cls : type
             Type of the trace object to be created.
         traces : List[OpenMLRunTrace]
             List of traces to merge.
-            
+
         Returns
         -------
         OpenMLRunTrace
             A trace object representing the merged traces.
-            
+
         Raises
         ------
         ValueError
diff --git a/openml/setups/functions.py b/openml/setups/functions.py
index 76cce7014..3580de81a 100644
--- a/openml/setups/functions.py
+++ b/openml/setups/functions.py
@@ -62,17 +62,17 @@ def setup_exists(flow) -> int:
 
 def _get_cached_setup(setup_id: int):
     """Load a run from the cache.
-    
+
     Parameters
     ----------
     setup_id : int
         ID of the setup to be loaded.
-        
+
     Returns
     -------
     OpenMLSetup
         The loaded setup object.
-        
+
     Raises
     ------
     OpenMLCacheException
@@ -289,14 +289,14 @@ def initialize_model(setup_id: int) -> Any:
 
 def _to_dict(flow_id: int, openml_parameter_settings):
     """Convert a flow ID and a list of OpenML parameter settings to a dictionary representation that can be serialized to XML.
-    
+
     Parameters
     ----------
     flow_id : int
         ID of the flow.
     openml_parameter_settings : List[OpenMLParameter]
         A list of OpenML parameter settings.
-        
+
     Returns
     -------
     OrderedDict
@@ -350,7 +350,7 @@ def _create_setup_from_xml(result_dict, output_format="object"):
 
 def _create_setup_parameter_from_xml(result_dict, output_format="object"):
     """
-        Create an OpenMLParameter object or a dictionary from an API xml result.
+    Create an OpenMLParameter object or a dictionary from an API xml result.
     """
     if output_format == "object":
         return OpenMLParameter(
diff --git a/openml/study/functions.py b/openml/study/functions.py
index cf9587a45..f3d19218e 100644
--- a/openml/study/functions.py
+++ b/openml/study/functions.py
@@ -108,14 +108,14 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy:
 
     def get_nested_ids_from_result_dict(key: str, subkey: str) -> Optional[List]:
         """Extracts a list of nested IDs from a result dictionary.
-        
+
         Parameters
         ----------
         key : str
             Nested OpenML IDs.
         subkey : str
             The subkey contains the nested OpenML IDs.
-            
+
         Returns
         -------
         Optional[List]
@@ -606,14 +606,14 @@ def _list_studies(output_format="dict", **kwargs) -> Union[Dict, pd.DataFrame]:
 
 def __list_studies(api_call, output_format="object") -> Union[Dict, pd.DataFrame]:
     """Retrieves the list of OpenML studies and returns it in a dictionary or a Pandas DataFrame.
-    
+
     Parameters
     ----------
     api_call : str
         The API call for retrieving the list of OpenML studies.
     output_format : str in {"object", "dataframe"}
         Format of the output, either 'object' for a dictionary or 'dataframe' for a Pandas DataFrame.
-        
+
     Returns
     -------
     Union[Dict, pd.DataFrame]
diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py
index 2a327ddb0..d54bc4b42 100644
--- a/openml/tasks/functions.py
+++ b/openml/tasks/functions.py
@@ -231,19 +231,19 @@ def _list_tasks(task_type=None, output_format="dict", **kwargs):
 
 def __list_tasks(api_call, output_format="dict"):
     """Returns a dictionary or a Pandas DataFrame with information about OpenML tasks.
-    
+
     Parameters
     ----------
     api_call : str
         The API call specifying which tasks to return.
     output_format : str in {"dict", "dataframe"}
         Output format for the returned object.
-        
+
     Returns
     -------
     Union[Dict, pd.DataFrame]
         A dictionary or a Pandas DataFrame with information about OpenML tasks.
-        
+
     Raises
     ------
     ValueError
diff --git a/openml/tasks/split.py b/openml/tasks/split.py
index 2df3aad72..8112ba41b 100644
--- a/openml/tasks/split.py
+++ b/openml/tasks/split.py
@@ -137,7 +137,7 @@ def _from_arff_file(cls, filename: str) -> "OpenMLSplit":
 
     def from_dataset(self, X, Y, folds, repeats):
         """Generates a new OpenML dataset object from input data and cross-validation settings.
-        
+
         Parameters
         ----------
         X : array-like or sparse matrix
@@ -148,7 +148,7 @@ def from_dataset(self, X, Y, folds, repeats):
             Number of cross-validation folds to generate.
         repeats : int
             Number of times to repeat the cross-validation process.
-            
+
         Raises
         ------
         NotImplementedError
@@ -158,7 +158,7 @@ def from_dataset(self, X, Y, folds, repeats):
 
     def get(self, repeat=0, fold=0, sample=0):
         """Returns the specified data split from the CrossValidationSplit object.
-        
+
         Parameters
         ----------
         repeat : int
@@ -167,12 +167,12 @@ def get(self, repeat=0, fold=0, sample=0):
             Index of the fold to retrieve.
         sample : int
             Index of the sample to retrieve.
-            
+
         Returns
         -------
         numpy.ndarray
             The data split for the specified repeat, fold, and sample.
-            
+
         Raises
         ------
         ValueError

From 125ea1d74c5c29465be008c16a241154a300af30 Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Mon, 21 Aug 2023 10:42:36 +0530
Subject: [PATCH 30/48] Update openml/tasks/task.py

---
 openml/tasks/task.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/openml/tasks/task.py b/openml/tasks/task.py
index 26b275572..b4449a030 100644
--- a/openml/tasks/task.py
+++ b/openml/tasks/task.py
@@ -551,8 +551,8 @@ def _to_dict(self) -> "OrderedDict[str, OrderedDict]":
 
 
 class OpenMLLearningCurveTask(OpenMLClassificationTask):
-     """OpenML Learning Curve object.
-     
+    """OpenML Learning Curve object.
+
     Parameters
     ----------
     task_type_id : TaskType

From 7fa83591855ce4bad1c6bafff91d265c8a46340b Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 21 Aug 2023 05:12:52 +0000
Subject: [PATCH 31/48] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 openml/tasks/task.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/openml/tasks/task.py b/openml/tasks/task.py
index b4449a030..f205bd926 100644
--- a/openml/tasks/task.py
+++ b/openml/tasks/task.py
@@ -46,7 +46,7 @@ class OpenMLTask(OpenMLBase):
         Refers to the data.
     estimation_procedure_id: int
         Refers to the type of estimates used.
-    estimation_procedure_type: str, default=None 
+    estimation_procedure_type: str, default=None
         Refers to the type of estimation procedure used for the OpenML task.
     estimation_parameters: [Dict[str, str]], default=None
         Estimation parameters used for the OpenML task.
@@ -399,7 +399,7 @@ def __init__(
 
 class OpenMLRegressionTask(OpenMLSupervisedTask):
     """OpenML Regression object.
-    
+
     Parameters
     ----------
     task_type_id : TaskType

From 08055e7372e9723e3789e5415d266b9513c6c3b6 Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Tue, 19 Sep 2023 11:58:11 +0530
Subject: [PATCH 32/48] Update openml/extensions/sklearn/extension.py

---
 openml/extensions/sklearn/extension.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py
index d27b29293..065792c5a 100644
--- a/openml/extensions/sklearn/extension.py
+++ b/openml/extensions/sklearn/extension.py
@@ -2101,7 +2101,8 @@ def instantiate_model_from_hpo_class(
         return base_estimator
 
     def _extract_trace_data(self, model, rep_no, fold_no):
-        """Extracts data from a machine learning model's cross-validation results and creates an ARFF (Attribute-Relation File Format) trace.
+        """Extracts data from a machine learning model's cross-validation results 
+        and creates an ARFF (Attribute-Relation File Format) trace.
 
         Parameters
         ----------

From 0d272b6a88ab308d0a71608a2b7309ed4766828f Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Tue, 19 Sep 2023 11:59:30 +0530
Subject: [PATCH 33/48] Update openml/flows/flow.py

---
 openml/flows/flow.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/openml/flows/flow.py b/openml/flows/flow.py
index 6b011679d..52b813d36 100644
--- a/openml/flows/flow.py
+++ b/openml/flows/flow.py
@@ -523,7 +523,8 @@ def get_subflow(self, structure):
 
 
 def _copy_server_fields(source_flow, target_flow):
-    """Recursively copies the fields added by the server from the `source_flow` to the `target_flow`.
+    """Recursively copies the fields added by the server 
+    from the `source_flow` to the `target_flow`.
 
     Parameters
     ----------

From de5fa6c516da313cda57a0336d30c32f7ccde7e0 Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Tue, 19 Sep 2023 12:00:24 +0530
Subject: [PATCH 34/48] Update openml/flows/functions.py

---
 openml/flows/functions.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/openml/flows/functions.py b/openml/flows/functions.py
index 0c3efd39c..f07f78e17 100644
--- a/openml/flows/functions.py
+++ b/openml/flows/functions.py
@@ -337,7 +337,8 @@ def get_flow_id(
 
 
 def __list_flows(api_call: str, output_format: str = "dict") -> Union[Dict, pd.DataFrame]:
-    """Retrieve information about flows from OpenML API and parse it to a dictionary or a Pandas DataFrame.
+    """Retrieve information about flows from OpenML API 
+    and parse it to a dictionary or a Pandas DataFrame.
 
     Parameters
     ----------

From 35353871a44fc9faef469f96d6bca78233081c48 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 19 Sep 2023 06:31:05 +0000
Subject: [PATCH 35/48] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 openml/extensions/sklearn/extension.py | 2 +-
 openml/flows/flow.py                   | 2 +-
 openml/flows/functions.py              | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py
index 065792c5a..4c7a8912d 100644
--- a/openml/extensions/sklearn/extension.py
+++ b/openml/extensions/sklearn/extension.py
@@ -2101,7 +2101,7 @@ def instantiate_model_from_hpo_class(
         return base_estimator
 
     def _extract_trace_data(self, model, rep_no, fold_no):
-        """Extracts data from a machine learning model's cross-validation results 
+        """Extracts data from a machine learning model's cross-validation results
         and creates an ARFF (Attribute-Relation File Format) trace.
 
         Parameters
diff --git a/openml/flows/flow.py b/openml/flows/flow.py
index 52b813d36..4831eb6a7 100644
--- a/openml/flows/flow.py
+++ b/openml/flows/flow.py
@@ -523,7 +523,7 @@ def get_subflow(self, structure):
 
 
 def _copy_server_fields(source_flow, target_flow):
-    """Recursively copies the fields added by the server 
+    """Recursively copies the fields added by the server
     from the `source_flow` to the `target_flow`.
 
     Parameters
diff --git a/openml/flows/functions.py b/openml/flows/functions.py
index f07f78e17..45eea42dc 100644
--- a/openml/flows/functions.py
+++ b/openml/flows/functions.py
@@ -337,7 +337,7 @@ def get_flow_id(
 
 
 def __list_flows(api_call: str, output_format: str = "dict") -> Union[Dict, pd.DataFrame]:
-    """Retrieve information about flows from OpenML API 
+    """Retrieve information about flows from OpenML API
     and parse it to a dictionary or a Pandas DataFrame.
 
     Parameters

From 4cc0abf94be3626b759641228f8b7db3ffe1e473 Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Tue, 19 Sep 2023 12:01:30 +0530
Subject: [PATCH 36/48] Update openml/runs/functions.py

---
 openml/runs/functions.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/openml/runs/functions.py b/openml/runs/functions.py
index 0acbb1fdd..4251c7a49 100644
--- a/openml/runs/functions.py
+++ b/openml/runs/functions.py
@@ -128,7 +128,8 @@ def run_model_on_task(
     flow = extension.model_to_flow(model)
 
     def get_task_and_type_conversion(task: Union[int, str, OpenMLTask]) -> OpenMLTask:
-        """Retrieve an OpenMLTask object from either an integer or string ID, or directly from an OpenMLTask object.
+        """Retrieve an OpenMLTask object from either an integer or string ID,
+        or directly from an OpenMLTask object.
 
         Parameters
         ----------

From 554215fb702d887aeec5bf59b8cde974273db380 Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Tue, 19 Sep 2023 12:09:39 +0530
Subject: [PATCH 37/48] Update openml/runs/functions.py

---
 openml/runs/functions.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/openml/runs/functions.py b/openml/runs/functions.py
index 4251c7a49..6590444cb 100644
--- a/openml/runs/functions.py
+++ b/openml/runs/functions.py
@@ -464,7 +464,8 @@ def _run_task_get_arffcontent(
     "OrderedDict[str, OrderedDict]",
     "OrderedDict[str, OrderedDict]",
 ]:
-    """Runs the hyperparameter optimization on the given task and returns the arfftrace content.
+    """Runs the hyperparameter optimization on the given task
+    and returns the arfftrace content.
 
     Parameters
     ----------
@@ -479,12 +480,15 @@ def _run_task_get_arffcontent(
     dataset_format : str
         The format in which to download the dataset.
     n_jobs : int
-        Number of jobs to run in parallel. If None, use 1 core by default. If -1, use all available cores.
+        Number of jobs to run in parallel.
+        If None, use 1 core by default. If -1, use all available cores.
 
     Returns
     -------
-    Tuple[List[List], Optional[OpenMLRunTrace], OrderedDict[str, OrderedDict], OrderedDict[str, OrderedDict]]
-        A tuple containing the arfftrace content, the OpenML run trace, the global and local evaluation measures.
+    Tuple[List[List], Optional[OpenMLRunTrace], 
+        OrderedDict[str, OrderedDict], OrderedDict[str, OrderedDict]]
+    A tuple containing the arfftrace content,
+    the OpenML run trace, the global and local evaluation measures.
     """
     arff_datacontent = []  # type: List[List]
     traces = []  # type: List[OpenMLRunTrace]
@@ -698,7 +702,8 @@ def _run_task_get_arffcontent_parallel_helper(
            Optional[OpenMLRunTrace], OrderedDict[str, float]]
     A tuple containing the predictions, probability estimates (if applicable),
     actual target values, actual target value probabilities (if applicable),
-    the trace object of the OpenML run (if applicable), and a dictionary of local measures for this particular fold.
+    the trace object of the OpenML run (if applicable),
+    and a dictionary of local measures for this particular fold.
     """
     # Sets up the OpenML instantiated in the child process to match that of the parent's
     # if configuration=None, loads the default

From 4e1765eec22b923805eb2e0ccc753021a88901bd Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 19 Sep 2023 06:39:58 +0000
Subject: [PATCH 38/48] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 openml/runs/functions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openml/runs/functions.py b/openml/runs/functions.py
index 6590444cb..5e31ed370 100644
--- a/openml/runs/functions.py
+++ b/openml/runs/functions.py
@@ -485,7 +485,7 @@ def _run_task_get_arffcontent(
 
     Returns
     -------
-    Tuple[List[List], Optional[OpenMLRunTrace], 
+    Tuple[List[List], Optional[OpenMLRunTrace],
         OrderedDict[str, OrderedDict], OrderedDict[str, OrderedDict]]
     A tuple containing the arfftrace content,
     the OpenML run trace, the global and local evaluation measures.

From f97655536692798735a1c5ef1781f044a0bf5226 Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Tue, 19 Sep 2023 12:10:47 +0530
Subject: [PATCH 39/48] Update openml/setups/functions.py

---
 openml/setups/functions.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/openml/setups/functions.py b/openml/setups/functions.py
index 3580de81a..e26770a03 100644
--- a/openml/setups/functions.py
+++ b/openml/setups/functions.py
@@ -288,7 +288,8 @@ def initialize_model(setup_id: int) -> Any:
 
 
 def _to_dict(flow_id: int, openml_parameter_settings):
-    """Convert a flow ID and a list of OpenML parameter settings to a dictionary representation that can be serialized to XML.
+    """Convert a flow ID and a list of OpenML parameter settings to 
+    a dictionary representation that can be serialized to XML.
 
     Parameters
     ----------

From 95bfc6940b7b82ad946b384f7448b60194aecf8d Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 19 Sep 2023 06:41:08 +0000
Subject: [PATCH 40/48] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 openml/setups/functions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openml/setups/functions.py b/openml/setups/functions.py
index e26770a03..410a1e964 100644
--- a/openml/setups/functions.py
+++ b/openml/setups/functions.py
@@ -288,7 +288,7 @@ def initialize_model(setup_id: int) -> Any:
 
 
 def _to_dict(flow_id: int, openml_parameter_settings):
-    """Convert a flow ID and a list of OpenML parameter settings to 
+    """Convert a flow ID and a list of OpenML parameter settings to
     a dictionary representation that can be serialized to XML.
 
     Parameters

From c0eedfaaa33758d502df99def9ed53ad36acc608 Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Tue, 19 Sep 2023 12:12:52 +0530
Subject: [PATCH 41/48] Update openml/study/functions.py

---
 openml/study/functions.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/openml/study/functions.py b/openml/study/functions.py
index f3d19218e..b10792289 100644
--- a/openml/study/functions.py
+++ b/openml/study/functions.py
@@ -605,19 +605,22 @@ def _list_studies(output_format="dict", **kwargs) -> Union[Dict, pd.DataFrame]:
 
 
 def __list_studies(api_call, output_format="object") -> Union[Dict, pd.DataFrame]:
-    """Retrieves the list of OpenML studies and returns it in a dictionary or a Pandas DataFrame.
+    """Retrieves the list of OpenML studies and 
+    returns it in a dictionary or a Pandas DataFrame.
 
     Parameters
     ----------
     api_call : str
         The API call for retrieving the list of OpenML studies.
     output_format : str in {"object", "dataframe"}
-        Format of the output, either 'object' for a dictionary or 'dataframe' for a Pandas DataFrame.
+        Format of the output, either 'object' for a dictionary
+        or 'dataframe' for a Pandas DataFrame.
 
     Returns
     -------
     Union[Dict, pd.DataFrame]
-        A dictionary or Pandas DataFrame of OpenML studies, depending on the value of 'output_format'.
+        A dictionary or Pandas DataFrame of OpenML studies, 
+        depending on the value of 'output_format'.
     """
     xml_string = openml._api_calls._perform_api_call(api_call, "get")
     study_dict = xmltodict.parse(xml_string, force_list=("oml:study",))

From af2eac7ce7b1ee29faa048b7607e411ac53caf17 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 19 Sep 2023 06:43:08 +0000
Subject: [PATCH 42/48] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 openml/study/functions.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/openml/study/functions.py b/openml/study/functions.py
index b10792289..05d100ccd 100644
--- a/openml/study/functions.py
+++ b/openml/study/functions.py
@@ -605,7 +605,7 @@ def _list_studies(output_format="dict", **kwargs) -> Union[Dict, pd.DataFrame]:
 
 
 def __list_studies(api_call, output_format="object") -> Union[Dict, pd.DataFrame]:
-    """Retrieves the list of OpenML studies and 
+    """Retrieves the list of OpenML studies and
     returns it in a dictionary or a Pandas DataFrame.
 
     Parameters
@@ -619,7 +619,7 @@ def __list_studies(api_call, output_format="object") -> Union[Dict, pd.DataFrame
     Returns
     -------
     Union[Dict, pd.DataFrame]
-        A dictionary or Pandas DataFrame of OpenML studies, 
+        A dictionary or Pandas DataFrame of OpenML studies,
         depending on the value of 'output_format'.
     """
     xml_string = openml._api_calls._perform_api_call(api_call, "get")

From ae8581fda39ba55e66cadc44aca22fea4706bba4 Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Tue, 19 Sep 2023 12:15:26 +0530
Subject: [PATCH 43/48] Update openml/tasks/functions.py

---
 openml/tasks/functions.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py
index d54bc4b42..41d8d0197 100644
--- a/openml/tasks/functions.py
+++ b/openml/tasks/functions.py
@@ -247,8 +247,8 @@ def __list_tasks(api_call, output_format="dict"):
     Raises
     ------
     ValueError
-        If the XML returned by the OpenML API does not contain 'oml:tasks', '@xmlns:oml', or has an incorrect value for
-        '@xmlns:oml'.
+        If the XML returned by the OpenML API does not contain 'oml:tasks', '@xmlns:oml',
+        or has an incorrect value for '@xmlns:oml'.
     KeyError
         If an invalid key is found in the XML for a task.
     """

From e3c475a1c10ce68fdb5ebe86de539ed3ec74a7f2 Mon Sep 17 00:00:00 2001
From: Lennart Purucker <contact@lennart-purucker.com>
Date: Mon, 30 Oct 2023 14:45:50 -0700
Subject: [PATCH 44/48] Update trace.py to fix mypy error

---
 openml/runs/trace.py | 160 ++++++++++++++++++++++---------------------
 1 file changed, 82 insertions(+), 78 deletions(-)

diff --git a/openml/runs/trace.py b/openml/runs/trace.py
index 1f3808255..1b2057c9f 100644
--- a/openml/runs/trace.py
+++ b/openml/runs/trace.py
@@ -4,7 +4,7 @@
 from dataclasses import dataclass
 import json
 import os
-from typing import List, Tuple, Optional  # noqa F401
+from typing import List, Tuple, Optional, Dict, Union  # noqa F401
 
 import arff
 import xmltodict
@@ -19,6 +19,82 @@
 ]
 
 
+@dataclass
+class OpenMLTraceIteration:
+    """
+    OpenML Trace Iteration: parsed output from Run Trace call
+    Exactly one of `setup_string` or `parameters` must be provided.
+
+    Parameters
+    ----------
+    repeat : int
+        repeat number (in case of no repeats: 0)
+
+    fold : int
+        fold number (in case of no folds: 0)
+
+    iteration : int
+        iteration number of optimization procedure
+
+    setup_string : str, optional
+        json string representing the parameters
+        If not provided, ``parameters`` should be set.
+
+    evaluation : double
+        The evaluation that was awarded to this trace iteration.
+        Measure is defined by the task
+
+    selected : bool
+        Whether this was the best of all iterations, and hence
+        selected for making predictions. Per fold/repeat there
+        should be only one iteration selected
+
+    parameters : OrderedDict, optional
+        Dictionary specifying parameter names and their values.
+        If not provided, ``setup_string`` should be set.
+    """
+
+    repeat: int
+    fold: int
+    iteration: int
+
+    evaluation: float
+    selected: bool
+
+    setup_string: Optional[str] = None
+    parameters: Optional[OrderedDict] = None
+
+    def __post_init__(self):
+        # TODO: refactor into one argument of type <str | OrderedDict>
+        if self.setup_string and self.parameters:
+            raise ValueError(
+                "Can only be instantiated with either `setup_string` or `parameters` argument."
+            )
+        elif not (self.setup_string or self.parameters):
+            raise ValueError(
+                "Either `setup_string` or `parameters` needs to be passed as argument."
+            )
+        if self.parameters is not None and not isinstance(self.parameters, OrderedDict):
+            raise TypeError(
+                "argument parameters is not an instance of OrderedDict, but %s"
+                % str(type(self.parameters))
+            )
+
+    def get_parameters(self):
+        result = {}
+        # parameters have prefix 'parameter_'
+
+        if self.setup_string:
+            for param in self.setup_string:
+                key = param[len(PREFIX) :]
+                value = self.setup_string[param]
+                result[key] = json.loads(value)
+        else:
+            for param, value in self.parameters.items():
+                result[param[len(PREFIX) :]] = value
+        return result
+
+
 class OpenMLRunTrace(object):
     """OpenML Run Trace: parsed output from Run Trace call
 
@@ -33,7 +109,11 @@ class OpenMLRunTrace(object):
 
     """
 
-    def __init__(self, run_id: int, trace_iterations: List[List]):
+    def __init__(
+        self,
+        run_id: Union[int, None],
+        trace_iterations: Dict[Tuple[int, int, int], OpenMLTraceIteration],
+    ):
         """Object to hold the trace content of a run.
 
         Parameters
@@ -431,79 +511,3 @@ def __repr__(self):
     def __iter__(self):
         for val in self.trace_iterations.values():
             yield val
-
-
-@dataclass
-class OpenMLTraceIteration:
-    """
-    OpenML Trace Iteration: parsed output from Run Trace call
-    Exactly one of `setup_string` or `parameters` must be provided.
-
-    Parameters
-    ----------
-    repeat : int
-        repeat number (in case of no repeats: 0)
-
-    fold : int
-        fold number (in case of no folds: 0)
-
-    iteration : int
-        iteration number of optimization procedure
-
-    setup_string : str, optional
-        json string representing the parameters
-        If not provided, ``parameters`` should be set.
-
-    evaluation : double
-        The evaluation that was awarded to this trace iteration.
-        Measure is defined by the task
-
-    selected : bool
-        Whether this was the best of all iterations, and hence
-        selected for making predictions. Per fold/repeat there
-        should be only one iteration selected
-
-    parameters : OrderedDict, optional
-        Dictionary specifying parameter names and their values.
-        If not provided, ``setup_string`` should be set.
-    """
-
-    repeat: int
-    fold: int
-    iteration: int
-
-    evaluation: float
-    selected: bool
-
-    setup_string: Optional[str] = None
-    parameters: Optional[OrderedDict] = None
-
-    def __post_init__(self):
-        # TODO: refactor into one argument of type <str | OrderedDict>
-        if self.setup_string and self.parameters:
-            raise ValueError(
-                "Can only be instantiated with either `setup_string` or `parameters` argument."
-            )
-        elif not (self.setup_string or self.parameters):
-            raise ValueError(
-                "Either `setup_string` or `parameters` needs to be passed as argument."
-            )
-        if self.parameters is not None and not isinstance(self.parameters, OrderedDict):
-            raise TypeError(
-                "argument parameters is not an instance of OrderedDict, but %s"
-                % str(type(self.parameters))
-            )
-
-    def get_parameters(self):
-        result = {}
-        # parameters have prefix 'parameter_'
-
-        if self.setup_string:
-            for param in self.setup_string:
-                key = param[len(PREFIX) :]
-                value = self.setup_string[param]
-                result[key] = json.loads(value)
-        else:
-            for param, value in self.parameters.items():
-                result[param[len(PREFIX) :]] = value
-        return result

From 51798e0de3b48b17b24367b9da16316efdd07429 Mon Sep 17 00:00:00 2001
From: Lennart Purucker <contact@lennart-purucker.com>
Date: Mon, 30 Oct 2023 14:46:23 -0700
Subject: [PATCH 45/48] Update functions.py to fix mypy error

---
 openml/flows/functions.py | 701 +++++++++++++-------------------------
 1 file changed, 246 insertions(+), 455 deletions(-)

diff --git a/openml/flows/functions.py b/openml/flows/functions.py
index 45eea42dc..bc6d21aaa 100644
--- a/openml/flows/functions.py
+++ b/openml/flows/functions.py
@@ -1,586 +1,377 @@
 # License: BSD 3-Clause
 import warnings
-
-import dateutil.parser
 from collections import OrderedDict
-import os
 import io
-import re
+import os
+from typing import Any, Union, List, Dict, Optional
+
 import xmltodict
 import pandas as pd
-from typing import Any, Union, Dict, Optional, List
 
-from ..exceptions import OpenMLCacheException
-import openml._api_calls
-from . import OpenMLFlow
+import openml
+from .. import config
+from .setup import OpenMLSetup, OpenMLParameter
+from openml.flows import flow_exists
+import openml.exceptions
 import openml.utils
 
 
-FLOWS_CACHE_DIR_NAME = "flows"
-
-
-def _get_cached_flows() -> OrderedDict:
-    """Return all the cached flows.
-
-    Returns
-    -------
-    flows : OrderedDict
-        Dictionary with flows. Each flow is an instance of OpenMLFlow.
+def setup_exists(flow) -> int:
     """
-    flows = OrderedDict()  # type: 'OrderedDict[int, OpenMLFlow]'
-
-    flow_cache_dir = openml.utils._create_cache_directory(FLOWS_CACHE_DIR_NAME)
-    directory_content = os.listdir(flow_cache_dir)
-    directory_content.sort()
-    # Find all flow ids for which we have downloaded
-    # the flow description
-
-    for filename in directory_content:
-        if not re.match(r"[0-9]*", filename):
-            continue
-
-        fid = int(filename)
-        flows[fid] = _get_cached_flow(fid)
-
-    return flows
-
-
-def _get_cached_flow(fid: int) -> OpenMLFlow:
-    """Get the cached flow with the given id.
+    Checks whether a hyperparameter configuration already exists on the server.
 
     Parameters
     ----------
-    fid : int
-        Flow id.
+    flow : flow
+        The openml flow object. Should have flow id present for the main flow
+        and all subflows (i.e., it should be downloaded from the server by
+        means of flow.get, and not instantiated locally)
 
     Returns
     -------
-    OpenMLFlow.
+    setup_id : int
+        setup id iff exists, False otherwise
     """
+    # sadly, this api call relies on a run object
+    openml.flows.functions._check_flow_for_server_id(flow)
+    if flow.model is None:
+        raise ValueError("Flow should have model field set with the actual model.")
+    if flow.extension is None:
+        raise ValueError("Flow should have model field set with the correct extension.")
+
+    # checks whether the flow exists on the server and flow ids align
+    exists = flow_exists(flow.name, flow.external_version)
+    if exists != flow.flow_id:
+        raise ValueError(
+            f"Local flow id ({flow.id}) differs from server id ({exists}). "
+            "If this issue persists, please contact the developers."
+        )
 
-    fid_cache_dir = openml.utils._create_cache_directory_for_id(FLOWS_CACHE_DIR_NAME, fid)
-    flow_file = os.path.join(fid_cache_dir, "flow.xml")
-
-    try:
-        with io.open(flow_file, encoding="utf8") as fh:
-            return _create_flow_from_xml(fh.read())
-    except (OSError, IOError):
-        openml.utils._remove_cache_dir_for_id(FLOWS_CACHE_DIR_NAME, fid_cache_dir)
-        raise OpenMLCacheException("Flow file for fid %d not " "cached" % fid)
+    openml_param_settings = flow.extension.obtain_parameter_values(flow)
+    description = xmltodict.unparse(_to_dict(flow.flow_id, openml_param_settings), pretty=True)
+    file_elements = {
+        "description": ("description.arff", description)
+    }  # type: openml._api_calls.FILE_ELEMENTS_TYPE
+    result = openml._api_calls._perform_api_call(
+        "/setup/exists/", "post", file_elements=file_elements
+    )
+    result_dict = xmltodict.parse(result)
+    setup_id = int(result_dict["oml:setup_exists"]["oml:id"])
+    return setup_id if setup_id > 0 else False
 
 
-@openml.utils.thread_safe_if_oslo_installed
-def get_flow(flow_id: int, reinstantiate: bool = False, strict_version: bool = True) -> OpenMLFlow:
-    """Download the OpenML flow for a given flow ID.
+def _get_cached_setup(setup_id: int):
+    """Load a run from the cache.
 
     Parameters
     ----------
-    flow_id : int
-        The OpenML flow id.
-
-    reinstantiate: bool
-        Whether to reinstantiate the flow to a model instance.
-
-    strict_version : bool, default=True
-        Whether to fail if version requirements are not fulfilled.
+    setup_id : int
+        ID of the setup to be loaded.
 
     Returns
     -------
-    flow : OpenMLFlow
-        the flow
-    """
-    flow_id = int(flow_id)
-    flow = _get_flow_description(flow_id)
+    OpenMLSetup
+        The loaded setup object.
 
-    if reinstantiate:
-        flow.model = flow.extension.flow_to_model(flow, strict_version=strict_version)
-        if not strict_version:
-            # check if we need to return a new flow b/c of version mismatch
-            new_flow = flow.extension.model_to_flow(flow.model)
-            if new_flow.dependencies != flow.dependencies:
-                return new_flow
-    return flow
+    Raises
+    ------
+    OpenMLCacheException
+        If the setup file for the given setup ID is not cached.
+    """
+    cache_dir = config.get_cache_directory()
+    setup_cache_dir = os.path.join(cache_dir, "setups", str(setup_id))
+    try:
+        setup_file = os.path.join(setup_cache_dir, "description.xml")
+        with io.open(setup_file, encoding="utf8") as fh:
+            setup_xml = xmltodict.parse(fh.read())
+            setup = _create_setup_from_xml(setup_xml, output_format="object")
+        return setup
 
+    except (OSError, IOError):
+        raise openml.exceptions.OpenMLCacheException(
+            "Setup file for setup id %d not cached" % setup_id
+        )
 
-def _get_flow_description(flow_id: int) -> OpenMLFlow:
-    """Get the Flow for a given  ID.
 
-    Does the real work for get_flow. It returns a cached flow
-    instance if the flow exists locally, otherwise it downloads the
-    flow and returns an instance created from the xml representation.
+def get_setup(setup_id):
+    """
+     Downloads the setup (configuration) description from OpenML
+     and returns a structured object
 
     Parameters
     ----------
-    flow_id : int
-        The OpenML flow id.
+    setup_id : int
+        The Openml setup_id
 
     Returns
     -------
-    OpenMLFlow
+    dict or OpenMLSetup(an initialized openml setup object)
     """
-    try:
-        return _get_cached_flow(flow_id)
-    except OpenMLCacheException:
-        xml_file = os.path.join(
-            openml.utils._create_cache_directory_for_id(FLOWS_CACHE_DIR_NAME, flow_id),
-            "flow.xml",
-        )
+    setup_dir = os.path.join(config.get_cache_directory(), "setups", str(setup_id))
+    setup_file = os.path.join(setup_dir, "description.xml")
+
+    if not os.path.exists(setup_dir):
+        os.makedirs(setup_dir)
 
-        flow_xml = openml._api_calls._perform_api_call("flow/%d" % flow_id, request_method="get")
-        with io.open(xml_file, "w", encoding="utf8") as fh:
-            fh.write(flow_xml)
+    try:
+        return _get_cached_setup(setup_id)
+    except openml.exceptions.OpenMLCacheException:
+        url_suffix = "/setup/%d" % setup_id
+        setup_xml = openml._api_calls._perform_api_call(url_suffix, "get")
+        with io.open(setup_file, "w", encoding="utf8") as fh:
+            fh.write(setup_xml)
 
-        return _create_flow_from_xml(flow_xml)
+    result_dict = xmltodict.parse(setup_xml)
+    return _create_setup_from_xml(result_dict, output_format="object")
 
 
-def list_flows(
+def list_setups(
     offset: Optional[int] = None,
     size: Optional[int] = None,
+    flow: Optional[int] = None,
     tag: Optional[str] = None,
-    output_format: str = "dict",
-    **kwargs
+    setup: Optional[List] = None,
+    output_format: str = "object",
 ) -> Union[Dict, pd.DataFrame]:
     """
-    Return a list of all flows which are on OpenML.
-    (Supports large amount of results)
+    List all setups matching all of the given filters.
 
     Parameters
     ----------
     offset : int, optional
-        the number of flows to skip, starting from the first
     size : int, optional
-        the maximum number of flows to return
+    flow : int, optional
     tag : str, optional
-        the tag to include
-    output_format: str, optional (default='dict')
+    setup : list(int), optional
+    output_format: str, optional (default='object')
         The parameter decides the format of the output.
+        - If 'object' the output is a dict of OpenMLSetup objects
         - If 'dict' the output is a dict of dict
         - If 'dataframe' the output is a pandas DataFrame
-    kwargs: dict, optional
-        Legal filter operators: uploader.
 
     Returns
     -------
-    flows : dict of dicts, or dataframe
-        - If output_format='dict'
-            A mapping from flow_id to a dict giving a brief overview of the
-            respective flow.
-            Every flow is represented by a dictionary containing
-            the following information:
-            - flow id
-            - full name
-            - name
-            - version
-            - external version
-            - uploader
-
-        - If output_format='dataframe'
-            Each row maps to a dataset
-            Each column contains the following information:
-            - flow id
-            - full name
-            - name
-            - version
-            - external version
-            - uploader
+    dict or dataframe
     """
-    if output_format not in ["dataframe", "dict"]:
+    if output_format not in ["dataframe", "dict", "object"]:
         raise ValueError(
-            "Invalid output format selected. " "Only 'dict' or 'dataframe' applicable."
+            "Invalid output format selected. " "Only 'dict', 'object', or 'dataframe' applicable."
         )
 
     # TODO: [0.15]
     if output_format == "dict":
         msg = (
-            "Support for `output_format` of 'dict' will be removed in 0.15 "
-            "and pandas dataframes will be returned instead. To ensure your code "
-            "will continue to work, use `output_format`='dataframe'."
+            "Support for `output_format` of 'dict' will be removed in 0.15. "
+            "To ensure your code will continue to work, "
+            "use `output_format`='dataframe' or `output_format`='object'."
         )
         warnings.warn(msg, category=FutureWarning, stacklevel=2)
 
+    batch_size = 1000  # batch size for setups is lower
     return openml.utils._list_all(
         output_format=output_format,
-        listing_call=_list_flows,
+        listing_call=_list_setups,
         offset=offset,
         size=size,
+        flow=flow,
         tag=tag,
-        **kwargs
+        setup=setup,
+        batch_size=batch_size,
     )
 
 
-def _list_flows(output_format="dict", **kwargs) -> Union[Dict, pd.DataFrame]:
+def _list_setups(setup=None, output_format="object", **kwargs):
     """
-    Perform the api call that return a list of all flows.
+    Perform API call `/setup/list/{filters}`
 
     Parameters
     ----------
+    The setup argument that is a list is separated from the single value
+    filters which are put into the kwargs.
+
+    setup : list(int), optional
+
     output_format: str, optional (default='dict')
         The parameter decides the format of the output.
         - If 'dict' the output is a dict of dict
         - If 'dataframe' the output is a pandas DataFrame
 
     kwargs: dict, optional
-        Legal filter operators: uploader, tag, limit, offset.
+        Legal filter operators: flow, setup, limit, offset, tag.
 
     Returns
     -------
-    flows : dict, or dataframe
+    dict or dataframe
     """
-    api_call = "flow/list"
 
+    api_call = "setup/list"
+    if setup is not None:
+        api_call += "/setup/%s" % ",".join([str(int(i)) for i in setup])
     if kwargs is not None:
         for operator, value in kwargs.items():
             api_call += "/%s/%s" % (operator, value)
 
-    return __list_flows(api_call=api_call, output_format=output_format)
+    return __list_setups(api_call=api_call, output_format=output_format)
 
 
-def flow_exists(name: str, external_version: str) -> Union[int, bool]:
-    """Retrieves the flow id.
-
-    A flow is uniquely identified by name + external_version.
-
-    Parameters
-    ----------
-    name : string
-        Name of the flow
-    external_version : string
-        Version information associated with flow.
+def __list_setups(api_call, output_format="object"):
+    """Helper function to parse API calls which are lists of setups"""
+    xml_string = openml._api_calls._perform_api_call(api_call, "get")
+    setups_dict = xmltodict.parse(xml_string, force_list=("oml:setup",))
+    openml_uri = "http://openml.org/openml"
+    # Minimalistic check if the XML is useful
+    if "oml:setups" not in setups_dict:
+        raise ValueError(
+            'Error in return XML, does not contain "oml:setups":' " %s" % str(setups_dict)
+        )
+    elif "@xmlns:oml" not in setups_dict["oml:setups"]:
+        raise ValueError(
+            "Error in return XML, does not contain "
+            '"oml:setups"/@xmlns:oml: %s' % str(setups_dict)
+        )
+    elif setups_dict["oml:setups"]["@xmlns:oml"] != openml_uri:
+        raise ValueError(
+            "Error in return XML, value of  "
+            '"oml:seyups"/@xmlns:oml is not '
+            '"%s": %s' % (openml_uri, str(setups_dict))
+        )
 
-    Returns
-    -------
-    flow_exist : int or bool
-        flow id iff exists, False otherwise
+    assert isinstance(setups_dict["oml:setups"]["oml:setup"], list), type(setups_dict["oml:setups"])
 
-    Notes
-    -----
-    see https://www.openml.org/api_docs/#!/flow/get_flow_exists_name_version
-    """
-    if not (isinstance(name, str) and len(name) > 0):
-        raise ValueError("Argument 'name' should be a non-empty string")
-    if not (isinstance(name, str) and len(external_version) > 0):
-        raise ValueError("Argument 'version' should be a non-empty string")
-
-    xml_response = openml._api_calls._perform_api_call(
-        "flow/exists",
-        "post",
-        data={"name": name, "external_version": external_version},
-    )
-
-    result_dict = xmltodict.parse(xml_response)
-    flow_id = int(result_dict["oml:flow_exists"]["oml:id"])
-    return flow_id if flow_id > 0 else False
+    setups = dict()
+    for setup_ in setups_dict["oml:setups"]["oml:setup"]:
+        # making it a dict to give it the right format
+        current = _create_setup_from_xml(
+            {"oml:setup_parameters": setup_}, output_format=output_format
+        )
+        if output_format == "object":
+            setups[current.setup_id] = current
+        else:
+            setups[current["setup_id"]] = current
 
+    if output_format == "dataframe":
+        setups = pd.DataFrame.from_dict(setups, orient="index")
 
-def get_flow_id(
-    model: Optional[Any] = None,
-    name: Optional[str] = None,
-    exact_version=True,
-) -> Union[int, bool, List[int]]:
-    """Retrieves the flow id for a model or a flow name.
+    return setups
 
-    Provide either a model or a name to this function. Depending on the input, it does
 
-    * ``model`` and ``exact_version == True``: This helper function first queries for the necessary
-      extension. Second, it uses that extension to convert the model into a flow. Third, it
-      executes ``flow_exists`` to potentially obtain the flow id the flow is published to the
-      server.
-    * ``model`` and ``exact_version == False``: This helper function first queries for the
-      necessary extension. Second, it uses that extension to convert the model into a flow. Third
-      it calls ``list_flows`` and filters the returned values based on the flow name.
-    * ``name``: Ignores ``exact_version`` and calls ``list_flows``, then filters the returned
-      values based on the flow name.
+def initialize_model(setup_id: int) -> Any:
+    """
+    Initialized a model based on a setup_id (i.e., using the exact
+    same parameter settings)
 
     Parameters
     ----------
-    model : object
-        Any model. Must provide either ``model`` or ``name``.
-    name : str
-        Name of the flow. Must provide either ``model`` or ``name``.
-    exact_version : bool
-        Whether to return the flow id of the exact version or all flow ids where the name
-        of the flow matches. This is only taken into account for a model where a version number
-        is available.
+    setup_id : int
+        The Openml setup_id
 
     Returns
     -------
-    int or bool, List
-        flow id iff exists, ``False`` otherwise, List if ``exact_version is False``
+    model
     """
-    if model is None and name is None:
-        raise ValueError(
-            "Need to provide either argument `model` or argument `name`, but both are `None`."
-        )
-    elif model is not None and name is not None:
-        raise ValueError("Must provide either argument `model` or argument `name`, but not both.")
-
-    if model is not None:
-        extension = openml.extensions.get_extension_by_model(model, raise_if_no_extension=True)
-        if extension is None:
-            # This should never happen and is only here to please mypy will be gone soon once the
-            # whole function is removed
-            raise TypeError(extension)
-        flow = extension.model_to_flow(model)
-        flow_name = flow.name
-        external_version = flow.external_version
-    else:
-        flow_name = name
-        exact_version = False
+    setup = get_setup(setup_id)
+    flow = openml.flows.get_flow(setup.flow_id)
+
+    # instead of using scikit-learns or any other library's "set_params" function, we override the
+    # OpenMLFlow objects default parameter value so we can utilize the
+    # Extension.flow_to_model() function to reinitialize the flow with the set defaults.
+    for hyperparameter in setup.parameters.values():
+        structure = flow.get_structure("flow_id")
+        if len(structure[hyperparameter.flow_id]) > 0:
+            subflow = flow.get_subflow(structure[hyperparameter.flow_id])
+        else:
+            subflow = flow
+        subflow.parameters[hyperparameter.parameter_name] = hyperparameter.value
 
-    if exact_version:
-        return flow_exists(name=flow_name, external_version=external_version)
-    else:
-        flows = list_flows(output_format="dataframe")
-        assert isinstance(flows, pd.DataFrame)  # Make mypy happy
-        flows = flows.query('name == "{}"'.format(flow_name))
-        return flows["id"].to_list()
+    model = flow.extension.flow_to_model(flow)
+    return model
 
 
-def __list_flows(api_call: str, output_format: str = "dict") -> Union[Dict, pd.DataFrame]:
-    """Retrieve information about flows from OpenML API
-    and parse it to a dictionary or a Pandas DataFrame.
+def _to_dict(flow_id: int, openml_parameter_settings) -> OrderedDict:
+    """Convert a flow ID and a list of OpenML parameter settings to
+    a dictionary representation that can be serialized to XML.
 
     Parameters
     ----------
-    api_call: str
-        Retrieves the information about flows.
-    output_format: str in {"dict", "dataframe"}
-        The output format.
-    Returns
+    flow_id : int
+        ID of the flow.
+    openml_parameter_settings : List[OpenMLParameter]
+        A list of OpenML parameter settings.
 
+    Returns
     -------
-        The flows information in the specified output format.
+    OrderedDict
+        A dictionary representation of the flow ID and parameter settings.
     """
-    xml_string = openml._api_calls._perform_api_call(api_call, "get")
-    flows_dict = xmltodict.parse(xml_string, force_list=("oml:flow",))
-
-    # Minimalistic check if the XML is useful
-    assert isinstance(flows_dict["oml:flows"]["oml:flow"], list), type(flows_dict["oml:flows"])
-    assert flows_dict["oml:flows"]["@xmlns:oml"] == "http://openml.org/openml", flows_dict[
-        "oml:flows"
-    ]["@xmlns:oml"]
-
-    flows = dict()
-    for flow_ in flows_dict["oml:flows"]["oml:flow"]:
-        fid = int(flow_["oml:id"])
-        flow = {
-            "id": fid,
-            "full_name": flow_["oml:full_name"],
-            "name": flow_["oml:name"],
-            "version": flow_["oml:version"],
-            "external_version": flow_["oml:external_version"],
-            "uploader": flow_["oml:uploader"],
-        }
-        flows[fid] = flow
-
-    if output_format == "dataframe":
-        flows = pd.DataFrame.from_dict(flows, orient="index")
-
-    return flows
-
-
-def _check_flow_for_server_id(flow: OpenMLFlow) -> None:
-    """Raises a ValueError if the flow or any of its subflows has no flow id."""
-
-    # Depth-first search to check if all components were uploaded to the
-    # server before parsing the parameters
-    stack = list()
-    stack.append(flow)
-    while len(stack) > 0:
-        current = stack.pop()
-        if current.flow_id is None:
-            raise ValueError("Flow %s has no flow_id!" % current.name)
-        else:
-            for component in current.components.values():
-                stack.append(component)
-
-
-def assert_flows_equal(
-    flow1: OpenMLFlow,
-    flow2: OpenMLFlow,
-    ignore_parameter_values_on_older_children: Optional[str] = None,
-    ignore_parameter_values: bool = False,
-    ignore_custom_name_if_none: bool = False,
-    check_description: bool = True,
-) -> None:
-    """Check equality of two flows.
-
-    Two flows are equal if their all keys which are not set by the server
-    are equal, as well as all their parameters and components.
+    # for convenience, this function (ab)uses the run object.
+    xml: OrderedDict = OrderedDict()
+    xml["oml:run"] = OrderedDict()
+    xml["oml:run"]["@xmlns:oml"] = "http://openml.org/openml"
+    xml["oml:run"]["oml:flow_id"] = flow_id
+    xml["oml:run"]["oml:parameter_setting"] = openml_parameter_settings
 
-    Parameters
-    ----------
-    flow1 : OpenMLFlow
-
-    flow2 : OpenMLFlow
-
-    ignore_parameter_values_on_older_children : str (optional)
-        If set to ``OpenMLFlow.upload_date``, ignores parameters in a child
-        flow if it's upload date predates the upload date of the parent flow.
+    return xml
 
-    ignore_parameter_values : bool
-        Whether to ignore parameter values when comparing flows.
 
-    ignore_custom_name_if_none : bool
-        Whether to ignore the custom name field if either flow has `custom_name` equal to `None`.
-
-    check_description : bool
-        Whether to ignore matching of flow descriptions.
+def _create_setup_from_xml(result_dict, output_format="object"):
+    """
+    Turns an API xml result into a OpenMLSetup object (or dict)
     """
-    if not isinstance(flow1, OpenMLFlow):
-        raise TypeError("Argument 1 must be of type OpenMLFlow, but is %s" % type(flow1))
-
-    if not isinstance(flow2, OpenMLFlow):
-        raise TypeError("Argument 2 must be of type OpenMLFlow, but is %s" % type(flow2))
-
-    # TODO as they are actually now saved during publish, it might be good to
-    # check for the equality of these as well.
-    generated_by_the_server = [
-        "flow_id",
-        "uploader",
-        "version",
-        "upload_date",
-        # Tags aren't directly created by the server,
-        # but the uploader has no control over them!
-        "tags",
-    ]
-    ignored_by_python_api = ["binary_url", "binary_format", "binary_md5", "model", "_entity_id"]
-
-    for key in set(flow1.__dict__.keys()).union(flow2.__dict__.keys()):
-        if key in generated_by_the_server + ignored_by_python_api:
-            continue
-        attr1 = getattr(flow1, key, None)
-        attr2 = getattr(flow2, key, None)
-        if key == "components":
-            if not (isinstance(attr1, Dict) and isinstance(attr2, Dict)):
-                raise TypeError("Cannot compare components because they are not dictionary.")
-
-            for name in set(attr1.keys()).union(attr2.keys()):
-                if name not in attr1:
-                    raise ValueError(
-                        "Component %s only available in " "argument2, but not in argument1." % name
-                    )
-                if name not in attr2:
-                    raise ValueError(
-                        "Component %s only available in " "argument2, but not in argument1." % name
-                    )
-                assert_flows_equal(
-                    attr1[name],
-                    attr2[name],
-                    ignore_parameter_values_on_older_children,
-                    ignore_parameter_values,
-                    ignore_custom_name_if_none,
+    setup_id = int(result_dict["oml:setup_parameters"]["oml:setup_id"])
+    flow_id = int(result_dict["oml:setup_parameters"]["oml:flow_id"])
+    parameters = {}
+    if "oml:parameter" not in result_dict["oml:setup_parameters"]:
+        parameters = None
+    else:
+        # basically all others
+        xml_parameters = result_dict["oml:setup_parameters"]["oml:parameter"]
+        if isinstance(xml_parameters, dict):
+            id = int(xml_parameters["oml:id"])
+            parameters[id] = _create_setup_parameter_from_xml(
+                result_dict=xml_parameters, output_format=output_format
+            )
+        elif isinstance(xml_parameters, list):
+            for xml_parameter in xml_parameters:
+                id = int(xml_parameter["oml:id"])
+                parameters[id] = _create_setup_parameter_from_xml(
+                    result_dict=xml_parameter, output_format=output_format
                 )
-        elif key == "_extension":
-            continue
-        elif check_description and key == "description":
-            # to ignore matching of descriptions since sklearn based flows may have
-            # altering docstrings and is not guaranteed to be consistent
-            continue
         else:
-            if key == "parameters":
-                if ignore_parameter_values or ignore_parameter_values_on_older_children:
-                    params_flow_1 = set(flow1.parameters.keys())
-                    params_flow_2 = set(flow2.parameters.keys())
-                    symmetric_difference = params_flow_1 ^ params_flow_2
-                    if len(symmetric_difference) > 0:
-                        raise ValueError(
-                            "Flow %s: parameter set of flow "
-                            "differs from the parameters stored "
-                            "on the server." % flow1.name
-                        )
-
-                if ignore_parameter_values_on_older_children:
-                    upload_date_current_flow = dateutil.parser.parse(flow1.upload_date)
-                    upload_date_parent_flow = dateutil.parser.parse(
-                        ignore_parameter_values_on_older_children
-                    )
-                    if upload_date_current_flow < upload_date_parent_flow:
-                        continue
-
-                if ignore_parameter_values:
-                    # Continue needs to be done here as the first if
-                    # statement triggers in both special cases
-                    continue
-            elif (
-                key == "custom_name"
-                and ignore_custom_name_if_none
-                and (attr1 is None or attr2 is None)
-            ):
-                # If specified, we allow `custom_name` inequality if one flow's name is None.
-                # Helps with backwards compatibility as `custom_name` is now auto-generated, but
-                # before it used to be `None`.
-                continue
-            elif key == "parameters_meta_info":
-                # this value is a dictionary where each key is a parameter name, containing another
-                # dictionary with keys specifying the parameter's 'description' and 'data_type'
-                # checking parameter descriptions can be ignored since that might change
-                # data type check can also be ignored if one of them is not defined, i.e., None
-                params1 = set(flow1.parameters_meta_info)
-                params2 = set(flow2.parameters_meta_info)
-                if params1 != params2:
-                    raise ValueError(
-                        "Parameter list in meta info for parameters differ " "in the two flows."
-                    )
-                # iterating over the parameter's meta info list
-                for param in params1:
-                    if (
-                        isinstance(flow1.parameters_meta_info[param], Dict)
-                        and isinstance(flow2.parameters_meta_info[param], Dict)
-                        and "data_type" in flow1.parameters_meta_info[param]
-                        and "data_type" in flow2.parameters_meta_info[param]
-                    ):
-                        value1 = flow1.parameters_meta_info[param]["data_type"]
-                        value2 = flow2.parameters_meta_info[param]["data_type"]
-                    else:
-                        value1 = flow1.parameters_meta_info[param]
-                        value2 = flow2.parameters_meta_info[param]
-                    if value1 is None or value2 is None:
-                        continue
-                    elif value1 != value2:
-                        raise ValueError(
-                            "Flow {}: data type for parameter {} in {} differ "
-                            "as {}\nvs\n{}".format(flow1.name, param, key, value1, value2)
-                        )
-                # the continue is to avoid the 'attr != attr2' check at end of function
-                continue
-
-            if attr1 != attr2:
-                raise ValueError(
-                    "Flow %s: values for attribute '%s' differ: "
-                    "'%s'\nvs\n'%s'." % (str(flow1.name), str(key), str(attr1), str(attr2))
-                )
-
+            raise ValueError(
+                "Expected None, list or dict, received "
+                "something else: %s" % str(type(xml_parameters))
+            )
 
-def _create_flow_from_xml(flow_xml: str) -> OpenMLFlow:
-    """Create flow object from xml
+    if output_format in ["dataframe", "dict"]:
+        return_dict = {"setup_id": setup_id, "flow_id": flow_id}
+        return_dict["parameters"] = parameters
+        return return_dict
+    return OpenMLSetup(setup_id, flow_id, parameters)
 
-    Parameters
-    ----------
-    flow_xml: xml representation of a flow
 
-    Returns
-    -------
-    OpenMLFlow
+def _create_setup_parameter_from_xml(result_dict, output_format="object"):
     """
-
-    return OpenMLFlow._from_dict(xmltodict.parse(flow_xml))
-
-
-def delete_flow(flow_id: int) -> bool:
-    """Delete flow with id `flow_id` from the OpenML server.
-
-    You can only delete flows which you uploaded and which
-    which are not linked to runs.
-
-    Parameters
-    ----------
-    flow_id : int
-        OpenML id of the flow
-
-    Returns
-    -------
-    bool
-        True if the deletion was successful. False otherwise.
+    Create an OpenMLParameter object or a dictionary from an API xml result.
     """
-    return openml.utils._delete_entity("flow", flow_id)
+    if output_format == "object":
+        return OpenMLParameter(
+            input_id=int(result_dict["oml:id"]),
+            flow_id=int(result_dict["oml:flow_id"]),
+            flow_name=result_dict["oml:flow_name"],
+            full_name=result_dict["oml:full_name"],
+            parameter_name=result_dict["oml:parameter_name"],
+            data_type=result_dict["oml:data_type"],
+            default_value=result_dict["oml:default_value"],
+            value=result_dict["oml:value"],
+        )
+    else:
+        return {
+            "input_id": int(result_dict["oml:id"]),
+            "flow_id": int(result_dict["oml:flow_id"]),
+            "flow_name": result_dict["oml:flow_name"],
+            "full_name": result_dict["oml:full_name"],
+            "parameter_name": result_dict["oml:parameter_name"],
+            "data_type": result_dict["oml:data_type"],
+            "default_value": result_dict["oml:default_value"],
+            "value": result_dict["oml:value"],
+        }

From 3ab73e0b4da5d7cf57b73f242e27239c65dd67dc Mon Sep 17 00:00:00 2001
From: Lennart Purucker <contact@lennart-purucker.com>
Date: Mon, 30 Oct 2023 14:49:00 -0700
Subject: [PATCH 46/48] fix copy paste error

---
 openml/flows/functions.py | 701 +++++++++++++++++++++++++-------------
 1 file changed, 455 insertions(+), 246 deletions(-)

diff --git a/openml/flows/functions.py b/openml/flows/functions.py
index bc6d21aaa..45eea42dc 100644
--- a/openml/flows/functions.py
+++ b/openml/flows/functions.py
@@ -1,377 +1,586 @@
 # License: BSD 3-Clause
 import warnings
+
+import dateutil.parser
 from collections import OrderedDict
-import io
 import os
-from typing import Any, Union, List, Dict, Optional
-
+import io
+import re
 import xmltodict
 import pandas as pd
+from typing import Any, Union, Dict, Optional, List
 
-import openml
-from .. import config
-from .setup import OpenMLSetup, OpenMLParameter
-from openml.flows import flow_exists
-import openml.exceptions
+from ..exceptions import OpenMLCacheException
+import openml._api_calls
+from . import OpenMLFlow
 import openml.utils
 
 
-def setup_exists(flow) -> int:
-    """
-    Checks whether a hyperparameter configuration already exists on the server.
+FLOWS_CACHE_DIR_NAME = "flows"
 
-    Parameters
-    ----------
-    flow : flow
-        The openml flow object. Should have flow id present for the main flow
-        and all subflows (i.e., it should be downloaded from the server by
-        means of flow.get, and not instantiated locally)
+
+def _get_cached_flows() -> OrderedDict:
+    """Return all the cached flows.
 
     Returns
     -------
-    setup_id : int
-        setup id iff exists, False otherwise
+    flows : OrderedDict
+        Dictionary with flows. Each flow is an instance of OpenMLFlow.
     """
-    # sadly, this api call relies on a run object
-    openml.flows.functions._check_flow_for_server_id(flow)
-    if flow.model is None:
-        raise ValueError("Flow should have model field set with the actual model.")
-    if flow.extension is None:
-        raise ValueError("Flow should have model field set with the correct extension.")
-
-    # checks whether the flow exists on the server and flow ids align
-    exists = flow_exists(flow.name, flow.external_version)
-    if exists != flow.flow_id:
-        raise ValueError(
-            f"Local flow id ({flow.id}) differs from server id ({exists}). "
-            "If this issue persists, please contact the developers."
-        )
+    flows = OrderedDict()  # type: 'OrderedDict[int, OpenMLFlow]'
 
-    openml_param_settings = flow.extension.obtain_parameter_values(flow)
-    description = xmltodict.unparse(_to_dict(flow.flow_id, openml_param_settings), pretty=True)
-    file_elements = {
-        "description": ("description.arff", description)
-    }  # type: openml._api_calls.FILE_ELEMENTS_TYPE
-    result = openml._api_calls._perform_api_call(
-        "/setup/exists/", "post", file_elements=file_elements
-    )
-    result_dict = xmltodict.parse(result)
-    setup_id = int(result_dict["oml:setup_exists"]["oml:id"])
-    return setup_id if setup_id > 0 else False
+    flow_cache_dir = openml.utils._create_cache_directory(FLOWS_CACHE_DIR_NAME)
+    directory_content = os.listdir(flow_cache_dir)
+    directory_content.sort()
+    # Find all flow ids for which we have downloaded the flow description.
+    # Cache entries are named after the flow id, so any entry that is not
+    # purely numeric (e.g. a stray file) is skipped instead of parsed.
+    for filename in directory_content:
+        if not re.fullmatch(r"[0-9]+", filename):
+            continue
+
+        fid = int(filename)
+        flows[fid] = _get_cached_flow(fid)
+
+    return flows
 
 
-def _get_cached_setup(setup_id: int):
-    """Load a run from the cache.
+def _get_cached_flow(fid: int) -> OpenMLFlow:
+    """Get the cached flow with the given id.
 
     Parameters
     ----------
-    setup_id : int
-        ID of the setup to be loaded.
+    fid : int
+        Flow id.
 
     Returns
     -------
-    OpenMLSetup
-        The loaded setup object.
-
-    Raises
-    ------
-    OpenMLCacheException
-        If the setup file for the given setup ID is not cached.
+    OpenMLFlow.
     """
-    cache_dir = config.get_cache_directory()
-    setup_cache_dir = os.path.join(cache_dir, "setups", str(setup_id))
-    try:
-        setup_file = os.path.join(setup_cache_dir, "description.xml")
-        with io.open(setup_file, encoding="utf8") as fh:
-            setup_xml = xmltodict.parse(fh.read())
-            setup = _create_setup_from_xml(setup_xml, output_format="object")
-        return setup
 
+    fid_cache_dir = openml.utils._create_cache_directory_for_id(FLOWS_CACHE_DIR_NAME, fid)
+    flow_file = os.path.join(fid_cache_dir, "flow.xml")
+
+    try:
+        with io.open(flow_file, encoding="utf8") as fh:
+            return _create_flow_from_xml(fh.read())
     except (OSError, IOError):
-        raise openml.exceptions.OpenMLCacheException(
-            "Setup file for setup id %d not cached" % setup_id
-        )
+        openml.utils._remove_cache_dir_for_id(FLOWS_CACHE_DIR_NAME, fid_cache_dir)
+        raise OpenMLCacheException("Flow file for fid %d not " "cached" % fid)
 
 
-def get_setup(setup_id):
-    """
-     Downloads the setup (configuration) description from OpenML
-     and returns a structured object
+@openml.utils.thread_safe_if_oslo_installed
+def get_flow(flow_id: int, reinstantiate: bool = False, strict_version: bool = True) -> OpenMLFlow:
+    """Download the OpenML flow for a given flow ID.
 
     Parameters
     ----------
-    setup_id : int
-        The Openml setup_id
+    flow_id : int
+        The OpenML flow id.
+
+    reinstantiate: bool
+        Whether to reinstantiate the flow to a model instance.
+
+    strict_version : bool, default=True
+        Whether to fail if version requirements are not fulfilled.
 
     Returns
     -------
-    dict or OpenMLSetup(an initialized openml setup object)
+    flow : OpenMLFlow
+        the flow
     """
-    setup_dir = os.path.join(config.get_cache_directory(), "setups", str(setup_id))
-    setup_file = os.path.join(setup_dir, "description.xml")
+    flow_id = int(flow_id)
+    flow = _get_flow_description(flow_id)
+
+    if reinstantiate:
+        flow.model = flow.extension.flow_to_model(flow, strict_version=strict_version)
+        if not strict_version:
+            # check if we need to return a new flow b/c of version mismatch
+            new_flow = flow.extension.model_to_flow(flow.model)
+            if new_flow.dependencies != flow.dependencies:
+                return new_flow
+    return flow
+
 
-    if not os.path.exists(setup_dir):
-        os.makedirs(setup_dir)
+def _get_flow_description(flow_id: int) -> OpenMLFlow:
+    """Get the Flow for a given  ID.
 
+    Does the real work for get_flow. It returns a cached flow
+    instance if the flow exists locally, otherwise it downloads the
+    flow and returns an instance created from the xml representation.
+
+    Parameters
+    ----------
+    flow_id : int
+        The OpenML flow id.
+
+    Returns
+    -------
+    OpenMLFlow
+    """
     try:
-        return _get_cached_setup(setup_id)
-    except openml.exceptions.OpenMLCacheException:
-        url_suffix = "/setup/%d" % setup_id
-        setup_xml = openml._api_calls._perform_api_call(url_suffix, "get")
-        with io.open(setup_file, "w", encoding="utf8") as fh:
-            fh.write(setup_xml)
+        return _get_cached_flow(flow_id)
+    except OpenMLCacheException:
+        xml_file = os.path.join(
+            openml.utils._create_cache_directory_for_id(FLOWS_CACHE_DIR_NAME, flow_id),
+            "flow.xml",
+        )
+
+        flow_xml = openml._api_calls._perform_api_call("flow/%d" % flow_id, request_method="get")
+        with io.open(xml_file, "w", encoding="utf8") as fh:
+            fh.write(flow_xml)
 
-    result_dict = xmltodict.parse(setup_xml)
-    return _create_setup_from_xml(result_dict, output_format="object")
+        return _create_flow_from_xml(flow_xml)
 
 
-def list_setups(
+def list_flows(
     offset: Optional[int] = None,
     size: Optional[int] = None,
-    flow: Optional[int] = None,
     tag: Optional[str] = None,
-    setup: Optional[List] = None,
-    output_format: str = "object",
+    output_format: str = "dict",
+    **kwargs
 ) -> Union[Dict, pd.DataFrame]:
     """
-    List all setups matching all of the given filters.
+    Return a list of all flows which are on OpenML.
+    (Supports large amount of results)
 
     Parameters
     ----------
     offset : int, optional
+        the number of flows to skip, starting from the first
     size : int, optional
-    flow : int, optional
+        the maximum number of flows to return
     tag : str, optional
-    setup : list(int), optional
-    output_format: str, optional (default='object')
+        the tag to include
+    output_format: str, optional (default='dict')
         The parameter decides the format of the output.
-        - If 'object' the output is a dict of OpenMLSetup objects
         - If 'dict' the output is a dict of dict
         - If 'dataframe' the output is a pandas DataFrame
+    kwargs: dict, optional
+        Legal filter operators: uploader.
 
     Returns
     -------
-    dict or dataframe
+    flows : dict of dicts, or dataframe
+        - If output_format='dict'
+            A mapping from flow_id to a dict giving a brief overview of the
+            respective flow.
+            Every flow is represented by a dictionary containing
+            the following information:
+            - flow id
+            - full name
+            - name
+            - version
+            - external version
+            - uploader
+
+        - If output_format='dataframe'
+            Each row maps to a flow
+            Each column contains the following information:
+            - flow id
+            - full name
+            - name
+            - version
+            - external version
+            - uploader
     """
-    if output_format not in ["dataframe", "dict", "object"]:
+    if output_format not in ["dataframe", "dict"]:
         raise ValueError(
-            "Invalid output format selected. " "Only 'dict', 'object', or 'dataframe' applicable."
+            "Invalid output format selected. " "Only 'dict' or 'dataframe' applicable."
         )
 
     # TODO: [0.15]
     if output_format == "dict":
         msg = (
-            "Support for `output_format` of 'dict' will be removed in 0.15. "
-            "To ensure your code will continue to work, "
-            "use `output_format`='dataframe' or `output_format`='object'."
+            "Support for `output_format` of 'dict' will be removed in 0.15 "
+            "and pandas dataframes will be returned instead. To ensure your code "
+            "will continue to work, use `output_format`='dataframe'."
         )
         warnings.warn(msg, category=FutureWarning, stacklevel=2)
 
-    batch_size = 1000  # batch size for setups is lower
     return openml.utils._list_all(
         output_format=output_format,
-        listing_call=_list_setups,
+        listing_call=_list_flows,
         offset=offset,
         size=size,
-        flow=flow,
         tag=tag,
-        setup=setup,
-        batch_size=batch_size,
+        **kwargs
     )
 
 
-def _list_setups(setup=None, output_format="object", **kwargs):
+def _list_flows(output_format="dict", **kwargs) -> Union[Dict, pd.DataFrame]:
     """
-    Perform API call `/setup/list/{filters}`
+    Perform the api call that return a list of all flows.
 
     Parameters
     ----------
-    The setup argument that is a list is separated from the single value
-    filters which are put into the kwargs.
-
-    setup : list(int), optional
-
     output_format: str, optional (default='dict')
         The parameter decides the format of the output.
         - If 'dict' the output is a dict of dict
         - If 'dataframe' the output is a pandas DataFrame
 
     kwargs: dict, optional
-        Legal filter operators: flow, setup, limit, offset, tag.
+        Legal filter operators: uploader, tag, limit, offset.
 
     Returns
     -------
-    dict or dataframe
+    flows : dict, or dataframe
     """
+    api_call = "flow/list"
 
-    api_call = "setup/list"
-    if setup is not None:
-        api_call += "/setup/%s" % ",".join([str(int(i)) for i in setup])
     if kwargs is not None:
         for operator, value in kwargs.items():
             api_call += "/%s/%s" % (operator, value)
 
-    return __list_setups(api_call=api_call, output_format=output_format)
+    return __list_flows(api_call=api_call, output_format=output_format)
 
 
-def __list_setups(api_call, output_format="object"):
-    """Helper function to parse API calls which are lists of setups"""
-    xml_string = openml._api_calls._perform_api_call(api_call, "get")
-    setups_dict = xmltodict.parse(xml_string, force_list=("oml:setup",))
-    openml_uri = "http://openml.org/openml"
-    # Minimalistic check if the XML is useful
-    if "oml:setups" not in setups_dict:
-        raise ValueError(
-            'Error in return XML, does not contain "oml:setups":' " %s" % str(setups_dict)
-        )
-    elif "@xmlns:oml" not in setups_dict["oml:setups"]:
-        raise ValueError(
-            "Error in return XML, does not contain "
-            '"oml:setups"/@xmlns:oml: %s' % str(setups_dict)
-        )
-    elif setups_dict["oml:setups"]["@xmlns:oml"] != openml_uri:
-        raise ValueError(
-            "Error in return XML, value of  "
-            '"oml:seyups"/@xmlns:oml is not '
-            '"%s": %s' % (openml_uri, str(setups_dict))
-        )
+def flow_exists(name: str, external_version: str) -> Union[int, bool]:
+    """Retrieves the flow id.
 
-    assert isinstance(setups_dict["oml:setups"]["oml:setup"], list), type(setups_dict["oml:setups"])
+    A flow is uniquely identified by name + external_version.
 
-    setups = dict()
-    for setup_ in setups_dict["oml:setups"]["oml:setup"]:
-        # making it a dict to give it the right format
-        current = _create_setup_from_xml(
-            {"oml:setup_parameters": setup_}, output_format=output_format
-        )
-        if output_format == "object":
-            setups[current.setup_id] = current
-        else:
-            setups[current["setup_id"]] = current
+    Parameters
+    ----------
+    name : string
+        Name of the flow
+    external_version : string
+        Version information associated with flow.
 
-    if output_format == "dataframe":
-        setups = pd.DataFrame.from_dict(setups, orient="index")
+    Returns
+    -------
+    flow_exist : int or bool
+        flow id iff exists, False otherwise
 
-    return setups
+    Notes
+    -----
+    see https://www.openml.org/api_docs/#!/flow/get_flow_exists_name_version
+    """
+    if not (isinstance(name, str) and len(name) > 0):
+        raise ValueError("Argument 'name' should be a non-empty string")
+    if not (isinstance(external_version, str) and len(external_version) > 0):
+        raise ValueError("Argument 'version' should be a non-empty string")
+
+    xml_response = openml._api_calls._perform_api_call(
+        "flow/exists",
+        "post",
+        data={"name": name, "external_version": external_version},
+    )
 
+    result_dict = xmltodict.parse(xml_response)
+    flow_id = int(result_dict["oml:flow_exists"]["oml:id"])
+    return flow_id if flow_id > 0 else False
 
-def initialize_model(setup_id: int) -> Any:
-    """
-    Initialized a model based on a setup_id (i.e., using the exact
-    same parameter settings)
+
+def get_flow_id(
+    model: Optional[Any] = None,
+    name: Optional[str] = None,
+    exact_version=True,
+) -> Union[int, bool, List[int]]:
+    """Retrieves the flow id for a model or a flow name.
+
+    Provide either a model or a name to this function. Depending on the input, it does
+
+    * ``model`` and ``exact_version == True``: This helper function first queries for the necessary
+      extension. Second, it uses that extension to convert the model into a flow. Third, it
+      executes ``flow_exists`` to potentially obtain the flow id the flow is published to the
+      server.
+    * ``model`` and ``exact_version == False``: This helper function first queries for the
+      necessary extension. Second, it uses that extension to convert the model into a flow. Third
+      it calls ``list_flows`` and filters the returned values based on the flow name.
+    * ``name``: Ignores ``exact_version`` and calls ``list_flows``, then filters the returned
+      values based on the flow name.
 
     Parameters
     ----------
-    setup_id : int
-        The Openml setup_id
+    model : object
+        Any model. Must provide either ``model`` or ``name``.
+    name : str
+        Name of the flow. Must provide either ``model`` or ``name``.
+    exact_version : bool
+        Whether to return the flow id of the exact version or all flow ids where the name
+        of the flow matches. This is only taken into account for a model where a version number
+        is available.
 
     Returns
     -------
-    model
+    int or bool, List
+        flow id iff exists, ``False`` otherwise, List if ``exact_version is False``
     """
-    setup = get_setup(setup_id)
-    flow = openml.flows.get_flow(setup.flow_id)
-
-    # instead of using scikit-learns or any other library's "set_params" function, we override the
-    # OpenMLFlow objects default parameter value so we can utilize the
-    # Extension.flow_to_model() function to reinitialize the flow with the set defaults.
-    for hyperparameter in setup.parameters.values():
-        structure = flow.get_structure("flow_id")
-        if len(structure[hyperparameter.flow_id]) > 0:
-            subflow = flow.get_subflow(structure[hyperparameter.flow_id])
-        else:
-            subflow = flow
-        subflow.parameters[hyperparameter.parameter_name] = hyperparameter.value
+    if model is None and name is None:
+        raise ValueError(
+            "Need to provide either argument `model` or argument `name`, but both are `None`."
+        )
+    elif model is not None and name is not None:
+        raise ValueError("Must provide either argument `model` or argument `name`, but not both.")
+
+    if model is not None:
+        extension = openml.extensions.get_extension_by_model(model, raise_if_no_extension=True)
+        if extension is None:
+            # This should never happen and is only here to please mypy; will be gone soon once the
+            # whole function is removed
+            raise TypeError(extension)
+        flow = extension.model_to_flow(model)
+        flow_name = flow.name
+        external_version = flow.external_version
+    else:
+        flow_name = name
+        exact_version = False
 
-    model = flow.extension.flow_to_model(flow)
-    return model
+    if exact_version:
+        return flow_exists(name=flow_name, external_version=external_version)
+    else:
+        flows = list_flows(output_format="dataframe")
+        assert isinstance(flows, pd.DataFrame)  # Make mypy happy
+        flows = flows.query('name == "{}"'.format(flow_name))
+        return flows["id"].to_list()
 
 
-def _to_dict(flow_id: int, openml_parameter_settings) -> OrderedDict:
-    """Convert a flow ID and a list of OpenML parameter settings to
-    a dictionary representation that can be serialized to XML.
+def __list_flows(api_call: str, output_format: str = "dict") -> Union[Dict, pd.DataFrame]:
+    """Retrieve information about flows from OpenML API
+    and parse it to a dictionary or a Pandas DataFrame.
 
     Parameters
     ----------
-    flow_id : int
-        ID of the flow.
-    openml_parameter_settings : List[OpenMLParameter]
-        A list of OpenML parameter settings.
-
+    api_call: str
+        Retrieves the information about flows.
+    output_format: str in {"dict", "dataframe"}
+        The output format.
+
     Returns
     -------
-    OrderedDict
-        A dictionary representation of the flow ID and parameter settings.
+        The flows information in the specified output format.
     """
-    # for convenience, this function (ab)uses the run object.
-    xml: OrderedDict = OrderedDict()
-    xml["oml:run"] = OrderedDict()
-    xml["oml:run"]["@xmlns:oml"] = "http://openml.org/openml"
-    xml["oml:run"]["oml:flow_id"] = flow_id
-    xml["oml:run"]["oml:parameter_setting"] = openml_parameter_settings
+    xml_string = openml._api_calls._perform_api_call(api_call, "get")
+    flows_dict = xmltodict.parse(xml_string, force_list=("oml:flow",))
 
-    return xml
+    # Minimalistic check if the XML is useful
+    assert isinstance(flows_dict["oml:flows"]["oml:flow"], list), type(flows_dict["oml:flows"])
+    assert flows_dict["oml:flows"]["@xmlns:oml"] == "http://openml.org/openml", flows_dict[
+        "oml:flows"
+    ]["@xmlns:oml"]
+
+    flows = dict()
+    for flow_ in flows_dict["oml:flows"]["oml:flow"]:
+        fid = int(flow_["oml:id"])
+        flow = {
+            "id": fid,
+            "full_name": flow_["oml:full_name"],
+            "name": flow_["oml:name"],
+            "version": flow_["oml:version"],
+            "external_version": flow_["oml:external_version"],
+            "uploader": flow_["oml:uploader"],
+        }
+        flows[fid] = flow
 
+    if output_format == "dataframe":
+        flows = pd.DataFrame.from_dict(flows, orient="index")
 
-def _create_setup_from_xml(result_dict, output_format="object"):
-    """
-    Turns an API xml result into a OpenMLSetup object (or dict)
+    return flows
+
+
+def _check_flow_for_server_id(flow: OpenMLFlow) -> None:
+    """Raises a ValueError if the flow or any of its subflows has no flow id."""
+
+    # Depth-first search to check if all components were uploaded to the
+    # server before parsing the parameters
+    stack = list()
+    stack.append(flow)
+    while len(stack) > 0:
+        current = stack.pop()
+        if current.flow_id is None:
+            raise ValueError("Flow %s has no flow_id!" % current.name)
+        else:
+            for component in current.components.values():
+                stack.append(component)
+
+
+def assert_flows_equal(
+    flow1: OpenMLFlow,
+    flow2: OpenMLFlow,
+    ignore_parameter_values_on_older_children: Optional[str] = None,
+    ignore_parameter_values: bool = False,
+    ignore_custom_name_if_none: bool = False,
+    check_description: bool = True,
+) -> None:
+    """Check equality of two flows.
+
+    Two flows are equal if all their keys which are not set by the server
+    are equal, as well as all their parameters and components.
+
+    Parameters
+    ----------
+    flow1 : OpenMLFlow
+
+    flow2 : OpenMLFlow
+
+    ignore_parameter_values_on_older_children : str (optional)
+        If set to ``OpenMLFlow.upload_date``, ignores parameters in a child
+        flow if its upload date predates the upload date of the parent flow.
+
+    ignore_parameter_values : bool
+        Whether to ignore parameter values when comparing flows.
+
+    ignore_custom_name_if_none : bool
+        Whether to ignore the custom name field if either flow has `custom_name` equal to `None`.
+
+    check_description : bool
+        Whether to ignore matching of flow descriptions.
     """
-    setup_id = int(result_dict["oml:setup_parameters"]["oml:setup_id"])
-    flow_id = int(result_dict["oml:setup_parameters"]["oml:flow_id"])
-    parameters = {}
-    if "oml:parameter" not in result_dict["oml:setup_parameters"]:
-        parameters = None
-    else:
-        # basically all others
-        xml_parameters = result_dict["oml:setup_parameters"]["oml:parameter"]
-        if isinstance(xml_parameters, dict):
-            id = int(xml_parameters["oml:id"])
-            parameters[id] = _create_setup_parameter_from_xml(
-                result_dict=xml_parameters, output_format=output_format
-            )
-        elif isinstance(xml_parameters, list):
-            for xml_parameter in xml_parameters:
-                id = int(xml_parameter["oml:id"])
-                parameters[id] = _create_setup_parameter_from_xml(
-                    result_dict=xml_parameter, output_format=output_format
+    if not isinstance(flow1, OpenMLFlow):
+        raise TypeError("Argument 1 must be of type OpenMLFlow, but is %s" % type(flow1))
+
+    if not isinstance(flow2, OpenMLFlow):
+        raise TypeError("Argument 2 must be of type OpenMLFlow, but is %s" % type(flow2))
+
+    # TODO as they are actually now saved during publish, it might be good to
+    # check for the equality of these as well.
+    generated_by_the_server = [
+        "flow_id",
+        "uploader",
+        "version",
+        "upload_date",
+        # Tags aren't directly created by the server,
+        # but the uploader has no control over them!
+        "tags",
+    ]
+    ignored_by_python_api = ["binary_url", "binary_format", "binary_md5", "model", "_entity_id"]
+
+    for key in set(flow1.__dict__.keys()).union(flow2.__dict__.keys()):
+        if key in generated_by_the_server + ignored_by_python_api:
+            continue
+        attr1 = getattr(flow1, key, None)
+        attr2 = getattr(flow2, key, None)
+        if key == "components":
+            if not (isinstance(attr1, Dict) and isinstance(attr2, Dict)):
+                raise TypeError("Cannot compare components because they are not dictionary.")
+
+            for name in set(attr1.keys()).union(attr2.keys()):
+                if name not in attr1:
+                    raise ValueError(
+                        "Component %s only available in " "argument2, but not in argument1." % name
+                    )
+                if name not in attr2:
+                    raise ValueError(
+                        "Component %s only available in " "argument2, but not in argument1." % name
+                    )
+                assert_flows_equal(
+                    attr1[name],
+                    attr2[name],
+                    ignore_parameter_values_on_older_children,
+                    ignore_parameter_values,
+                    ignore_custom_name_if_none,
                 )
+        elif key == "_extension":
+            continue
+        elif check_description and key == "description":
+            # to ignore matching of descriptions since sklearn based flows may have
+            # altering docstrings that are not guaranteed to be consistent
+            continue
         else:
-            raise ValueError(
-                "Expected None, list or dict, received "
-                "something else: %s" % str(type(xml_parameters))
-            )
+            if key == "parameters":
+                if ignore_parameter_values or ignore_parameter_values_on_older_children:
+                    params_flow_1 = set(flow1.parameters.keys())
+                    params_flow_2 = set(flow2.parameters.keys())
+                    symmetric_difference = params_flow_1 ^ params_flow_2
+                    if len(symmetric_difference) > 0:
+                        raise ValueError(
+                            "Flow %s: parameter set of flow "
+                            "differs from the parameters stored "
+                            "on the server." % flow1.name
+                        )
+
+                if ignore_parameter_values_on_older_children:
+                    upload_date_current_flow = dateutil.parser.parse(flow1.upload_date)
+                    upload_date_parent_flow = dateutil.parser.parse(
+                        ignore_parameter_values_on_older_children
+                    )
+                    if upload_date_current_flow < upload_date_parent_flow:
+                        continue
+
+                if ignore_parameter_values:
+                    # Continue needs to be done here as the first if
+                    # statement triggers in both special cases
+                    continue
+            elif (
+                key == "custom_name"
+                and ignore_custom_name_if_none
+                and (attr1 is None or attr2 is None)
+            ):
+                # If specified, we allow `custom_name` inequality if one flow's name is None.
+                # Helps with backwards compatibility as `custom_name` is now auto-generated, but
+                # before it used to be `None`.
+                continue
+            elif key == "parameters_meta_info":
+                # this value is a dictionary where each key is a parameter name, containing another
+                # dictionary with keys specifying the parameter's 'description' and 'data_type'
+                # checking parameter descriptions can be ignored since that might change
+                # data type check can also be ignored if one of them is not defined, i.e., None
+                params1 = set(flow1.parameters_meta_info)
+                params2 = set(flow2.parameters_meta_info)
+                if params1 != params2:
+                    raise ValueError(
+                        "Parameter list in meta info for parameters differ " "in the two flows."
+                    )
+                # iterating over the parameter's meta info list
+                for param in params1:
+                    if (
+                        isinstance(flow1.parameters_meta_info[param], Dict)
+                        and isinstance(flow2.parameters_meta_info[param], Dict)
+                        and "data_type" in flow1.parameters_meta_info[param]
+                        and "data_type" in flow2.parameters_meta_info[param]
+                    ):
+                        value1 = flow1.parameters_meta_info[param]["data_type"]
+                        value2 = flow2.parameters_meta_info[param]["data_type"]
+                    else:
+                        value1 = flow1.parameters_meta_info[param]
+                        value2 = flow2.parameters_meta_info[param]
+                    if value1 is None or value2 is None:
+                        continue
+                    elif value1 != value2:
+                        raise ValueError(
+                            "Flow {}: data type for parameter {} in {} differ "
+                            "as {}\nvs\n{}".format(flow1.name, param, key, value1, value2)
+                        )
+                # the continue is to avoid the 'attr1 != attr2' check at end of function
+                continue
+
+            if attr1 != attr2:
+                raise ValueError(
+                    "Flow %s: values for attribute '%s' differ: "
+                    "'%s'\nvs\n'%s'." % (str(flow1.name), str(key), str(attr1), str(attr2))
+                )
+
 
-    if output_format in ["dataframe", "dict"]:
-        return_dict = {"setup_id": setup_id, "flow_id": flow_id}
-        return_dict["parameters"] = parameters
-        return return_dict
-    return OpenMLSetup(setup_id, flow_id, parameters)
+def _create_flow_from_xml(flow_xml: str) -> OpenMLFlow:
+    """Create flow object from xml
 
+    Parameters
+    ----------
+    flow_xml: xml representation of a flow
 
-def _create_setup_parameter_from_xml(result_dict, output_format="object"):
+    Returns
+    -------
+    OpenMLFlow
     """
-    Create an OpenMLParameter object or a dictionary from an API xml result.
+
+    return OpenMLFlow._from_dict(xmltodict.parse(flow_xml))
+
+
+def delete_flow(flow_id: int) -> bool:
+    """Delete flow with id `flow_id` from the OpenML server.
+
+    You can only delete flows which you uploaded and which
+    are not linked to runs.
+
+    Parameters
+    ----------
+    flow_id : int
+        OpenML id of the flow
+
+    Returns
+    -------
+    bool
+        True if the deletion was successful. False otherwise.
     """
-    if output_format == "object":
-        return OpenMLParameter(
-            input_id=int(result_dict["oml:id"]),
-            flow_id=int(result_dict["oml:flow_id"]),
-            flow_name=result_dict["oml:flow_name"],
-            full_name=result_dict["oml:full_name"],
-            parameter_name=result_dict["oml:parameter_name"],
-            data_type=result_dict["oml:data_type"],
-            default_value=result_dict["oml:default_value"],
-            value=result_dict["oml:value"],
-        )
-    else:
-        return {
-            "input_id": int(result_dict["oml:id"]),
-            "flow_id": int(result_dict["oml:flow_id"]),
-            "flow_name": result_dict["oml:flow_name"],
-            "full_name": result_dict["oml:full_name"],
-            "parameter_name": result_dict["oml:parameter_name"],
-            "data_type": result_dict["oml:data_type"],
-            "default_value": result_dict["oml:default_value"],
-            "value": result_dict["oml:value"],
-        }
+    return openml.utils._delete_entity("flow", flow_id)

From ab6f2028ebc7498ab9d8e1bd680e02c8b4845cfd Mon Sep 17 00:00:00 2001
From: Lennart Purucker <contact@lennart-purucker.com>
Date: Mon, 30 Oct 2023 14:50:49 -0700
Subject: [PATCH 47/48] Update functions.py of setup to fix mypy error

---
 openml/setups/functions.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/openml/setups/functions.py b/openml/setups/functions.py
index 410a1e964..bc6d21aaa 100644
--- a/openml/setups/functions.py
+++ b/openml/setups/functions.py
@@ -287,7 +287,7 @@ def initialize_model(setup_id: int) -> Any:
     return model
 
 
-def _to_dict(flow_id: int, openml_parameter_settings):
+def _to_dict(flow_id: int, openml_parameter_settings) -> OrderedDict:
     """Convert a flow ID and a list of OpenML parameter settings to
     a dictionary representation that can be serialized to XML.
 
@@ -304,7 +304,7 @@ def _to_dict(flow_id: int, openml_parameter_settings):
         A dictionary representation of the flow ID and parameter settings.
     """
     # for convenience, this function (ab)uses the run object.
-    xml = OrderedDict()
+    xml: OrderedDict = OrderedDict()
     xml["oml:run"] = OrderedDict()
     xml["oml:run"]["@xmlns:oml"] = "http://openml.org/openml"
     xml["oml:run"]["oml:flow_id"] = flow_id

From 049230ddb622194fde2c81e4de820a2e7179b799 Mon Sep 17 00:00:00 2001
From: Vishal Parmar <vishalm524112@gmail.com>
Date: Wed, 1 Nov 2023 00:02:38 +0530
Subject: [PATCH 48/48] Update progress.rst

---
 doc/progress.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/progress.rst b/doc/progress.rst
index 3fc493914..6fed41326 100644
--- a/doc/progress.rst
+++ b/doc/progress.rst
@@ -10,6 +10,7 @@ next
 ~~~~~~
 
  * MAINT #1280: Use the server-provided ``parquet_url`` instead of ``minio_url`` to determine the location of the parquet file.
+ * ADD #716: add documentation for remaining attributes of classes and functions.
 
 0.14.1
 ~~~~~~