diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4086a4b1..c26e014c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -44,7 +44,7 @@ repos: # # Similar to: https://stackoverflow.com/a/73603491/5755604 # additional_dependencies: ['types-PyYAML'] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.8.6 + rev: v0.9.3 hooks: - id: ruff args: diff --git a/notebooks/4.0c-mb-feature-importances.ipynb b/notebooks/4.0c-mb-feature-importances.ipynb index be90b3f7..e1bfea78 100644 --- a/notebooks/4.0c-mb-feature-importances.ipynb +++ b/notebooks/4.0c-mb-feature-importances.ipynb @@ -1203,7 +1203,8 @@ "plt.axis(\"off\")\n", "plt.tight_layout()\n", "plt.savefig(\n", - " f\"../reports/Graphs/attention_head_{h+1}_layer_{l+1}_{key}.pdf\", bbox_inches=\"tight\"\n", + " f\"../reports/Graphs/attention_head_{h + 1}_layer_{l + 1}_{key}.pdf\",\n", + " bbox_inches=\"tight\",\n", ")" ] }, @@ -1277,7 +1278,7 @@ " alpha=el.item(),\n", " )\n", "\n", - " axes[l, h].set_title(f\"head {l+1,h+1}\", size=\"xx-small\")\n", + " axes[l, h].set_title(f\"head {l + 1, h + 1}\", size=\"xx-small\")\n", " # fig.tight_layout()\n", " axes[l, h].set_xticks([])\n", " axes[l, h].set_yticks([])\n", diff --git a/src/otc/data/dataset.py b/src/otc/data/dataset.py index e2224a33..5ac68f80 100644 --- a/src/otc/data/dataset.py +++ b/src/otc/data/dataset.py @@ -52,15 +52,15 @@ def __init__( # infer feature names from dataframe. if isinstance(x, pd.DataFrame): feature_names = x.columns.tolist() - assert ( - len(feature_names) == x.shape[1] - ), "`len('feature_names)` must match `X.shape[1]`" + assert len(feature_names) == x.shape[1], ( + "`len('feature_names)` must match `X.shape[1]`" + ) # calculate cat indices cat_features = cat_features if cat_features else [] - assert set(cat_features).issubset( - feature_names - ), "Categorical features must be a subset of feature names." + assert set(cat_features).issubset(feature_names), ( + "Categorical features must be a subset of feature names." + ) self._cat_idx = [ feature_names.index(i) for i in cat_features if i in feature_names @@ -77,12 +77,12 @@ def __init__( y = y.to_numpy() if isinstance(y, pd.Series) else y weight = weight.to_numpy() if isinstance(weight, pd.Series) else weight - assert ( - x.shape[0] == y.shape[0] - ), "Length of feature matrix must match length of target." - assert len(cat_features) == len( - self._cat_unique_counts - ), "For all categorical features the number of unique entries must be provided." + assert x.shape[0] == y.shape[0], ( + "Length of feature matrix must match length of target." + ) + assert len(cat_features) == len(self._cat_unique_counts), ( + "For all categorical features the number of unique entries must be provided." + ) # adjust target to be either 0 or 1 self.y = torch.tensor(y).float() @@ -105,9 +105,9 @@ def __init__( if weight is not None else torch.ones(len(self.y), requires_grad=False).float() ) - assert ( - y.shape[0] == weight.shape[0] - ), "Length of label must match length of weight." + assert y.shape[0] == weight.shape[0], ( + "Length of label must match length of weight." + ) self.weight = weight def __len__(self) -> int: diff --git a/src/otc/models/callback.py b/src/otc/models/callback.py index 9eb4205c..b3c425c2 100644 --- a/src/otc/models/callback.py +++ b/src/otc/models/callback.py @@ -132,7 +132,7 @@ def on_train_end( name (str): name of study. """ if study.best_trial == trial: - prefix_file = f"{study.study_name}_" f"{model.__class__.__name__}_{name}" + prefix_file = f"{study.study_name}_{model.__class__.__name__}_{name}" uri_model: str file_model: str diff --git a/src/otc/models/fttransformer.py b/src/otc/models/fttransformer.py index 590a6791..b8896467 100644 --- a/src/otc/models/fttransformer.py +++ b/src/otc/models/fttransformer.py @@ -283,9 +283,9 @@ def __init__( """ super().__init__() assert num_continous >= 0, "n_num_features must be non-negative" - assert ( - num_continous or cat_cardinalities - ), "at least one of n_num_features or cat_cardinalities must be positive" + assert num_continous or cat_cardinalities, ( + "at least one of n_num_features or cat_cardinalities must be positive" + ) "and non-empty" self.initialization = "uniform" self.num_tokenizer = ( @@ -347,15 +347,15 @@ def forward( Returns: torch.Tensor: tokens. """ - assert ( - x_num is not None or x_cat is not None - ), "At least one of x_num and x_cat must be presented" - assert _all_or_none( - [self.num_tokenizer, x_num] - ), "If self.num_tokenizer is (not) None, then x_num must (not) be None" - assert _all_or_none( - [self.cat_tokenizer, x_cat] - ), "If self.cat_tokenizer is (not) None, then x_cat must (not) be None" + assert x_num is not None or x_cat is not None, ( + "At least one of x_num and x_cat must be presented" + ) + assert _all_or_none([self.num_tokenizer, x_num]), ( + "If self.num_tokenizer is (not) None, then x_num must (not) be None" + ) + assert _all_or_none([self.cat_tokenizer, x_cat]), ( + "If self.cat_tokenizer is (not) None, then x_cat must (not) be None" + ) x = [] if self.num_tokenizer is not None: x.append(self.num_tokenizer(x_num)) @@ -559,9 +559,9 @@ def forward( Tuple[torch.Tensor, Dict[str, torch.Tensor]]: Tuple with tokens and attention_stats """ - assert _all_or_none( - [key_compression, value_compression] - ), "If key_compression is (not) None, then value_compression must (not) be None" + assert _all_or_none([key_compression, value_compression]), ( + "If key_compression is (not) None, then value_compression must (not) be None" + ) q, k, v = self.W_q(x_q), self.W_k(x_kv), self.W_v(x_kv) for tensor in [q, k, v]: assert tensor.shape[-1] % self.n_heads == 0, _INTERNAL_ERROR_MESSAGE @@ -756,9 +756,9 @@ def __init__( f"Do you mean last_layer_query_idx=[{last_layer_query_idx}] ?" ) if not prenormalization: - assert ( - not first_prenormalization - ), "If `prenormalization` is False, then `first_prenormalization`" + assert not first_prenormalization, ( + "If `prenormalization` is False, then `first_prenormalization`" + ) "must be False" assert _all_or_none([n_tokens, kv_compression_ratio, kv_compression_sharing]), ( "If any of the following arguments is (not) None, then all of them must " @@ -814,9 +814,9 @@ def make_kv_compression() -> nn.Module: if kv_compression_sharing == "headwise": layer["value_compression"] = make_kv_compression() else: - assert ( - kv_compression_sharing == "key-value" - ), _INTERNAL_ERROR_MESSAGE + assert kv_compression_sharing == "key-value", ( + _INTERNAL_ERROR_MESSAGE + ) self.blocks.append(layer) self.head = Transformer.Head( @@ -874,9 +874,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: Returns: torch.Tensor: output tensor. """ - assert ( - x.ndim == 3 - ), "The input must have 3 dimensions: (n_objects, n_tokens, d_token)" + assert x.ndim == 3, ( + "The input must have 3 dimensions: (n_objects, n_tokens, d_token)" + ) for layer_idx, layer in enumerate(self.blocks): layer = cast(nn.ModuleDict, layer) diff --git a/src/otc/models/objective.py b/src/otc/models/objective.py index e22a1e71..4429b10e 100644 --- a/src/otc/models/objective.py +++ b/src/otc/models/objective.py @@ -447,7 +447,7 @@ def __call__(self, trial: optuna.Trial) -> float: # https://catboost.ai/en/docs/features/training-on-gpu gpu_count = get_gpu_device_count() task_type = "GPU" if gpu_count > 0 else "CPU" - devices = f"0-{gpu_count-1}" + devices = f"0-{gpu_count - 1}" # kaggle book + https://catboost.ai/en/docs/concepts/parameter-tuning # friedman paper