Skip to content

Commit

Permalink
Merge pull request #998 from Yelp/jfong/TRON-2277-fix-nonretryable-ex…
Browse files Browse the repository at this point in the history
…it-codes

TRON-2277: Pass along non_retryable_exit_codes to KubernetesCluster objects
  • Loading branch information
jfongatyelp authored Sep 26, 2024
2 parents 3c64a6e + 5b27ae6 commit c061971
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 0 deletions.
14 changes: 14 additions & 0 deletions tests/config/config_parse_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1778,6 +1778,20 @@ def test_invalid(self, kubeconfig_path, watcher_kubeconfig_paths):
with pytest.raises(ConfigError):
config_parse.valid_kubernetes_options.validate(k8s_options, self.context)

def test_nonretry(self):
k8s_options = {
"enabled": True,
"kubeconfig_path": "/some/valid/path",
"watcher_kubeconfig_paths": [],
"non_retryable_exit_codes": 1,
}
with pytest.raises(ConfigError):
config_parse.valid_kubernetes_options.validate(k8s_options, self.context)

k8s_options["non_retryable_exit_codes"] = [-12, 1]

assert config_parse.valid_kubernetes_options.validate(k8s_options, self.context)


if __name__ == "__main__":
run()
31 changes: 31 additions & 0 deletions tests/core/actionrun_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2000,3 +2000,34 @@ def test_stop_task_no_task_id_k8s(self, mock_cluster_repo, mock_k8s_action_run):
mock_k8s_action_run.create_attempt()
error_message = mock_k8s_action_run.stop()
assert error_message == "Error: Can't find task id for the action."

@mock.patch("tron.core.actionrun.KubernetesClusterRepository", autospec=True)
def test_non_retryable_exit(self, mock_cluster_repo, mock_k8s_action_run):

mock_cluster = mock.Mock()
mock_cluster.non_retryable_exit_codes = [13]

mock_cluster_repo.get_cluster.return_value = mock_cluster

mock_k8s_action_run.retries_remaining = 5
mock_k8s_action_run.start = mock.Mock()

mock_k8s_action_run._exit_unsuccessful(13)

assert mock_k8s_action_run.retries_remaining == 0
assert not mock_k8s_action_run.is_unknown

@mock.patch("tron.core.actionrun.KubernetesClusterRepository", autospec=True)
def test_retryable_exit(self, mock_cluster_repo, mock_k8s_action_run):

mock_cluster = mock.Mock()
mock_cluster.non_retryable_exit_codes = [-12]

mock_cluster_repo.get_cluster.return_value = mock_cluster

mock_k8s_action_run.retries_remaining = 5
mock_k8s_action_run.start = mock.Mock()

mock_k8s_action_run._exit_unsuccessful(13)

assert mock_k8s_action_run.retries_remaining == 4
27 changes: 27 additions & 0 deletions tests/kubernetes_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@
from task_processing.plugins.kubernetes.task_config import KubernetesTaskConfig

from tron.config.schema import ConfigFieldSelectorSource
from tron.config.schema import ConfigKubernetes
from tron.config.schema import ConfigProjectedSAVolume
from tron.config.schema import ConfigSecretSource
from tron.config.schema import ConfigSecretVolume
from tron.config.schema import ConfigSecretVolumeItem
from tron.config.schema import ConfigVolume
from tron.kubernetes import DEFAULT_DISK_LIMIT
from tron.kubernetes import KubernetesCluster
from tron.kubernetes import KubernetesClusterRepository
from tron.kubernetes import KubernetesTask
from tron.utils import exitcode

Expand Down Expand Up @@ -776,3 +778,28 @@ def test_recover(mock_kubernetes_cluster, mock_kubernetes_task):
assert mock_kubernetes_task.get_kubernetes_id() in mock_kubernetes_cluster.tasks
mock_kubernetes_cluster.runner.reconcile.assert_called_once_with(mock_kubernetes_task.get_config())
assert mock_started.call_count == 1


def test_kuberntes_cluster_repository():
# Check we are passing k8s_options from mcp/KubernetesClusterRepository.configure to KubernetesCluster calls

mock_k8s_options = {
"enabled": True,
"kubeconfig_path": "/tmp/kubeconfig.conf",
"watcher_kubeconfig_paths": ["/tmp/kubeconfig_old.conf"],
"non_retryable_exit_codes": [13],
"default_volumes": [
ConfigVolume(
container_path="/tmp",
host_path="/host/tmp",
mode="RO",
)
],
}
mock_k8s_options_obj = ConfigKubernetes(**mock_k8s_options)

with mock.patch("tron.kubernetes.KubernetesCluster", autospec=True) as mock_cluster:
KubernetesClusterRepository.configure(mock_k8s_options_obj)
KubernetesClusterRepository.get_cluster()

mock_cluster.assert_called_once_with(**mock_k8s_options)
3 changes: 3 additions & 0 deletions tron/kubernetes.py
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,7 @@ class KubernetesClusterRepository:
pod_launch_timeout: Optional[int] = None
default_volumes: Optional[List[ConfigVolume]] = None
watcher_kubeconfig_paths: Optional[List[str]] = None
non_retryable_exit_codes: Optional[List[int]] = None

# metadata config
clusters: Dict[str, KubernetesCluster] = {}
Expand All @@ -654,6 +655,7 @@ def get_cluster(cls, kubeconfig_path: Optional[str] = None) -> Optional[Kubernet
enabled=cls.kubernetes_enabled,
default_volumes=cls.default_volumes,
watcher_kubeconfig_paths=cls.watcher_kubeconfig_paths,
non_retryable_exit_codes=cls.non_retryable_exit_codes,
)
cls.clusters[kubeconfig_path] = cluster

Expand All @@ -671,6 +673,7 @@ def configure(cls, kubernetes_options: ConfigKubernetes) -> None:
cls.kubernetes_non_retryable_exit_codes = kubernetes_options.non_retryable_exit_codes
cls.default_volumes = kubernetes_options.default_volumes
cls.watcher_kubeconfig_paths = kubernetes_options.watcher_kubeconfig_paths
cls.non_retryable_exit_codes = kubernetes_options.non_retryable_exit_codes

for cluster in cls.clusters.values():
cluster.set_enabled(cls.kubernetes_enabled)
Expand Down

0 comments on commit c061971

Please sign in to comment.