Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into u/cuza/disable_retries_on_lost
Browse files Browse the repository at this point in the history

* origin/master:
  Minor fix to help text for kubecontext param
  Support running on devbox too
  Support multiple clusters in sync_tron_from_k8s
  Released 2.3.0 via make release
  More type fixes for defaults
  Initialize KubernetesClusterRepository watcher_kubeconfig_paths the same as KubernetsCluster and correct typing
  Released 2.2.7 via make release
  Only show disable warning on tronctl disable (#990)
  Add validation to watcher_kubeconfig_paths too, and a minimal  test
  Update tools/sync_tron_state_from_k8s.py
  Bump task_processing to get watcher_kubeconfig_paths change
  Support old_kubeconfig_paths
  Update sync_tron_from_k8s to match sanitized instance labels, cleanup comments
  Update sync to handle in-progress retries and don't tronctl yet; add unit tests
  TRON-2210: Add a tool that can sync from pod state to tron w/ tronctl

# Conflicts:
#	tron/kubernetes.py
  • Loading branch information
cuza committed Aug 2, 2024
2 parents 6e2163e + d51984e commit dbd31da
Show file tree
Hide file tree
Showing 12 changed files with 560 additions and 9 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Edit this release and run "make release"
RELEASE=2.2.6
RELEASE=2.3.0

SHELL=/bin/bash

Expand Down
9 changes: 5 additions & 4 deletions bin/tronctl
Original file line number Diff line number Diff line change
Expand Up @@ -339,11 +339,12 @@ def control_objects(args: argparse.Namespace):
yield request(urljoin(args.server, tron_id.url), data)
# NOTE: ideally we'd add this message in the JobController handle_command() function, but having the API return terminal escape codes
# sounds like a bad idea, so we're doing it here instead
print(
warning_output(
"WARNING: jobs disabled with tronctl disable are *NOT* guaranteed to stay disabled. You must disable the job in yelpsoa-configs to guarantee it will not be re-enabled."
if args.command == "disable":
print(
warning_output(
"WARNING: jobs disabled with tronctl disable are *NOT* guaranteed to stay disabled. You must disable the job in yelpsoa-configs to guarantee it will not be re-enabled."
)
)
)


def retry(args):
Expand Down
19 changes: 19 additions & 0 deletions debian/changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,22 @@
tron (2.3.0) jammy; urgency=medium

* 2.3.0 tagged with 'make release'
Commit: Merge pull request #989 from Yelp/jfong/TRON-2195-old-
kubeconfig-paths TRON-2195: Support watcher_kubeconfig_paths

-- Jen Patague <jfong@yelp.com> Thu, 11 Jul 2024 14:25:42 -0700

tron (2.2.7) jammy; urgency=medium

* 2.2.7 tagged with 'make release'
    Commit: Only show disable warning on tronctl disable (#990) I'm not
    sure what I was thinking here since this ended up unconditionally
    printing the disable warning - but we can chalk this up to an
    intentional PR campaign to warn folks and only show the warnings on
    tronctl disable from now on :p

-- Luis Perez <luisp@yelp.com> Thu, 11 Jul 2024 11:49:18 -0700

tron (2.2.6) jammy; urgency=medium

* 2.2.6 tagged with 'make release'
Expand Down
2 changes: 1 addition & 1 deletion requirements-minimal.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ pytimeparse
pytz
PyYAML>=5.1
requests
task_processing[mesos_executor,k8s]>=1.1.0
task_processing[mesos_executor,k8s]>=1.2.0
Twisted>=19.7.0
urllib3>=1.24.2
Werkzeug>=0.15.3
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ setuptools==65.5.1
six==1.15.0
sshpubkeys==3.1.0
stack-data==0.6.2
task-processing==1.1.0
task-processing==1.2.0
traitlets==5.0.0
Twisted==22.10.0
typing-extensions==4.5.0
Expand Down
35 changes: 35 additions & 0 deletions tests/config/config_parse_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1744,5 +1744,40 @@ def test_invalid(self, url, context):
config_parse.valid_master_address(url, context)


class TestValidKubeconfigPaths:
    """Validation tests for the Kubernetes `kubeconfig_path` and
    `watcher_kubeconfig_paths` config options."""

    @setup
    def setup_context(self):
        # Validation needs no real config state; the null context suffices.
        self.context = config_utils.NullConfigContext

    @staticmethod
    def _k8s_options(kubeconfig_path, watcher_kubeconfig_paths):
        # Build the minimal kubernetes options mapping under test.
        return {
            "enabled": True,
            "kubeconfig_path": kubeconfig_path,
            "watcher_kubeconfig_paths": watcher_kubeconfig_paths,
        }

    @pytest.mark.parametrize(
        "kubeconfig_path,watcher_kubeconfig_paths",
        [("/some/kubeconfig.conf", []), ("/another/kube/config", ["a_watcher_kubeconfig"])],
    )
    def test_valid(self, kubeconfig_path, watcher_kubeconfig_paths):
        # A string kubeconfig_path plus a (possibly empty) list of watcher
        # paths must validate successfully.
        options = self._k8s_options(kubeconfig_path, watcher_kubeconfig_paths)
        assert config_parse.valid_kubernetes_options.validate(options, self.context)

    @pytest.mark.parametrize(
        "kubeconfig_path,watcher_kubeconfig_paths",
        [
            (["/a/kubeconfig/in/a/list"], ["/a/valid/kubeconfig"]),
            (None, []),
            ("/some/kubeconfig.conf", "/not/a/list/kubeconfig"),
        ],
    )
    def test_invalid(self, kubeconfig_path, watcher_kubeconfig_paths):
        # Wrong types (list/None for kubeconfig_path, bare string for the
        # watcher list) must raise a ConfigError.
        options = self._k8s_options(kubeconfig_path, watcher_kubeconfig_paths)
        with pytest.raises(ConfigError):
            config_parse.valid_kubernetes_options.validate(options, self.context)


# Allow running this test module directly via its testify-style entry point.
if __name__ == "__main__":
    run()
238 changes: 238 additions & 0 deletions tests/tools/sync_tron_state_from_k8s_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
from typing import Dict
from unittest import mock

import pytest
from kubernetes.client import V1ObjectMeta
from kubernetes.client import V1Pod
from kubernetes.client import V1PodStatus

from tools.sync_tron_state_from_k8s import get_matching_pod
from tools.sync_tron_state_from_k8s import get_tron_state_from_api
from tools.sync_tron_state_from_k8s import update_tron_from_pods


def create_mock_pod(name: str, phase: str, labels: Dict[str, str], creation_timestamp: str):
    """Build a minimal V1Pod carrying only the metadata and status phase
    that the sync tool inspects."""
    return V1Pod(
        metadata=V1ObjectMeta(name=name, creation_timestamp=creation_timestamp, labels=labels),
        status=V1PodStatus(phase=phase),
    )


class TestSyncTronStateFromK8s:
    """Unit tests for tools/sync_tron_state_from_k8s.

    Covers: matching Tron action runs to k8s pods by labels, fetching job
    state from the Tron API, and issuing tronctl success/fail commands
    based on pod phases.
    """

    @pytest.fixture(autouse=True)
    def setup_test_data(self):
        # Fake pods keyed by pod name. Matching is done via the
        # paasta.yelp.com/service, paasta.yelp.com/instance, and
        # tron.yelp.com/run_num labels rather than the pod name.
        # NOTE(review): two entries below share the name
        # "service.job2.10.action", so this dict comprehension keeps only
        # the later (Running) one — confirm that is the intent behind the
        # "no matching pod" expectation in test_get_matching_pod.
        self.pods = {
            p.metadata.name: p
            for p in [
                create_mock_pod(
                    "service.job.2.action",
                    "Succeeded",
                    {
                        "paasta.yelp.com/service": "service",
                        "paasta.yelp.com/instance": "job.action",
                        "tron.yelp.com/run_num": "2",
                    },
                    "2024-01-01T00:00:00",
                ),
                create_mock_pod(
                    "service.job.3.action-nomatch",
                    "Failed",
                    {
                        "paasta.yelp.com/service": "service",
                        "paasta.yelp.com/instance": "job.action",
                        "tron.yelp.com/run_num": "3",
                    },
                    "2024-01-01T00:00:00",
                ),
                create_mock_pod(
                    "service.job.4.action-nomatch",
                    "Failed",
                    {
                        "paasta.yelp.com/service": "service",
                        "paasta.yelp.com/instance": "job.action",
                        "tron.yelp.com/run_num": "4",
                    },
                    "2024-01-01T00:00:00",
                ),
                # Same run_num as above but a later creation_timestamp:
                # exercises retry handling (newest pod should win).
                create_mock_pod(
                    "service.job.4.action-nomatch-retry2",
                    "Succeeded",
                    {
                        "paasta.yelp.com/service": "service",
                        "paasta.yelp.com/instance": "job.action",
                        "tron.yelp.com/run_num": "4",
                    },
                    "2024-01-01T01:00:00",
                ),
                create_mock_pod(
                    "service.job2.10.action",
                    "Failed",
                    {
                        "paasta.yelp.com/service": "service",
                        "paasta.yelp.com/instance": "job2.action",
                        "tron.yelp.com/run_num": "10",
                    },
                    "2024-01-01T01:00:00",
                ),
                create_mock_pod(
                    "service.job2.10.action",
                    "Running",
                    {
                        "paasta.yelp.com/service": "service",
                        "paasta.yelp.com/instance": "job2.action",
                        "tron.yelp.com/run_num": "10",
                    },
                    "2024-01-01T01:05:00",
                ),
                create_mock_pod(
                    # Technically this pod name would not actually exist
                    "service.job_with_an_extremely_extremely_extremely_extremely_extremely_long_name.10.action",
                    "Succeeded",
                    {
                        "paasta.yelp.com/service": "service",
                        # If PaaSTA's setup_tron_namespace changes how we create these labels, this test will need updating
                        "paasta.yelp.com/instance": "job_with_an_extremely_extremely_extremely_extremely_extrem-26i4",
                        "tron.yelp.com/run_num": "10",
                    },
                    "2024-01-01T01:05:00",
                ),
            ]
        }

    # Cases, in order:
    #   1 matching pod by labels
    #   2 matching pods by labels (newest retry expected)
    #   no matching pod (job2 run 10 — only a Running pod survives the fixture dict)
    #   no matching pod (unknown service)
    #   matching by hashed/sanitized instance label for an over-long job name
    @pytest.mark.parametrize(
        "job_name,run_num,expected_pod_name",
        [
            ("service.job", "3", "service.job.3.action-nomatch"),
            ("service.job", "4", "service.job.4.action-nomatch-retry2"),
            ("service.job2", "10", None),
            ("service2.job", "1", None),
            (
                "service.job_with_an_extremely_extremely_extremely_extremely_extremely_long_name",
                "10",
                "service.job_with_an_extremely_extremely_extremely_extremely_extremely_long_name.10.action",
            ),
        ],
    )
    def test_get_matching_pod(self, job_name, run_num, expected_pod_name):
        # Build the minimal action-run dict get_matching_pod expects.
        test_action_run = {"action_name": "action", "job_name": f"{job_name}", "run_num": run_num}
        matching_pod = get_matching_pod(test_action_run, self.pods)
        # expected_pod_name of None means no pod should match.
        assert matching_pod == self.pods.get(expected_pod_name)

    # verify we send correct num_runs
    # verify we are sending one request for the job list + one per job
    @mock.patch("tools.sync_tron_state_from_k8s.get_client_config", autospec=True)
    @mock.patch("tools.sync_tron_state_from_k8s.Client", autospec=True)
    def test_get_tron_state_from_api(self, mock_client, mock_get_client_config):
        mock_client.return_value = mock.Mock()
        # One job in the listing; its detail fetch returns no runs.
        mock_client.return_value.jobs.return_value = [{"url": "/uri", "name": "some job"}]
        mock_client.return_value.job.return_value = {"runs": []}
        mock_get_client_config.return_value = {"server": "https://localhost:8888"}
        get_tron_state_from_api(None, num_runs=10)

        # Client built from the configured server address.
        mock_client.assert_called_with("https://localhost:8888")
        # Job listing fetched without any of the heavy include flags.
        mock_client.return_value.jobs.assert_called_with(
            include_job_runs=False, include_action_runs=False, include_action_graph=False, include_node_pool=False
        )

        # Per-job fetch uses the /api-prefixed URL, includes action runs,
        # and passes num_runs through as count.
        mock_client.return_value.job.assert_called_with("/api/uri", include_action_runs=True, count=10)

    @mock.patch("tools.sync_tron_state_from_k8s.subprocess.run", autospec=True)
    def test_update_tron(self, mock_subprocess_run):
        # sorry for the blob of test data
        # Tron API state: run 2 unknown (pod Succeeded), run 3 running
        # (pod Failed; action2 has no pod), run 4 starting (retried pod
        # Succeeded), run 5 starting (no pod), job2 run 10 already
        # succeeded (should not be touched).
        tron_state = [
            {
                "name": "service.job",
                "runs": [
                    {
                        "runs": [
                            {
                                "id": "service.job.2.action",
                                "action_name": "action",
                                "run_num": "2",
                                "job_name": "service.job",
                                "state": "unknown",
                            }
                        ]
                    },
                    {
                        "runs": [
                            {
                                "id": "service.job.3.action",
                                "action_name": "action",
                                "run_num": "3",
                                "job_name": "service.job",
                                "state": "running",
                            },
                            {
                                "id": "service.job.3.action2",
                                "action_name": "action2",
                                "run_num": "3",
                                "job_name": "service.job",
                                "state": "running",
                            },
                        ]
                    },
                    {
                        "runs": [
                            {
                                "id": "service.job.4.action",
                                "action_name": "action",
                                "run_num": "4",
                                "job_name": "service.job",
                                "state": "starting",
                            }
                        ]
                    },
                    {
                        "runs": [
                            {
                                "id": "service.job.5.action",
                                "action_name": "action",
                                "run_num": "5",
                                "job_name": "service.job",
                                "state": "starting",
                            }
                        ]
                    },
                ],
            },
            {
                "name": "service.job2",
                "runs": [
                    {
                        "runs": [
                            {
                                "id": "service.job2.10.action",
                                "action_name": "action",
                                "run_num": "10",
                                "job_name": "service.job2",
                                "state": "succeeded",
                            },
                        ]
                    },
                ],
            },
        ]

        # Simulated tronctl exit statuses for the success / failure paths.
        good_subprocess_run = mock.Mock(returncode=0)
        bad_subprocess_run = mock.Mock(returncode=1)

        # Pod phase Succeeded -> tronctl success; Failed -> tronctl fail.
        expected_calls = [
            mock.call(["tronctl", "success", "service.job.2.action"], capture_output=True, text=True),
            mock.call(["tronctl", "fail", "service.job.3.action"], capture_output=True, text=True),
            mock.call(["tronctl", "success", "service.job.4.action"], capture_output=True, text=True),
        ]
        mock_subprocess_run.return_value = good_subprocess_run

        result = update_tron_from_pods(tron_state, self.pods, tronctl_wrapper="tronctl", do_work=True)

        assert result["updated"] == ["service.job.2.action", "service.job.3.action", "service.job.4.action"]
        assert result["error"] == []
        mock_subprocess_run.assert_has_calls(expected_calls, any_order=True)

        # When every tronctl invocation fails, the same runs land in "error".
        mock_subprocess_run.return_value = bad_subprocess_run
        result = update_tron_from_pods(tron_state, self.pods, tronctl_wrapper="tronctl", do_work=True)
        assert result["error"] == ["service.job.2.action", "service.job.3.action", "service.job.4.action"]
Loading

0 comments on commit dbd31da

Please sign in to comment.