Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into u/cuza/disable_retries_on_lost
Browse files Browse the repository at this point in the history

* origin/master:
  Minor fix to help text for kubecontext param
  Support running on devbox too
  Support multiple clusters in sync_tron_from_k8s
  Released 2.3.0 via make release
  More type fixes for defaults
  Initialize KubernetesClusterRepository watcher_kubeconfig_paths the same as KubernetsCluster and correct typing
  Released 2.2.7 via make release
  Only show disable warning on tronctl disable (#990)
  Add validation to watcher_kubeconfig_paths too, and a minimal  test
  Update tools/sync_tron_state_from_k8s.py
  Bump task_processing to get watcher_kubeconfig_paths change
  Support old_kubeconfig_paths
  Update sync_tron_from_k8s to match sanitized instance labels, cleanup comments
  Update sync to handle in-progress retries and don't tronctl yet; add unit tests
  TRON-2210: Add a tool that can sync from pod state to tron w/ tronctl

# Conflicts:
#	tron/kubernetes.py
  • Loading branch information
cuza committed Aug 2, 2024
2 parents 6e2163e + d51984e commit dbd31da
Show file tree
Hide file tree
Showing 12 changed files with 560 additions and 9 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Edit this release and run "make release"
RELEASE=2.2.6
RELEASE=2.3.0

SHELL=/bin/bash

Expand Down
9 changes: 5 additions & 4 deletions bin/tronctl
Original file line number Diff line number Diff line change
Expand Up @@ -339,11 +339,12 @@ def control_objects(args: argparse.Namespace):
yield request(urljoin(args.server, tron_id.url), data)
# NOTE: ideally we'd add this message in the JobController handle_command() function, but having the API return terminal escape codes
# sounds like a bad idea, so we're doing it here instead
print(
warning_output(
"WARNING: jobs disabled with tronctl disable are *NOT* guaranteed to stay disabled. You must disable the job in yelpsoa-configs to guarantee it will not be re-enabled."
if args.command == "disable":
print(
warning_output(
"WARNING: jobs disabled with tronctl disable are *NOT* guaranteed to stay disabled. You must disable the job in yelpsoa-configs to guarantee it will not be re-enabled."
)
)
)


def retry(args):
Expand Down
19 changes: 19 additions & 0 deletions debian/changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,22 @@
tron (2.3.0) jammy; urgency=medium

* 2.3.0 tagged with 'make release'
Commit: Merge pull request #989 from Yelp/jfong/TRON-2195-old-
kubeconfig-paths TRON-2195: Support watcher_kubeconfig_paths

-- Jen Patague <jfong@yelp.com> Thu, 11 Jul 2024 14:25:42 -0700

tron (2.2.7) jammy; urgency=medium

* 2.2.7 tagged with 'make release'
    Commit: Only show disable warning on tronctl disable (#990) I'm not
    sure what I was thinking here since this ended up unconditionally
    printing the disable warning - but we can chalk this up to an
    intentional PR campaign to warn folks and only show the warnings on
    tronctl disable from now on :p

-- Luis Perez <luisp@yelp.com> Thu, 11 Jul 2024 11:49:18 -0700

tron (2.2.6) jammy; urgency=medium

* 2.2.6 tagged with 'make release'
Expand Down
2 changes: 1 addition & 1 deletion requirements-minimal.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ pytimeparse
pytz
PyYAML>=5.1
requests
task_processing[mesos_executor,k8s]>=1.1.0
task_processing[mesos_executor,k8s]>=1.2.0
Twisted>=19.7.0
urllib3>=1.24.2
Werkzeug>=0.15.3
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ setuptools==65.5.1
six==1.15.0
sshpubkeys==3.1.0
stack-data==0.6.2
task-processing==1.1.0
task-processing==1.2.0
traitlets==5.0.0
Twisted==22.10.0
typing-extensions==4.5.0
Expand Down
35 changes: 35 additions & 0 deletions tests/config/config_parse_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1744,5 +1744,40 @@ def test_invalid(self, url, context):
config_parse.valid_master_address(url, context)


class TestValidKubeconfigPaths:
    """Validation tests for the Kubernetes `kubeconfig_path` and
    `watcher_kubeconfig_paths` config options."""

    @setup
    def setup_context(self):
        # Validation needs no real config state; the null context suffices.
        self.context = config_utils.NullConfigContext

    @staticmethod
    def _k8s_options(kubeconfig_path, watcher_kubeconfig_paths):
        # Build the minimal kubernetes options mapping under test.
        return {
            "enabled": True,
            "kubeconfig_path": kubeconfig_path,
            "watcher_kubeconfig_paths": watcher_kubeconfig_paths,
        }

    @pytest.mark.parametrize(
        "kubeconfig_path,watcher_kubeconfig_paths",
        [("/some/kubeconfig.conf", []), ("/another/kube/config", ["a_watcher_kubeconfig"])],
    )
    def test_valid(self, kubeconfig_path, watcher_kubeconfig_paths):
        # A string kubeconfig_path plus a (possibly empty) list of watcher
        # paths must validate successfully.
        options = self._k8s_options(kubeconfig_path, watcher_kubeconfig_paths)
        assert config_parse.valid_kubernetes_options.validate(options, self.context)

    @pytest.mark.parametrize(
        "kubeconfig_path,watcher_kubeconfig_paths",
        [
            (["/a/kubeconfig/in/a/list"], ["/a/valid/kubeconfig"]),
            (None, []),
            ("/some/kubeconfig.conf", "/not/a/list/kubeconfig"),
        ],
    )
    def test_invalid(self, kubeconfig_path, watcher_kubeconfig_paths):
        # Wrong types (list/None for kubeconfig_path, bare string for the
        # watcher list) must raise a ConfigError.
        options = self._k8s_options(kubeconfig_path, watcher_kubeconfig_paths)
        with pytest.raises(ConfigError):
            config_parse.valid_kubernetes_options.validate(options, self.context)


# Allow running this test module directly via its testify-style entry point.
if __name__ == "__main__":
    run()
238 changes: 238 additions & 0 deletions tests/tools/sync_tron_state_from_k8s_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
from typing import Dict
from unittest import mock

import pytest
from kubernetes.client import V1ObjectMeta
from kubernetes.client import V1Pod
from kubernetes.client import V1PodStatus

from tools.sync_tron_state_from_k8s import get_matching_pod
from tools.sync_tron_state_from_k8s import get_tron_state_from_api
from tools.sync_tron_state_from_k8s import update_tron_from_pods


def create_mock_pod(name: str, phase: str, labels: Dict[str, str], creation_timestamp: str):
    """Build a minimal V1Pod carrying only the metadata and status phase
    that the sync tool inspects."""
    return V1Pod(
        metadata=V1ObjectMeta(name=name, creation_timestamp=creation_timestamp, labels=labels),
        status=V1PodStatus(phase=phase),
    )


class TestSyncTronStateFromK8s:
    """Unit tests for tools/sync_tron_state_from_k8s.

    Covers: matching Tron action runs to k8s pods by labels, fetching job
    state from the Tron API, and issuing tronctl success/fail commands
    based on pod phases.
    """

    @pytest.fixture(autouse=True)
    def setup_test_data(self):
        # Fake pods keyed by pod name. Matching is done via the
        # paasta.yelp.com/service, paasta.yelp.com/instance, and
        # tron.yelp.com/run_num labels rather than the pod name.
        # NOTE(review): two entries below share the name
        # "service.job2.10.action", so this dict comprehension keeps only
        # the later (Running) one — confirm that is the intent behind the
        # "no matching pod" expectation in test_get_matching_pod.
        self.pods = {
            p.metadata.name: p
            for p in [
                create_mock_pod(
                    "service.job.2.action",
                    "Succeeded",
                    {
                        "paasta.yelp.com/service": "service",
                        "paasta.yelp.com/instance": "job.action",
                        "tron.yelp.com/run_num": "2",
                    },
                    "2024-01-01T00:00:00",
                ),
                create_mock_pod(
                    "service.job.3.action-nomatch",
                    "Failed",
                    {
                        "paasta.yelp.com/service": "service",
                        "paasta.yelp.com/instance": "job.action",
                        "tron.yelp.com/run_num": "3",
                    },
                    "2024-01-01T00:00:00",
                ),
                create_mock_pod(
                    "service.job.4.action-nomatch",
                    "Failed",
                    {
                        "paasta.yelp.com/service": "service",
                        "paasta.yelp.com/instance": "job.action",
                        "tron.yelp.com/run_num": "4",
                    },
                    "2024-01-01T00:00:00",
                ),
                # Same run_num as above but a later creation_timestamp:
                # exercises retry handling (newest pod should win).
                create_mock_pod(
                    "service.job.4.action-nomatch-retry2",
                    "Succeeded",
                    {
                        "paasta.yelp.com/service": "service",
                        "paasta.yelp.com/instance": "job.action",
                        "tron.yelp.com/run_num": "4",
                    },
                    "2024-01-01T01:00:00",
                ),
                create_mock_pod(
                    "service.job2.10.action",
                    "Failed",
                    {
                        "paasta.yelp.com/service": "service",
                        "paasta.yelp.com/instance": "job2.action",
                        "tron.yelp.com/run_num": "10",
                    },
                    "2024-01-01T01:00:00",
                ),
                create_mock_pod(
                    "service.job2.10.action",
                    "Running",
                    {
                        "paasta.yelp.com/service": "service",
                        "paasta.yelp.com/instance": "job2.action",
                        "tron.yelp.com/run_num": "10",
                    },
                    "2024-01-01T01:05:00",
                ),
                create_mock_pod(
                    # Technically this pod name would not actually exist
                    "service.job_with_an_extremely_extremely_extremely_extremely_extremely_long_name.10.action",
                    "Succeeded",
                    {
                        "paasta.yelp.com/service": "service",
                        # If PaaSTA's setup_tron_namespace changes how we create these labels, this test will need updating
                        "paasta.yelp.com/instance": "job_with_an_extremely_extremely_extremely_extremely_extrem-26i4",
                        "tron.yelp.com/run_num": "10",
                    },
                    "2024-01-01T01:05:00",
                ),
            ]
        }

    # Cases, in order:
    #   1 matching pod by labels
    #   2 matching pods by labels (newest retry expected)
    #   no matching pod (job2 run 10 — only a Running pod survives the fixture dict)
    #   no matching pod (unknown service)
    #   matching by hashed/sanitized instance label for an over-long job name
    @pytest.mark.parametrize(
        "job_name,run_num,expected_pod_name",
        [
            ("service.job", "3", "service.job.3.action-nomatch"),
            ("service.job", "4", "service.job.4.action-nomatch-retry2"),
            ("service.job2", "10", None),
            ("service2.job", "1", None),
            (
                "service.job_with_an_extremely_extremely_extremely_extremely_extremely_long_name",
                "10",
                "service.job_with_an_extremely_extremely_extremely_extremely_extremely_long_name.10.action",
            ),
        ],
    )
    def test_get_matching_pod(self, job_name, run_num, expected_pod_name):
        # Build the minimal action-run dict get_matching_pod expects.
        test_action_run = {"action_name": "action", "job_name": f"{job_name}", "run_num": run_num}
        matching_pod = get_matching_pod(test_action_run, self.pods)
        # expected_pod_name of None means no pod should match.
        assert matching_pod == self.pods.get(expected_pod_name)

    # verify we send correct num_runs
    # verify we are sending one request for the job list + one per job
    @mock.patch("tools.sync_tron_state_from_k8s.get_client_config", autospec=True)
    @mock.patch("tools.sync_tron_state_from_k8s.Client", autospec=True)
    def test_get_tron_state_from_api(self, mock_client, mock_get_client_config):
        mock_client.return_value = mock.Mock()
        # One job in the listing; its detail fetch returns no runs.
        mock_client.return_value.jobs.return_value = [{"url": "/uri", "name": "some job"}]
        mock_client.return_value.job.return_value = {"runs": []}
        mock_get_client_config.return_value = {"server": "https://localhost:8888"}
        get_tron_state_from_api(None, num_runs=10)

        # Client built from the configured server address.
        mock_client.assert_called_with("https://localhost:8888")
        # Job listing fetched without any of the heavy include flags.
        mock_client.return_value.jobs.assert_called_with(
            include_job_runs=False, include_action_runs=False, include_action_graph=False, include_node_pool=False
        )

        # Per-job fetch uses the /api-prefixed URL, includes action runs,
        # and passes num_runs through as count.
        mock_client.return_value.job.assert_called_with("/api/uri", include_action_runs=True, count=10)

    @mock.patch("tools.sync_tron_state_from_k8s.subprocess.run", autospec=True)
    def test_update_tron(self, mock_subprocess_run):
        # sorry for the blob of test data
        # Tron API state: run 2 unknown (pod Succeeded), run 3 running
        # (pod Failed; action2 has no pod), run 4 starting (retried pod
        # Succeeded), run 5 starting (no pod), job2 run 10 already
        # succeeded (should not be touched).
        tron_state = [
            {
                "name": "service.job",
                "runs": [
                    {
                        "runs": [
                            {
                                "id": "service.job.2.action",
                                "action_name": "action",
                                "run_num": "2",
                                "job_name": "service.job",
                                "state": "unknown",
                            }
                        ]
                    },
                    {
                        "runs": [
                            {
                                "id": "service.job.3.action",
                                "action_name": "action",
                                "run_num": "3",
                                "job_name": "service.job",
                                "state": "running",
                            },
                            {
                                "id": "service.job.3.action2",
                                "action_name": "action2",
                                "run_num": "3",
                                "job_name": "service.job",
                                "state": "running",
                            },
                        ]
                    },
                    {
                        "runs": [
                            {
                                "id": "service.job.4.action",
                                "action_name": "action",
                                "run_num": "4",
                                "job_name": "service.job",
                                "state": "starting",
                            }
                        ]
                    },
                    {
                        "runs": [
                            {
                                "id": "service.job.5.action",
                                "action_name": "action",
                                "run_num": "5",
                                "job_name": "service.job",
                                "state": "starting",
                            }
                        ]
                    },
                ],
            },
            {
                "name": "service.job2",
                "runs": [
                    {
                        "runs": [
                            {
                                "id": "service.job2.10.action",
                                "action_name": "action",
                                "run_num": "10",
                                "job_name": "service.job2",
                                "state": "succeeded",
                            },
                        ]
                    },
                ],
            },
        ]

        # Simulated tronctl exit statuses for the success / failure paths.
        good_subprocess_run = mock.Mock(returncode=0)
        bad_subprocess_run = mock.Mock(returncode=1)

        # Pod phase Succeeded -> tronctl success; Failed -> tronctl fail.
        expected_calls = [
            mock.call(["tronctl", "success", "service.job.2.action"], capture_output=True, text=True),
            mock.call(["tronctl", "fail", "service.job.3.action"], capture_output=True, text=True),
            mock.call(["tronctl", "success", "service.job.4.action"], capture_output=True, text=True),
        ]
        mock_subprocess_run.return_value = good_subprocess_run

        result = update_tron_from_pods(tron_state, self.pods, tronctl_wrapper="tronctl", do_work=True)

        assert result["updated"] == ["service.job.2.action", "service.job.3.action", "service.job.4.action"]
        assert result["error"] == []
        mock_subprocess_run.assert_has_calls(expected_calls, any_order=True)

        # When every tronctl invocation fails, the same runs land in "error".
        mock_subprocess_run.return_value = bad_subprocess_run
        result = update_tron_from_pods(tron_state, self.pods, tronctl_wrapper="tronctl", do_work=True)
        assert result["error"] == ["service.job.2.action", "service.job.3.action", "service.job.4.action"]
Loading

0 comments on commit dbd31da

Please sign in to comment.