diff --git a/charms/argo-controller/lib/charms/grafana_k8s/v0/grafana_dashboard.py b/charms/argo-controller/lib/charms/grafana_k8s/v0/grafana_dashboard.py index 1f1bc4f..f310156 100644 --- a/charms/argo-controller/lib/charms/grafana_k8s/v0/grafana_dashboard.py +++ b/charms/argo-controller/lib/charms/grafana_k8s/v0/grafana_dashboard.py @@ -157,7 +157,7 @@ def __init__(self, *args): self._on_dashboards_changed, ) -Dashboards can be retrieved the :meth:`dashboards`: +Dashboards can be retrieved via the `dashboards` method: It will be returned in the format of: @@ -175,7 +175,6 @@ def __init__(self, *args): The consuming charm should decompress the dashboard. """ -import base64 import hashlib import json import logging @@ -187,7 +186,7 @@ def __init__(self, *args): import tempfile import uuid from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple import yaml from ops.charm import ( @@ -209,6 +208,7 @@ def __init__(self, *args): StoredState, ) from ops.model import Relation +from cosl import LZMABase64 # The unique Charmhub library identifier, never change it LIBID = "c49eb9c7dfef40c7b6235ebd67010a3f" @@ -219,7 +219,7 @@ def __init__(self, *args): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 35 +LIBPATCH = 37 logger = logging.getLogger(__name__) @@ -544,357 +544,351 @@ def _validate_relation_by_interface_and_direction( raise Exception("Unexpected RelationDirection: {}".format(expected_relation_role)) -def _encode_dashboard_content(content: Union[str, bytes]) -> str: - if isinstance(content, str): - content = bytes(content, "utf-8") +class CharmedDashboard: + """A helper class for handling dashboards on the requirer (Grafana) side.""" - return base64.b64encode(lzma.compress(content)).decode("utf-8") - - -def _decode_dashboard_content(encoded_content: str) -> str: - return lzma.decompress(base64.b64decode(encoded_content.encode("utf-8"))).decode() - - -def _convert_dashboard_fields(content: str, inject_dropdowns: bool = True) -> str: - """Make sure values are present for Juju topology. - - Inserts Juju topology variables and selectors into the template, as well as - a variable for Prometheus. - """ - dict_content = json.loads(content) - datasources = {} - existing_templates = False - - template_dropdowns = ( - TOPOLOGY_TEMPLATE_DROPDOWNS + DATASOURCE_TEMPLATE_DROPDOWNS # type: ignore - if inject_dropdowns - else DATASOURCE_TEMPLATE_DROPDOWNS - ) - - # If the dashboard has __inputs, get the names to replace them. These are stripped - # from reactive dashboards in GrafanaDashboardAggregator, but charm authors in - # newer charms may import them directly from the marketplace - if "__inputs" in dict_content: - for field in dict_content["__inputs"]: - if "type" in field and field["type"] == "datasource": - datasources[field["name"]] = field["pluginName"].lower() - del dict_content["__inputs"] - - # If no existing template variables exist, just insert our own - if "templating" not in dict_content: - dict_content["templating"] = {"list": list(template_dropdowns)} # type: ignore - else: - # Otherwise, set a flag so we can go back later - existing_templates = True - for template_value in dict_content["templating"]["list"]: - # Build a list of `datasource_name`: `datasource_type` mappings - # The "query" field is actually "prometheus", "loki", "influxdb", etc - if "type" in template_value and template_value["type"] == "datasource": - datasources[template_value["name"]] = template_value["query"].lower() - - # Put our own variables in the template - for d in template_dropdowns: # type: ignore - if d not in dict_content["templating"]["list"]: - dict_content["templating"]["list"].insert(0, d) - - dict_content = _replace_template_fields(dict_content, datasources, existing_templates) - return json.dumps(dict_content) + @classmethod + def _convert_dashboard_fields(cls, content: str, inject_dropdowns: bool = True) -> str: + """Make sure values are present for Juju topology. + Inserts Juju topology variables and selectors into the template, as well as + a variable for Prometheus. + """ + dict_content = json.loads(content) + datasources = {} + existing_templates = False + + template_dropdowns = ( + TOPOLOGY_TEMPLATE_DROPDOWNS + DATASOURCE_TEMPLATE_DROPDOWNS # type: ignore + if inject_dropdowns + else DATASOURCE_TEMPLATE_DROPDOWNS + ) -def _replace_template_fields( # noqa: C901 - dict_content: dict, datasources: dict, existing_templates: bool -) -> dict: - """Make templated fields get cleaned up afterwards. + # If the dashboard has __inputs, get the names to replace them. These are stripped + # from reactive dashboards in GrafanaDashboardAggregator, but charm authors in + # newer charms may import them directly from the marketplace + if "__inputs" in dict_content: + for field in dict_content["__inputs"]: + if "type" in field and field["type"] == "datasource": + datasources[field["name"]] = field["pluginName"].lower() + del dict_content["__inputs"] + + # If no existing template variables exist, just insert our own + if "templating" not in dict_content: + dict_content["templating"] = {"list": list(template_dropdowns)} # type: ignore + else: + # Otherwise, set a flag so we can go back later + existing_templates = True + for template_value in dict_content["templating"]["list"]: + # Build a list of `datasource_name`: `datasource_type` mappings + # The "query" field is actually "prometheus", "loki", "influxdb", etc + if "type" in template_value and template_value["type"] == "datasource": + datasources[template_value["name"]] = template_value["query"].lower() + + # Put our own variables in the template + for d in template_dropdowns: # type: ignore + if d not in dict_content["templating"]["list"]: + dict_content["templating"]["list"].insert(0, d) + + dict_content = cls._replace_template_fields(dict_content, datasources, existing_templates) + return json.dumps(dict_content) - If existing datasource variables are present, try to substitute them. - """ - replacements = {"loki": "${lokids}", "prometheus": "${prometheusds}"} - used_replacements = [] # type: List[str] - - # If any existing datasources match types we know, or we didn't find - # any templating variables at all, template them. - if datasources or not existing_templates: - panels = dict_content.get("panels", {}) - if panels: - dict_content["panels"] = _template_panels( - panels, replacements, used_replacements, existing_templates, datasources - ) + @classmethod + def _replace_template_fields( # noqa: C901 + cls, dict_content: dict, datasources: dict, existing_templates: bool + ) -> dict: + """Make templated fields get cleaned up afterwards. - # Find panels nested under rows - rows = dict_content.get("rows", {}) - if rows: - for row_idx, row in enumerate(rows): - if "panels" in row.keys(): - rows[row_idx]["panels"] = _template_panels( - row["panels"], - replacements, - used_replacements, - existing_templates, - datasources, - ) + If existing datasource variables are present, try to substitute them. + """ + replacements = {"loki": "${lokids}", "prometheus": "${prometheusds}"} + used_replacements = [] # type: List[str] + + # If any existing datasources match types we know, or we didn't find + # any templating variables at all, template them. + if datasources or not existing_templates: + panels = dict_content.get("panels", {}) + if panels: + dict_content["panels"] = cls._template_panels( + panels, replacements, used_replacements, existing_templates, datasources + ) - dict_content["rows"] = rows - - # Finally, go back and pop off the templates we stubbed out - deletions = [] - for tmpl in dict_content["templating"]["list"]: - if tmpl["name"] and tmpl["name"] in used_replacements: - deletions.append(tmpl) - - for d in deletions: - dict_content["templating"]["list"].remove(d) - - return dict_content - - -def _template_panels( - panels: dict, - replacements: dict, - used_replacements: list, - existing_templates: bool, - datasources: dict, -) -> dict: - """Iterate through a `panels` object and template it appropriately.""" - # Go through all the panels. If they have a datasource set, AND it's one - # that we can convert to ${lokids} or ${prometheusds}, by stripping off the - # ${} templating and comparing the name to the list we built, replace it, - # otherwise, leave it alone. - # - for panel in panels: - if "datasource" not in panel or not panel.get("datasource"): - continue - if not existing_templates: - datasource = panel.get("datasource") - if isinstance(datasource, str): - if "loki" in datasource: - panel["datasource"] = "${lokids}" - elif "grafana" in datasource: - continue - else: - panel["datasource"] = "${prometheusds}" - elif isinstance(datasource, dict): - # In dashboards exported by Grafana 9, datasource type is dict - dstype = datasource.get("type", "") - if dstype == "loki": - panel["datasource"]["uid"] = "${lokids}" - elif dstype == "prometheus": - panel["datasource"]["uid"] = "${prometheusds}" + # Find panels nested under rows + rows = dict_content.get("rows", {}) + if rows: + for row_idx, row in enumerate(rows): + if "panels" in row.keys(): + rows[row_idx]["panels"] = cls._template_panels( + row["panels"], + replacements, + used_replacements, + existing_templates, + datasources, + ) + + dict_content["rows"] = rows + + # Finally, go back and pop off the templates we stubbed out + deletions = [] + for tmpl in dict_content["templating"]["list"]: + if tmpl["name"] and tmpl["name"] in used_replacements: + deletions.append(tmpl) + + for d in deletions: + dict_content["templating"]["list"].remove(d) + + return dict_content + + @classmethod + def _template_panels( + cls, + panels: dict, + replacements: dict, + used_replacements: list, + existing_templates: bool, + datasources: dict, + ) -> dict: + """Iterate through a `panels` object and template it appropriately.""" + # Go through all the panels. If they have a datasource set, AND it's one + # that we can convert to ${lokids} or ${prometheusds}, by stripping off the + # ${} templating and comparing the name to the list we built, replace it, + # otherwise, leave it alone. + # + for panel in panels: + if "datasource" not in panel or not panel.get("datasource"): + continue + if not existing_templates: + datasource = panel.get("datasource") + if isinstance(datasource, str): + if "loki" in datasource: + panel["datasource"] = "${lokids}" + elif "grafana" in datasource: + continue + else: + panel["datasource"] = "${prometheusds}" + elif isinstance(datasource, dict): + # In dashboards exported by Grafana 9, datasource type is dict + dstype = datasource.get("type", "") + if dstype == "loki": + panel["datasource"]["uid"] = "${lokids}" + elif dstype == "prometheus": + panel["datasource"]["uid"] = "${prometheusds}" + else: + logger.debug("Unrecognized datasource type '%s'; skipping", dstype) + continue else: - logger.debug("Unrecognized datasource type '%s'; skipping", dstype) + logger.error("Unknown datasource format: skipping") continue else: - logger.error("Unknown datasource format: skipping") - continue - else: - if isinstance(panel["datasource"], str): - if panel["datasource"].lower() in replacements.values(): - # Already a known template variable - continue - # Strip out variable characters and maybe braces - ds = re.sub(r"(\$|\{|\})", "", panel["datasource"]) - - if ds not in datasources.keys(): - # Unknown, non-templated datasource, potentially a Grafana builtin - continue - - replacement = replacements.get(datasources[ds], "") - if replacement: - used_replacements.append(ds) - panel["datasource"] = replacement or panel["datasource"] - elif isinstance(panel["datasource"], dict): - dstype = panel["datasource"].get("type", "") - if panel["datasource"].get("uid", "").lower() in replacements.values(): - # Already a known template variable - continue - # Strip out variable characters and maybe braces - ds = re.sub(r"(\$|\{|\})", "", panel["datasource"].get("uid", "")) - - if ds not in datasources.keys(): - # Unknown, non-templated datasource, potentially a Grafana builtin + if isinstance(panel["datasource"], str): + if panel["datasource"].lower() in replacements.values(): + # Already a known template variable + continue + # Strip out variable characters and maybe braces + ds = re.sub(r"(\$|\{|\})", "", panel["datasource"]) + + if ds not in datasources.keys(): + # Unknown, non-templated datasource, potentially a Grafana builtin + continue + + replacement = replacements.get(datasources[ds], "") + if replacement: + used_replacements.append(ds) + panel["datasource"] = replacement or panel["datasource"] + elif isinstance(panel["datasource"], dict): + dstype = panel["datasource"].get("type", "") + if panel["datasource"].get("uid", "").lower() in replacements.values(): + # Already a known template variable + continue + # Strip out variable characters and maybe braces + ds = re.sub(r"(\$|\{|\})", "", panel["datasource"].get("uid", "")) + + if ds not in datasources.keys(): + # Unknown, non-templated datasource, potentially a Grafana builtin + continue + + replacement = replacements.get(datasources[ds], "") + if replacement: + used_replacements.append(ds) + panel["datasource"]["uid"] = replacement + else: + logger.error("Unknown datasource format: skipping") continue + return panels - replacement = replacements.get(datasources[ds], "") - if replacement: - used_replacements.append(ds) - panel["datasource"]["uid"] = replacement - else: - logger.error("Unknown datasource format: skipping") - continue - return panels + @classmethod + def _inject_labels(cls, content: str, topology: dict, transformer: "CosTool") -> str: + """Inject Juju topology into panel expressions via CosTool. - -def _inject_labels(content: str, topology: dict, transformer: "CosTool") -> str: - """Inject Juju topology into panel expressions via CosTool. - - A dashboard will have a structure approximating: - { - "__inputs": [], - "templating": { - "list": [ + A dashboard will have a structure approximating: + { + "__inputs": [], + "templating": { + "list": [ + { + "name": "prometheusds", + "type": "prometheus" + } + ] + }, + "panels": [ { - "name": "prometheusds", - "type": "prometheus" + "foo": "bar", + "targets": [ + { + "some": "field", + "expr": "up{job="foo"}" + }, + { + "some_other": "field", + "expr": "sum(http_requests_total{instance="$foo"}[5m])} + } + ], + "datasource": "${someds}" } ] - }, - "panels": [ - { - "foo": "bar", - "targets": [ - { - "some": "field", - "expr": "up{job="foo"}" - }, - { - "some_other": "field", - "expr": "sum(http_requests_total{instance="$foo"}[5m])} - } - ], - "datasource": "${someds}" - } - ] - } + } - `templating` is used elsewhere in this library, but the structure is not rigid. It is - not guaranteed that a panel will actually have any targets (it could be a "spacer" with - no datasource, hence no expression). It could have only one target. It could have multiple - targets. It could have multiple targets of which only one has an `expr` to evaluate. We need - to try to handle all of these concisely. + `templating` is used elsewhere in this library, but the structure is not rigid. It is + not guaranteed that a panel will actually have any targets (it could be a "spacer" with + no datasource, hence no expression). It could have only one target. It could have multiple + targets. It could have multiple targets of which only one has an `expr` to evaluate. We need + to try to handle all of these concisely. - `cos-tool` (`github.com/canonical/cos-tool` as a Go module in general) - does not know "Grafana-isms", such as using `[$_variable]` to modify the query from the user - interface, so we add placeholders (as `5y`, since it must parse, but a dashboard looking for - five years for a panel query would be unusual). + `cos-tool` (`github.com/canonical/cos-tool` as a Go module in general) + does not know "Grafana-isms", such as using `[$_variable]` to modify the query from the user + interface, so we add placeholders (as `5y`, since it must parse, but a dashboard looking for + five years for a panel query would be unusual). - Args: - content: dashboard content as a string - topology: a dict containing topology values - transformer: a 'CosTool' instance - Returns: - dashboard content with replaced values. - """ - dict_content = json.loads(content) + Args: + content: dashboard content as a string + topology: a dict containing topology values + transformer: a 'CosTool' instance + Returns: + dashboard content with replaced values. + """ + dict_content = json.loads(content) - if "panels" not in dict_content.keys(): - return json.dumps(dict_content) + if "panels" not in dict_content.keys(): + return json.dumps(dict_content) - # Go through all the panels and inject topology labels - # Panels may have more than one 'target' where the expressions live, so that must be - # accounted for. Additionally, `promql-transform` does not necessarily gracefully handle - # expressions with range queries including variables. Exclude these. - # - # It is not a certainty that the `datasource` field will necessarily reflect the type, so - # operate on all fields. - panels = dict_content["panels"] - topology_with_prefix = {"juju_{}".format(k): v for k, v in topology.items()} + # Go through all the panels and inject topology labels + # Panels may have more than one 'target' where the expressions live, so that must be + # accounted for. Additionally, `promql-transform` does not necessarily gracefully handle + # expressions with range queries including variables. Exclude these. + # + # It is not a certainty that the `datasource` field will necessarily reflect the type, so + # operate on all fields. + panels = dict_content["panels"] + topology_with_prefix = {"juju_{}".format(k): v for k, v in topology.items()} + + # We need to use an index so we can insert the changed element back later + for panel_idx, panel in enumerate(panels): + if not isinstance(panel, dict): + continue - # We need to use an index so we can insert the changed element back later - for panel_idx, panel in enumerate(panels): - if not isinstance(panel, dict): - continue + # Use the index to insert it back in the same location + panels[panel_idx] = cls._modify_panel(panel, topology_with_prefix, transformer) - # Use the index to insert it back in the same location - panels[panel_idx] = _modify_panel(panel, topology_with_prefix, transformer) + return json.dumps(dict_content) - return json.dumps(dict_content) + @classmethod + def _modify_panel(cls, panel: dict, topology: dict, transformer: "CosTool") -> dict: + """Inject Juju topology into panel expressions via CosTool. + Args: + panel: a dashboard panel as a dict + topology: a dict containing topology values + transformer: a 'CosTool' instance + Returns: + the panel with injected values + """ + if "targets" not in panel.keys(): + return panel -def _modify_panel(panel: dict, topology: dict, transformer: "CosTool") -> dict: - """Inject Juju topology into panel expressions via CosTool. + # Pre-compile a regular expression to grab values from inside of [] + range_re = re.compile(r"\[(?P.*?)\]") + # Do the same for any offsets + offset_re = re.compile(r"offset\s+(?P-?\s*[$\w]+)") - Args: - panel: a dashboard panel as a dict - topology: a dict containing topology values - transformer: a 'CosTool' instance - Returns: - the panel with injected values - """ - if "targets" not in panel.keys(): - return panel + known_datasources = {"${prometheusds}": "promql", "${lokids}": "logql"} - # Pre-compile a regular expression to grab values from inside of [] - range_re = re.compile(r"\[(?P.*?)\]") - # Do the same for any offsets - offset_re = re.compile(r"offset\s+(?P-?\s*[$\w]+)") + targets = panel["targets"] - known_datasources = {"${prometheusds}": "promql", "${lokids}": "logql"} + # We need to use an index so we can insert the changed element back later + for idx, target in enumerate(targets): + # If there's no expression, we don't need to do anything + if "expr" not in target.keys(): + continue + expr = target["expr"] - targets = panel["targets"] + if "datasource" not in panel.keys(): + continue - # We need to use an index so we can insert the changed element back later - for idx, target in enumerate(targets): - # If there's no expression, we don't need to do anything - if "expr" not in target.keys(): - continue - expr = target["expr"] + if isinstance(panel["datasource"], str): + if panel["datasource"] not in known_datasources: + continue + querytype = known_datasources[panel["datasource"]] + elif isinstance(panel["datasource"], dict): + if panel["datasource"]["uid"] not in known_datasources: + continue + querytype = known_datasources[panel["datasource"]["uid"]] + else: + logger.error("Unknown datasource format: skipping") + continue - if "datasource" not in panel.keys(): - continue + # Capture all values inside `[]` into a list which we'll iterate over later to + # put them back in-order. Then apply the regex again and replace everything with + # `[5y]` so promql/parser will take it. + # + # Then do it again for offsets + range_values = [m.group("value") for m in range_re.finditer(expr)] + expr = range_re.sub(r"[5y]", expr) + + offset_values = [m.group("value") for m in offset_re.finditer(expr)] + expr = offset_re.sub(r"offset 5y", expr) + # Retrieve the new expression (which may be unchanged if there were no label + # matchers in the expression, or if tt was unable to be parsed like logql. It's + # virtually impossible to tell from any datasource "name" in a panel what the + # actual type is without re-implementing a complete dashboard parser, but no + # harm will some from passing invalid promql -- we'll just get the original back. + # + replacement = transformer.inject_label_matchers(expr, topology, querytype) - if isinstance(panel["datasource"], str): - if panel["datasource"] not in known_datasources: - continue - querytype = known_datasources[panel["datasource"]] - elif isinstance(panel["datasource"], dict): - if panel["datasource"]["uid"] not in known_datasources: + if replacement == target["expr"]: + # promql-transform caught an error. Move on continue - querytype = known_datasources[panel["datasource"]["uid"]] - else: - logger.error("Unknown datasource format: skipping") - continue - # Capture all values inside `[]` into a list which we'll iterate over later to - # put them back in-order. Then apply the regex again and replace everything with - # `[5y]` so promql/parser will take it. - # - # Then do it again for offsets - range_values = [m.group("value") for m in range_re.finditer(expr)] - expr = range_re.sub(r"[5y]", expr) - - offset_values = [m.group("value") for m in offset_re.finditer(expr)] - expr = offset_re.sub(r"offset 5y", expr) - # Retrieve the new expression (which may be unchanged if there were no label - # matchers in the expression, or if tt was unable to be parsed like logql. It's - # virtually impossible to tell from any datasource "name" in a panel what the - # actual type is without re-implementing a complete dashboard parser, but no - # harm will some from passing invalid promql -- we'll just get the original back. - # - replacement = transformer.inject_label_matchers(expr, topology, querytype) - - if replacement == target["expr"]: - # promql-tranform caught an error. Move on - continue - - # Go back and substitute values in [] which were pulled out - # Enumerate with an index... again. The same regex is ok, since it will still match - # `[(.*?)]`, which includes `[5y]`, our placeholder - for i, match in enumerate(range_re.finditer(replacement)): - # Replace one-by-one, starting from the left. We build the string back with - # `str.replace(string_to_replace, replacement_value, count)`. Limit the count - # to one, since we are going through one-by-one through the list we saved earlier - # in `range_values`. - replacement = replacement.replace( - "[{}]".format(match.group("value")), - "[{}]".format(range_values[i]), - 1, - ) + # Go back and substitute values in [] which were pulled out + # Enumerate with an index... again. The same regex is ok, since it will still match + # `[(.*?)]`, which includes `[5y]`, our placeholder + for i, match in enumerate(range_re.finditer(replacement)): + # Replace one-by-one, starting from the left. We build the string back with + # `str.replace(string_to_replace, replacement_value, count)`. Limit the count + # to one, since we are going through one-by-one through the list we saved earlier + # in `range_values`. + replacement = replacement.replace( + "[{}]".format(match.group("value")), + "[{}]".format(range_values[i]), + 1, + ) - for i, match in enumerate(offset_re.finditer(replacement)): - # Replace one-by-one, starting from the left. We build the string back with - # `str.replace(string_to_replace, replacement_value, count)`. Limit the count - # to one, since we are going through one-by-one through the list we saved earlier - # in `range_values`. - replacement = replacement.replace( - "offset {}".format(match.group("value")), - "offset {}".format(offset_values[i]), - 1, - ) + for i, match in enumerate(offset_re.finditer(replacement)): + # Replace one-by-one, starting from the left. We build the string back with + # `str.replace(string_to_replace, replacement_value, count)`. Limit the count + # to one, since we are going through one-by-one through the list we saved earlier + # in `range_values`. + replacement = replacement.replace( + "offset {}".format(match.group("value")), + "offset {}".format(offset_values[i]), + 1, + ) - # Use the index to insert it back in the same location - targets[idx]["expr"] = replacement + # Use the index to insert it back in the same location + targets[idx]["expr"] = replacement - panel["targets"] = targets - return panel + panel["targets"] = targets + return panel def _type_convert_stored(obj): @@ -1050,6 +1044,7 @@ def __init__( self.framework.observe(self._charm.on.leader_elected, self._update_all_dashboards_from_dir) self.framework.observe(self._charm.on.upgrade_charm, self._update_all_dashboards_from_dir) + self.framework.observe(self._charm.on.config_changed, self._update_all_dashboards_from_dir) self.framework.observe( self._charm.on[self._relation_name].relation_created, @@ -1074,7 +1069,7 @@ def add_dashboard(self, content: str, inject_dropdowns: bool = True) -> None: # that the stored state is there when this unit becomes leader. stored_dashboard_templates: Any = self._stored.dashboard_templates # pyright: ignore - encoded_dashboard = _encode_dashboard_content(content) + encoded_dashboard = LZMABase64.compress(content) # Use as id the first chars of the encoded dashboard, so that # it is predictable across units. @@ -1135,7 +1130,7 @@ def _is_dashboard(p: Path) -> bool: # path = Path(path) id = "file:{}".format(path.stem) stored_dashboard_templates[id] = self._content_to_dashboard_object( - _encode_dashboard_content(path.read_bytes()), inject_dropdowns + LZMABase64.compress(path.read_bytes()), inject_dropdowns ) stored_dashboard_templates[id]["dashboard_alt_uid"] = self._generate_alt_uid(id) @@ -1435,15 +1430,15 @@ def _render_dashboards_and_signal_changed(self, relation: Relation) -> bool: # error = None topology = template.get("juju_topology", {}) try: - content = _decode_dashboard_content(template["content"]) + content = LZMABase64.decompress(template["content"]) inject_dropdowns = template.get("inject_dropdowns", True) content = self._manage_dashboard_uid(content, template) - content = _convert_dashboard_fields(content, inject_dropdowns) + content = CharmedDashboard._convert_dashboard_fields(content, inject_dropdowns) if topology: - content = _inject_labels(content, topology, self._tranformer) + content = CharmedDashboard._inject_labels(content, topology, self._tranformer) - content = _encode_dashboard_content(content) + content = LZMABase64.compress(content) except lzma.LZMAError as e: error = str(e) relation_has_invalid_dashboards = True @@ -1532,7 +1527,7 @@ def _to_external_object(self, relation_id, dashboard): "id": dashboard["original_id"], "relation_id": relation_id, "charm": dashboard["template"]["charm"], - "content": _decode_dashboard_content(dashboard["content"]), + "content": LZMABase64.decompress(dashboard["content"]), } @property @@ -1823,7 +1818,7 @@ def _handle_reactive_dashboards(self, event: RelationEvent) -> Optional[Dict]: from jinja2 import DebugUndefined, Template - content = _encode_dashboard_content( + content = LZMABase64.compress( Template(dash, undefined=DebugUndefined).render(datasource=r"${prometheusds}") # type: ignore ) id = "prog:{}".format(content[-24:-16]) @@ -1863,7 +1858,7 @@ def is_dashboard(p: Path) -> bool: if event.app.name in path.name: # type: ignore id = "file:{}".format(path.stem) builtins[id] = self._content_to_dashboard_object( - _encode_dashboard_content(path.read_bytes()), event + LZMABase64.compress(path.read_bytes()), event ) return builtins diff --git a/charms/argo-controller/lib/charms/loki_k8s/v1/loki_push_api.py b/charms/argo-controller/lib/charms/loki_k8s/v1/loki_push_api.py index 4b3bcfe..d75cb7e 100644 --- a/charms/argo-controller/lib/charms/loki_k8s/v1/loki_push_api.py +++ b/charms/argo-controller/lib/charms/loki_k8s/v1/loki_push_api.py @@ -16,20 +16,24 @@ send log to Loki by implementing the consumer side of the `loki_push_api` relation interface. For instance, a Promtail or Grafana agent charm which needs to send logs to Loki. -- `LogProxyConsumer`: This object can be used by any Charmed Operator which needs to -send telemetry, such as logs, to Loki through a Log Proxy by implementing the consumer side of the -`loki_push_api` relation interface. +- `LogProxyConsumer`: DEPRECATED. +This object can be used by any Charmed Operator which needs to send telemetry, such as logs, to +Loki through a Log Proxy by implementing the consumer side of the `loki_push_api` relation +interface. +In order to be able to control the labels on the logs pushed this object adds a Pebble layer +that runs Promtail in the workload container, injecting Juju topology labels into the +logs on the fly. +This object is deprecated. Consider migrating to LogForwarder with the release of Juju 3.6 LTS. - `LogForwarder`: This object can be used by any Charmed Operator which needs to send the workload standard output (stdout) through Pebble's log forwarding mechanism, to Loki endpoints through the `loki_push_api` relation interface. +In order to be able to control the labels on the logs pushed this object updates the pebble layer's +"log-targets" section with Juju topology. Filtering logs in Loki is largely performed on the basis of labels. In the Juju ecosystem, Juju topology labels are used to uniquely identify the workload which generates telemetry like logs. -In order to be able to control the labels on the logs pushed this object adds a Pebble layer -that runs Promtail in the workload container, injecting Juju topology labels into the -logs on the fly. ## LokiPushApiProvider Library Usage @@ -42,13 +46,14 @@ - `charm`: A reference to the parent (Loki) charm. - `relation_name`: The name of the relation that the charm uses to interact - with its clients, which implement `LokiPushApiConsumer` or `LogProxyConsumer`. + with its clients, which implement `LokiPushApiConsumer` `LogForwarder`, or `LogProxyConsumer` + (note that LogProxyConsumer is deprecated). If provided, this relation name must match a provided relation in metadata.yaml with the `loki_push_api` interface. - The default relation name is "logging" for `LokiPushApiConsumer` and "log-proxy" for - `LogProxyConsumer`. + The default relation name is "logging" for `LokiPushApiConsumer` and `LogForwarder`, and + "log-proxy" for `LogProxyConsumer` (note that LogProxyConsumer is deprecated). For example, a provider's `metadata.yaml` file may look as follows: @@ -223,6 +228,9 @@ def __init__(self, *args): ## LogProxyConsumer Library Usage +> Note: This object is deprecated. Consider migrating to LogForwarder with the release of Juju 3.6 +> LTS. + Let's say that we have a workload charm that produces logs, and we need to send those logs to a workload implementing the `loki_push_api` interface, such as `Loki` or `Grafana Agent`. @@ -472,6 +480,25 @@ def _alert_rules_error(self, event): Units of consumer charm send their alert rules over app relation data using the `alert_rules` key. + +## Charm logging +The `charms.loki_k8s.v0.charm_logging` library can be used in conjunction with this one to configure python's +logging module to forward all logs to Loki via the loki-push-api interface. + +```python +from lib.charms.loki_k8s.v0.charm_logging import log_charm +from lib.charms.loki_k8s.v1.loki_push_api import charm_logging_config, LokiPushApiConsumer + +@log_charm(logging_endpoint="my_endpoints", server_cert="cert_path") +class MyCharm(...): + _cert_path = "/path/to/cert/on/charm/container.crt" + def __init__(self, ...): + self.logging = LokiPushApiConsumer(...) + self.my_endpoints, self.cert_path = charm_logging_config( + self.logging, self._cert_path) +``` + +Do this, and all charm logs will be forwarded to Loki as soon as a relation is formed. """ import json @@ -519,7 +546,7 @@ def _alert_rules_error(self, event): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 10 +LIBPATCH = 13 PYDEPS = ["cosl"] @@ -569,7 +596,11 @@ def _alert_rules_error(self, event): GRPC_LISTEN_PORT_START = 9095 # odd start port -class RelationNotFoundError(ValueError): +class LokiPushApiError(Exception): + """Base class for errors raised by this module.""" + + +class RelationNotFoundError(LokiPushApiError): """Raised if there is no relation with the given name.""" def __init__(self, relation_name: str): @@ -579,7 +610,7 @@ def __init__(self, relation_name: str): super().__init__(self.message) -class RelationInterfaceMismatchError(Exception): +class RelationInterfaceMismatchError(LokiPushApiError): """Raised if the relation with the given name has a different interface.""" def __init__( @@ -599,7 +630,7 @@ def __init__( super().__init__(self.message) -class RelationRoleMismatchError(Exception): +class RelationRoleMismatchError(LokiPushApiError): """Raised if the relation with the given name has a different direction.""" def __init__( @@ -1593,7 +1624,8 @@ def __init__( the Loki API endpoint to push logs. It is intended for workloads that can speak loki_push_api (https://grafana.com/docs/loki/latest/api/#push-log-entries-to-loki), such as grafana-agent. - (If you only need to forward a few workload log files, then use LogProxyConsumer.) + (If you need to forward workload stdout logs, then use LogForwarder; if you need to forward + log files, then use LogProxyConsumer.) `LokiPushApiConsumer` can be instantiated as follows: @@ -1768,6 +1800,9 @@ class LogProxyEvents(ObjectEvents): class LogProxyConsumer(ConsumerBase): """LogProxyConsumer class. + > Note: This object is deprecated. Consider migrating to LogForwarder with the release of Juju + > 3.6 LTS. + The `LogProxyConsumer` object provides a method for attaching `promtail` to a workload in order to generate structured logging data from applications which traditionally log to syslog or do not have native Loki integration. @@ -2543,14 +2578,18 @@ def _on_pebble_ready(self, event: PebbleReadyEvent): self._update_endpoints(event.workload, loki_endpoints) - def _update_logging(self, _): + def _update_logging(self, event: RelationEvent): """Update the log forwarding to match the active Loki endpoints.""" if not (loki_endpoints := self._retrieve_endpoints_from_relation()): logger.warning("No Loki endpoints available") return for container in self._charm.unit.containers.values(): - self._update_endpoints(container, loki_endpoints) + if container.can_connect(): + self._update_endpoints(container, loki_endpoints) + # else: `_update_endpoints` will be called on pebble-ready anyway. + + self._handle_alert_rules(event.relation) def _retrieve_endpoints_from_relation(self) -> dict: loki_endpoints = {} @@ -2736,3 +2775,49 @@ def _exec(self, cmd) -> str: result = subprocess.run(cmd, check=True, stdout=subprocess.PIPE) output = result.stdout.decode("utf-8").strip() return output + + +def charm_logging_config( + endpoint_requirer: LokiPushApiConsumer, cert_path: Optional[Union[Path, str]] +) -> Tuple[Optional[List[str]], Optional[str]]: + """Utility function to determine the charm_logging config you will likely want. + + If no endpoint is provided: + disable charm logging. + If https endpoint is provided but cert_path is not found on disk: + disable charm logging. + If https endpoint is provided and cert_path is None: + ERROR + Else: + proceed with charm logging (with or without tls, as appropriate) + + Args: + endpoint_requirer: an instance of LokiPushApiConsumer. + cert_path: a path where a cert is stored. + + Returns: + A tuple with (optionally) the values of the endpoints and the certificate path. + + Raises: + LokiPushApiError: if some endpoint are http and others https. + """ + endpoints = [ep["url"] for ep in endpoint_requirer.loki_endpoints] + if not endpoints: + return None, None + + https = tuple(endpoint.startswith("https://") for endpoint in endpoints) + + if all(https): # all endpoints are https + if cert_path is None: + raise LokiPushApiError("Cannot send logs to https endpoints without a certificate.") + if not Path(cert_path).exists(): + # if endpoints is https BUT we don't have a server_cert yet: + # disable charm logging until we do to prevent tls errors + return None, None + return endpoints, str(cert_path) + + if all(not x for x in https): # all endpoints are http + return endpoints, None + + # if there's a disagreement, that's very weird: + raise LokiPushApiError("Some endpoints are http, some others are https. That's not good.") diff --git a/charms/argo-controller/lib/charms/observability_libs/v1/kubernetes_service_patch.py b/charms/argo-controller/lib/charms/observability_libs/v1/kubernetes_service_patch.py index 2cce729..4d37a38 100644 --- a/charms/argo-controller/lib/charms/observability_libs/v1/kubernetes_service_patch.py +++ b/charms/argo-controller/lib/charms/observability_libs/v1/kubernetes_service_patch.py @@ -1,131 +1,18 @@ # Copyright 2021 Canonical Ltd. # See LICENSE file for licensing details. -"""# KubernetesServicePatch Library. - -This library is designed to enable developers to more simply patch the Kubernetes Service created -by Juju during the deployment of a sidecar charm. When sidecar charms are deployed, Juju creates a -service named after the application in the namespace (named after the Juju model). This service by -default contains a "placeholder" port, which is 65536/TCP. - -When modifying the default set of resources managed by Juju, one must consider the lifecycle of the -charm. In this case, any modifications to the default service (created during deployment), will be -overwritten during a charm upgrade. - -When initialised, this library binds a handler to the parent charm's `install` and `upgrade_charm` -events which applies the patch to the cluster. This should ensure that the service ports are -correct throughout the charm's life. - -The constructor simply takes a reference to the parent charm, and a list of -[`lightkube`](https://github.com/gtsystem/lightkube) ServicePorts that each define a port for the -service. For information regarding the `lightkube` `ServicePort` model, please visit the -`lightkube` [docs](https://gtsystem.github.io/lightkube-models/1.23/models/core_v1/#serviceport). - -Optionally, a name of the service (in case service name needs to be patched as well), labels, -selectors, and annotations can be provided as keyword arguments. - -## Getting Started - -To get started using the library, you just need to fetch the library using `charmcraft`. **Note -that you also need to add `lightkube` and `lightkube-models` to your charm's `requirements.txt`.** - -```shell -cd some-charm -charmcraft fetch-lib charms.observability_libs.v1.kubernetes_service_patch -cat << EOF >> requirements.txt -lightkube -lightkube-models -EOF -``` - -Then, to initialise the library: - -For `ClusterIP` services: - -```python -# ... -from charms.observability_libs.v1.kubernetes_service_patch import KubernetesServicePatch -from lightkube.models.core_v1 import ServicePort - -class SomeCharm(CharmBase): - def __init__(self, *args): - # ... - port = ServicePort(443, name=f"{self.app.name}") - self.service_patcher = KubernetesServicePatch(self, [port]) - # ... -``` - -For `LoadBalancer`/`NodePort` services: - -```python -# ... -from charms.observability_libs.v1.kubernetes_service_patch import KubernetesServicePatch -from lightkube.models.core_v1 import ServicePort - -class SomeCharm(CharmBase): - def __init__(self, *args): - # ... - port = ServicePort(443, name=f"{self.app.name}", targetPort=443, nodePort=30666) - self.service_patcher = KubernetesServicePatch( - self, [port], "LoadBalancer" - ) - # ... -``` - -Port protocols can also be specified. Valid protocols are `"TCP"`, `"UDP"`, and `"SCTP"` - -```python -# ... -from charms.observability_libs.v1.kubernetes_service_patch import KubernetesServicePatch -from lightkube.models.core_v1 import ServicePort - -class SomeCharm(CharmBase): - def __init__(self, *args): - # ... - tcp = ServicePort(443, name=f"{self.app.name}-tcp", protocol="TCP") - udp = ServicePort(443, name=f"{self.app.name}-udp", protocol="UDP") - sctp = ServicePort(443, name=f"{self.app.name}-sctp", protocol="SCTP") - self.service_patcher = KubernetesServicePatch(self, [tcp, udp, sctp]) - # ... -``` - -Bound with custom events by providing `refresh_event` argument: -For example, you would like to have a configurable port in your charm and want to apply -service patch every time charm config is changed. - -```python -from charms.observability_libs.v1.kubernetes_service_patch import KubernetesServicePatch -from lightkube.models.core_v1 import ServicePort - -class SomeCharm(CharmBase): - def __init__(self, *args): - # ... - port = ServicePort(int(self.config["charm-config-port"]), name=f"{self.app.name}") - self.service_patcher = KubernetesServicePatch( - self, - [port], - refresh_event=self.on.config_changed - ) - # ... -``` - -Additionally, you may wish to use mocks in your charm's unit testing to ensure that the library -does not try to make any API calls, or open any files during testing that are unlikely to be -present, and could break your tests. The easiest way to do this is during your test `setUp`: - -```python -# ... - -@patch("charm.KubernetesServicePatch", lambda x, y: None) -def setUp(self, *unused): - self.harness = Harness(SomeCharm) - # ... -``` +"""# [DEPRECATED!] KubernetesServicePatch Library. + +The `kubernetes_service_patch` library is DEPRECATED and will be removed in October 2025. + +For patching the Kubernetes service created by Juju during the deployment of a charm, +`ops.Unit.set_ports` functionality should be used instead. + """ import logging from types import MethodType -from typing import List, Literal, Optional, Union +from typing import Any, List, Literal, Optional, Union from lightkube import ApiError, Client # pyright: ignore from lightkube.core import exceptions @@ -133,6 +20,7 @@ def setUp(self, *unused): from lightkube.models.meta_v1 import ObjectMeta from lightkube.resources.core_v1 import Service from lightkube.types import PatchType +from ops import UpgradeCharmEvent from ops.charm import CharmBase from ops.framework import BoundEvent, Object @@ -146,7 +34,7 @@ def setUp(self, *unused): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 9 +LIBPATCH = 13 ServiceType = Literal["ClusterIP", "LoadBalancer"] @@ -184,12 +72,22 @@ def __init__( will be observed to re-apply the patch (e.g. on port change). The `install` and `upgrade-charm` events would be observed regardless. """ + logger.warning( + "The ``kubernetes_service_patch v1`` library is DEPRECATED and will be removed " + "in October 2025. For patching the Kubernetes service created by Juju during " + "the deployment of a charm, ``ops.Unit.set_ports`` functionality should be used instead." + ) super().__init__(charm, "kubernetes-service-patch") self.charm = charm - self.service_name = service_name if service_name else self._app + self.service_name = service_name or self._app + # To avoid conflicts with the default Juju service, append "-lb" to the service name. + # The Juju application name is retained for the default service created by Juju. + if self.service_name == self._app and service_type == "LoadBalancer": + self.service_name = f"{self._app}-lb" + self.service_type = service_type self.service = self._service_object( ports, - service_name, + self.service_name, service_type, additional_labels, additional_selectors, @@ -200,8 +98,11 @@ def __init__( assert isinstance(self._patch, MethodType) # Ensure this patch is applied during the 'install' and 'upgrade-charm' events self.framework.observe(charm.on.install, self._patch) - self.framework.observe(charm.on.upgrade_charm, self._patch) + self.framework.observe(charm.on.upgrade_charm, self._on_upgrade_charm) self.framework.observe(charm.on.update_status, self._patch) + # Sometimes Juju doesn't clean-up a manually created LB service, + # so we clean it up ourselves just in case. + self.framework.observe(charm.on.remove, self._remove_service) # apply user defined events if refresh_event: @@ -277,7 +178,10 @@ def _patch(self, _) -> None: if self._is_patched(client): return if self.service_name != self._app: - self._delete_and_create_service(client) + if not self.service_type == "LoadBalancer": + self._delete_and_create_service(client) + else: + self._create_lb_service(client) client.patch(Service, self.service_name, self.service, patch_type=PatchType.MERGE) except ApiError as e: if e.status.code == 403: @@ -294,6 +198,12 @@ def _delete_and_create_service(self, client: Client): client.delete(Service, self._app, namespace=self._namespace) client.create(service) + def _create_lb_service(self, client: Client): + try: + client.get(Service, self.service_name, namespace=self._namespace) + except ApiError: + client.create(self.service) + def is_patched(self) -> bool: """Reports if the service patch has been applied. @@ -321,6 +231,60 @@ def _is_patched(self, client: Client) -> bool: ] # noqa: E501 return expected_ports == fetched_ports + def _on_upgrade_charm(self, event: UpgradeCharmEvent): + """Handle the upgrade charm event.""" + # If a charm author changed the service type from LB to ClusterIP across an upgrade, we need to delete the previous LB. + if self.service_type == "ClusterIP": + + client = Client() # pyright: ignore + + # Define a label selector to find services related to the app + selector: dict[str, Any] = {"app.kubernetes.io/name": self._app} + + # Check if any service of type LoadBalancer exists + services = client.list(Service, namespace=self._namespace, labels=selector) + for service in services: + if ( + not service.metadata + or not service.metadata.name + or not service.spec + or not service.spec.type + ): + logger.warning( + "Service patch: skipping resource with incomplete metadata: %s.", service + ) + continue + if service.spec.type == "LoadBalancer": + client.delete(Service, service.metadata.name, namespace=self._namespace) + logger.info(f"LoadBalancer service {service.metadata.name} deleted.") + + # Continue the upgrade flow normally + self._patch(event) + + def _remove_service(self, _): + """Remove a Kubernetes service associated with this charm. + + Specifically designed to delete the load balancer service created by the charm, since Juju only deletes the + default ClusterIP service and not custom services. + + Returns: + None + + Raises: + ApiError: for deletion errors, excluding when the service is not found (404 Not Found). + """ + client = Client() # pyright: ignore + + try: + client.delete(Service, self.service_name, namespace=self._namespace) + logger.info("The patched k8s service '%s' was deleted.", self.service_name) + except ApiError as e: + if e.status.code == 404: + # Service not found, so no action needed + return + # Re-raise for other statuses + raise + @property def _app(self) -> str: """Name of the current Juju application. diff --git a/charms/argo-controller/lib/charms/prometheus_k8s/v0/prometheus_scrape.py b/charms/argo-controller/lib/charms/prometheus_k8s/v0/prometheus_scrape.py index be96768..e3d35c6 100644 --- a/charms/argo-controller/lib/charms/prometheus_k8s/v0/prometheus_scrape.py +++ b/charms/argo-controller/lib/charms/prometheus_k8s/v0/prometheus_scrape.py @@ -178,7 +178,7 @@ def __init__(self, *args): - `scrape_timeout` - `proxy_url` - `relabel_configs` -- `metrics_relabel_configs` +- `metric_relabel_configs` - `sample_limit` - `label_limit` - `label_name_length_limit` @@ -362,7 +362,7 @@ def _on_scrape_targets_changed(self, event): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 46 +LIBPATCH = 47 PYDEPS = ["cosl"] @@ -377,7 +377,7 @@ def _on_scrape_targets_changed(self, event): "scrape_timeout", "proxy_url", "relabel_configs", - "metrics_relabel_configs", + "metric_relabel_configs", "sample_limit", "label_limit", "label_name_length_limit", diff --git a/charms/argo-controller/requirements-unit.txt b/charms/argo-controller/requirements-unit.txt index 0b690a7..36847e6 100644 --- a/charms/argo-controller/requirements-unit.txt +++ b/charms/argo-controller/requirements-unit.txt @@ -46,7 +46,7 @@ charset-normalizer==3.4.0 # via # -r requirements.txt # requests -cosl==0.0.45 +cosl==0.0.50 # via -r requirements.txt coverage==7.6.1 # via -r requirements-unit.in diff --git a/charms/argo-controller/requirements.txt b/charms/argo-controller/requirements.txt index 545d4a0..172c92a 100644 --- a/charms/argo-controller/requirements.txt +++ b/charms/argo-controller/requirements.txt @@ -30,7 +30,7 @@ charmed-kubeflow-chisme==0.4.3 # via -r requirements.in charset-normalizer==3.4.0 # via requests -cosl==0.0.45 +cosl==0.0.50 # via -r requirements.in cryptography==44.0.0 # via paramiko