Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hotfix/ssl fix #12

Merged
merged 11 commits into from
Dec 20, 2023
32 changes: 28 additions & 4 deletions aviso-server/monitoring/aviso_monitoring/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def __init__(
aviso_auth_reporter=None,
etcd_reporter=None,
prometheus_reporter=None,
kube_state_metrics=None,
):
try:
# we build the configuration in priority order from the lower to the higher
Expand All @@ -41,6 +42,7 @@ def __init__(
self.aviso_auth_reporter = aviso_auth_reporter
self.etcd_reporter = etcd_reporter
self.prometheus_reporter = prometheus_reporter
self.kube_state_metrics = kube_state_metrics

logger.debug("Loading configuration completed")

Expand Down Expand Up @@ -113,6 +115,8 @@ def _create_default_config() -> Dict:
},
}

kube_state_metrics = {"ssl_enabled": False, "token": None}

# main config
config = {}
config["udp_server"] = udp_server
Expand All @@ -121,6 +125,7 @@ def _create_default_config() -> Dict:
config["aviso_auth_reporter"] = aviso_auth_reporter
config["etcd_reporter"] = etcd_reporter
config["prometheus_reporter"] = prometheus_reporter
config["kube_state_metrics"] = kube_state_metrics
return config

def _read_env_variables(self) -> Dict:
Expand Down Expand Up @@ -179,7 +184,7 @@ def aviso_rest_reporter(self, aviso_rest_reporter):
assert ar is not None, "aviso_rest_reporter has not been configured"
assert ar.get("tlms") is not None, "aviso_rest_reporter tlms has not been configured"
assert ar.get("enabled") is not None, "aviso_rest_reporter enabled has not been configured"
if type(ar["enabled"]) is str:
if isinstance(ar["enabled"], str):
ar["enabled"] = ar["enabled"].casefold() == "true".casefold()
assert ar.get("frequency") is not None, "aviso_rest_reporter frequency has not been configured"
self._aviso_rest_reporter = ar
Expand All @@ -199,7 +204,7 @@ def aviso_auth_reporter(self, aviso_auth_reporter):
assert aa is not None, "aviso_auth_reporter has not been configured"
assert aa.get("tlms") is not None, "aviso_auth_reporter tlms has not been configured"
assert aa.get("enabled") is not None, "aviso_auth_reporter enabled has not been configured"
if type(aa["enabled"]) is str:
if isinstance(aa["enabled"], str):
aa["enabled"] = aa["enabled"].casefold() == "true".casefold()
assert aa.get("frequency") is not None, "aviso_auth_reporter frequency has not been configured"
self._aviso_auth_reporter = aa
Expand All @@ -219,7 +224,7 @@ def etcd_reporter(self, etcd_reporter):
assert e is not None, "etcd_reporter has not been configured"
assert e.get("tlms") is not None, "etcd_reporter tlms has not been configured"
assert e.get("enabled") is not None, "etcd_reporter enabled has not been configured"
if type(e["enabled"]) is str:
if isinstance(e["enabled"], str):
e["enabled"] = e["enabled"].casefold() == "true".casefold()
assert e.get("frequency") is not None, "etcd_reporter frequency has not been configured"
assert e.get("member_urls") is not None, "etcd_reporter member_urls has not been configured"
Expand All @@ -241,11 +246,29 @@ def prometheus_reporter(self, prometheus_reporter):
assert pr is not None, "prometheus_reporter has not been configured"
assert pr.get("host") is not None, "prometheus_reporter host has not been configured"
assert pr.get("enabled") is not None, "prometheus_reporter enabled has not been configured"
if type(pr["enabled"]) is str:
if isinstance(pr["enabled"], str):
pr["enabled"] = pr["enabled"].casefold() == "true".casefold()
assert pr.get("port") is not None, "prometheus_reporter port has not been configured"
self._prometheus_reporter = pr

@property
def kube_state_metrics(self):
return self._kube_state_metrics

@kube_state_metrics.setter
def kube_state_metrics(self, kube_state_metrics):
ksm = self._config.get("kube_state_metrics")
if kube_state_metrics is not None and ksm is not None:
Config.deep_update(ksm, kube_state_metrics)
elif kube_state_metrics is not None:
ksm = kube_state_metrics
# verify is valid
assert ksm is not None, "kube_state_metrics has not been configured"
assert ksm.get("ssl_enabled") is not None, "kube_state_metrics ssl_enabled has not been configured"
if ksm["ssl_enabled"]:
assert ksm.get("token") is not None, "kube_state_metrics token has not been configured"
self._kube_state_metrics = ksm

def __str__(self):
config_string = (
f"udp_server: {self.udp_server}"
Expand All @@ -254,6 +277,7 @@ def __str__(self):
+ f", aviso_auth_reporter: {self.aviso_auth_reporter}"
+ f", etcd_reporter: {self.etcd_reporter}"
+ f", prometheus_reporter: {self.prometheus_reporter}"
+ f", kube_state_metrics: {self.kube_state_metrics}"
)
return config_string

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,12 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

def metric(self):
pattern = r'kube_deployment_status_replicas{namespace="aviso",deployment="aviso-auth-\w+"}'
namespace = self.get_k8s_pod_namespace()
if not namespace:
logger.warning("Could not determine the pod's namespace.")
namespace = "aviso"

pattern = rf'kube_deployment_status_replicas{{namespace="{namespace}",deployment="aviso-auth"}}'
# defaults
status = 0
message = "All pods available"
Expand Down Expand Up @@ -243,3 +248,30 @@ def metric(self):
m_status = {"name": self.metric_name, "status": 1, "message": "Metric could not be retrieved"}
logger.debug(f"{self.metric_name} metric: {m_status}")
return m_status

@staticmethod
def get_k8s_pod_namespace():
"""
Retrieves the Kubernetes (k8s) namespace in which the current pod is running.

This function reads the namespace name from a file that Kubernetes automatically
mounts inside the pod. This file is typically located at:
'/var/run/secrets/kubernetes.io/serviceaccount/namespace'

Returns:
str: The namespace in which the pod is running. If the namespace cannot be determined
(e.g., the file doesn't exist or the pod is not running in a k8s environment),
the function returns None.
"""
namespace_file = "/var/run/secrets/kubernetes.io/serviceaccount/namespace"
try:
with open(namespace_file, "r") as file:
return file.read().strip()
except FileNotFoundError:
logger.error(f"Namespace file not found: {namespace_file}")
except IOError as e:
logger.error(f"I/O error occurred when reading namespace file: {e}")
except Exception as e:
logger.exception(f"Unexpected error occurred when reading namespace file: {e}")

return None
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ def __init__(self, config, *args, **kwargs):
self.frequency = aviso_rest_config["frequency"]
self.enabled = aviso_rest_config["enabled"]
self.tlms = aviso_rest_config["tlms"]
# configure the metric vars once only here
OpsviewReporter.configure_metric_vars(config)
super().__init__(config, *args, **kwargs)

def process_messages(self):
Expand Down Expand Up @@ -182,7 +184,12 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

def metric(self):
pattern = r'kube_deployment_status_replicas{namespace="aviso",deployment="aviso-rest-\w+"}'
namespace = self.get_k8s_pod_namespace()
if not namespace:
logger.warning("Could not determine the pod's namespace.")
namespace = "aviso"

pattern = rf'kube_deployment_status_replicas{{namespace="{namespace}",deployment="aviso-rest"}}'
# defaults
status = 0
message = "All pods available"
Expand Down Expand Up @@ -224,3 +231,30 @@ def metric(self):
m_status = {"name": self.metric_name, "status": 1, "message": "Metric could not be retrieved"}
logger.debug(f"{self.metric_name} metric: {m_status}")
return m_status

@staticmethod
def get_k8s_pod_namespace():
"""
Retrieves the Kubernetes (k8s) namespace in which the current pod is running.

This function reads the namespace name from a file that Kubernetes automatically
mounts inside the pod. This file is typically located at:
'/var/run/secrets/kubernetes.io/serviceaccount/namespace'

Returns:
str: The namespace in which the pod is running. If the namespace cannot be determined
(e.g., the file doesn't exist or the pod is not running in a k8s environment),
the function returns None.
"""
namespace_file = "/var/run/secrets/kubernetes.io/serviceaccount/namespace"
try:
with open(namespace_file, "r") as file:
return file.read().strip()
except FileNotFoundError:
logger.error(f"Namespace file not found: {namespace_file}")
except IOError as e:
logger.error(f"I/O error occurred when reading namespace file: {e}")
except Exception as e:
logger.exception(f"Unexpected error occurred when reading namespace file: {e}")

return None
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,25 @@


class OpsviewReporter(ABC):
metric_ssl_enabled = False
metric_token = ""

def __init__(self, config: Config, msg_receiver=None):
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
self.monitor_servers = config.monitor_servers
self.msg_receiver = msg_receiver
self.token = {}

@classmethod
def configure_metric_vars(cls, config):
"""
Configures the class attributes based on the provided config.
"""
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
if config.kube_state_metrics["ssl_enabled"]:
cls.metric_ssl_enabled = True
cls.metric_token = config.kube_state_metrics["token"]

def ms_authenticate(self, m_server):
"""
This method authenticate to the monitoring server
Expand Down Expand Up @@ -199,16 +212,20 @@ def aggregate_unique_counter_tlms(tlms):
}
return agg_tlm

def retrieve_metrics(metric_servers, req_timeout):
@classmethod
def retrieve_metrics(cls, metric_servers, req_timeout):
"""
This methods retrieves the metrics provided by specific metric servers using a Prometheus interface.
"""
raw_tlms = {}
for u in metric_servers:
url = u + "/metrics"
logger.debug(f"Retrieving metrics from {url}...")
headers = {}
try:
resp = requests.get(url, verify=False, timeout=req_timeout)
if cls.metric_ssl_enabled:
headers["Authorization"] = f"Bearer {cls.metric_token}"
resp = requests.get(url, verify=False, timeout=req_timeout, headers=headers)
except Exception as e:
logger.exception(f"Not able to get metrics from {url}, error {e}")
raw_tlms[u] = None
Expand Down
Loading