Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add network policies #56

Draft
wants to merge 21 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ tox -e integration # integration tests
tox # runs 'fmt', 'lint', and 'unit' environments
```

Running the integration tests requires that you have a valid kubeconfig file at `~/.kube/config`.

## Build charm

Build the charm in this git repository using:
Expand Down
160 changes: 160 additions & 0 deletions lib/charms/kratos/v0/kubernetes_network_policies.py

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where are the unit tests for this file 👀

Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
#!/usr/bin/env python3
# Copyright 2023 Canonical Ltd.
# See LICENSE file for licensing details.

"""Library for creating network policies.
This library provides a Python API for creating kubernetes ingress network policies.

## Getting Started
To get started using the library, you need to fetch the library using `charmcraft`.
```shell
cd some-charm
charmcraft fetch-lib charms.kratos.v0.kubernetes_network_policies
```
Then, to initialise the library:
```python
from charms.kratos.v0.kubernetes_network_policies import (
K8sNetworkPoliciesHandler,
NetworkPoliciesHandlerError,
)
Class SomeCharm(CharmBase):
def __init__(self, *args):
self.network_policy_handler = K8sNetworkPoliciesHandler(self)

def some_event_function():
policies = [(PortDefinition("admin"), [self.admin_ingress_relation]), (PortDefinition(8080), [])]
self.network_policy_handler.apply_ingress_policy(policies)
```

The function in this example will only allow traffic to the charm pod to the "admin" port from the app on the
other side of the `admin_ingress_relation` and all traffic to the "8080" port. Ingress traffic to all other ports
will be denied.
"""

import logging
from dataclasses import dataclass
from typing import List, Optional, Tuple, Union

from lightkube import ApiError, Client
from lightkube.models.meta_v1 import LabelSelector, ObjectMeta
from lightkube.models.networking_v1 import (
NetworkPolicyIngressRule,
NetworkPolicyPeer,
NetworkPolicyPort,
NetworkPolicySpec,
)
from lightkube.resources.networking_v1 import NetworkPolicy
from ops.charm import CharmBase
from ops.model import Relation


# The unique Charmhub library identifier, never change it
LIBID = "f0a1c7a9bc084be09b1052810651b7ed"

# Increment this major API version when introducing breaking changes
LIBAPI = 0

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 1

logger = logging.getLogger(__name__)


class NetworkPoliciesHandlerError(Exception):
"""Raised when applying the network policies failed."""


Port = Union[str, int]
IngressPolicyDefinition = Tuple[Port, List[Relation]]


class KubernetesNetworkPoliciesHandler:
"""A helper class for managing kubernetes network policies."""

def __init__(self, charm: CharmBase) -> None:
self._charm = charm
self.client = Client(field_manager=charm.app.name, namespace=charm.model.name)

@property
def policy_name(self) -> str:
"""The default policy name that will be created."""
return f"{self._charm.app.name}-network-policy"

def apply_ingress_policies(
self, policies: List[IngressPolicyDefinition], name: Optional[str] = None
) -> None:
"""Apply an ingress network policy for a related application.

Policies can be defined for multiple ports at once to allow ingress traffic
from related applications

If no policies are defined then all ingress traffic will be denied.

Example usage:

policies = [("admin", [admin_ingress_relation]), (8080, [public_ingress_relation])]
network_policy_handler.apply_ingress_policy(policies)
"""
if not name:
name = self.policy_name

ingress = []
for port, relations in policies:
selectors = [
NetworkPolicyPeer(
podSelector=LabelSelector(
matchLabels={"app.kubernetes.io/name": relation.app.name}
)
)
for relation in relations
if relation and relation.app
]
ingress.append(
NetworkPolicyIngressRule(
from_=selectors,
ports=[NetworkPolicyPort(port=port, protocol="TCP")],
),
)

policy = NetworkPolicy(
metadata=ObjectMeta(name=name),
spec=NetworkPolicySpec(
podSelector=LabelSelector(
matchLabels={
"app.kubernetes.io/name": self._charm.app.name,
}
),
policyTypes=["Ingress"],
ingress=ingress,
),
)


try:
self.client.apply(
policy,
namespace=self._charm.model.name,
)
except ApiError as e:
if e.status.code == 403:
msg = f"Kubernetes resources patch failed: `juju trust` this application. {e}"
else:
msg = f"Kubernetes resources patch failed: {e}"
logger.error(msg)
raise NetworkPoliciesHandlerError()

def delete_ingress_policies(self, name: Optional[str] = None) -> None:
"""Delete a network policy rule."""
if not name:
name = self.policy_name

try:
self.client.delete(NetworkPolicy, name, namespace=self._charm.model.name)
except ApiError as e:
if e.status.code == 403:
msg = f"Kubernetes resources patch failed: `juju trust` this application. {e}"
else:
msg = f"Kubernetes resources patch failed: {e}"
logger.error(msg)
raise NetworkPoliciesHandlerError()
30 changes: 30 additions & 0 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@
LoginUITooManyRelatedAppsError,
)
from charms.kratos.v0.kratos_endpoints import KratosEndpointsProvider
from charms.kratos.v0.kubernetes_network_policies import (
KubernetesNetworkPoliciesHandler,
NetworkPoliciesHandlerError,
)
from charms.kratos_external_idp_integrator.v0.kratos_external_provider import (
ClientConfigChangedEvent,
ExternalIdpRequirer,
Expand All @@ -49,6 +53,7 @@
HookEvent,
PebbleReadyEvent,
RelationEvent,
RemoveEvent,
)
from ops.main import main
from ops.model import ActiveStatus, BlockedStatus, MaintenanceStatus, ModelError, WaitingStatus
Expand Down Expand Up @@ -117,6 +122,7 @@ def __init__(self, *args: Any) -> None:
port=KRATOS_PUBLIC_PORT,
strip_prefix=True,
)
self.network_policy_handler = KubernetesNetworkPoliciesHandler(self)

self.database = DatabaseRequires(
self,
Expand All @@ -139,6 +145,7 @@ def __init__(self, *args: Any) -> None:

self.framework.observe(self.on.kratos_pebble_ready, self._on_pebble_ready)
self.framework.observe(self.on.config_changed, self._on_config_changed)
self.framework.observe(self.on.remove, self._cleanup)
self.framework.observe(
self.endpoints_provider.on.ready, self._update_kratos_endpoints_relation_data
)
Expand Down Expand Up @@ -442,6 +449,10 @@ def _on_config_changed(self, event: ConfigChangedEvent) -> None:
"""Event Handler for config changed event."""
self._handle_status_update_config(event)

def _cleanup(self, event: RemoveEvent) -> None:
logger.info("Removing charm")
self.network_policy_handler.delete_ingress_policies()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be a juju bug, but when I try removing kratos it gets into terminated state with unit lost. When I use --force --no-wait, the charm is removed but in both cases the network policy is not deleted.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I tried testing it a little more and when I test it manually, the policy is removed. I wrote an integration test to validate this and in the test the policy is not removed. This is very weird.

Copy link
Contributor Author

@nsklikas nsklikas Apr 28, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as I can tell the stop event is never fired, this is most likely a juju bug and not something we can control. Let's leave this as it is for now, it is the best we can do and juju should (hopefully) fix it in a later version.


def _update_kratos_endpoints_relation_data(self, event: RelationEvent) -> None:
admin_endpoint = (
self.admin_ingress.url
Expand Down Expand Up @@ -521,20 +532,39 @@ def _on_admin_ingress_ready(self, event: IngressPerAppReadyEvent) -> None:

self._handle_status_update_config(event)
self._update_kratos_endpoints_relation_data(event)
self._apply_network_policies()

def _apply_network_policies(self) -> None:
if not self.unit.is_leader():
return

try:
self.network_policy_handler.apply_ingress_policies(
[
(KRATOS_PUBLIC_PORT, [self.public_ingress.relation]),
(KRATOS_ADMIN_PORT, [self.admin_ingress.relation]),
(38812, []),
(38813, []),
]
)
except NetworkPoliciesHandlerError:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we retry applying the network policy?
Right now if you deploy kratos without trust, it gets into BlockedStatus, but if you trust it then it gets into active without creating the network policy.

pass

def _on_public_ingress_ready(self, event: IngressPerAppReadyEvent) -> None:
if self.unit.is_leader():
logger.info("This app's public ingress URL: %s", event.url)

self._handle_status_update_config(event)
self._update_kratos_endpoints_relation_data(event)
self._apply_network_policies()

def _on_ingress_revoked(self, event: IngressPerAppRevokedEvent) -> None:
if self.unit.is_leader():
logger.info("This app no longer has ingress")

self._handle_status_update_config(event)
self._update_kratos_endpoints_relation_data(event)
self._apply_network_policies()

def _on_client_config_changed(self, event: ClientConfigChangedEvent) -> None:
domain_url = self._domain_url
Expand Down
14 changes: 14 additions & 0 deletions tests/integration/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/env python3
# Copyright 2023 Canonical Ltd.

import os

import pytest
from lightkube import Client, KubeConfig

KUBECONFIG = os.environ.get("TESTING_KUBECONFIG", "~/.kube/config")


@pytest.fixture(scope="module")
def client() -> Client:
return Client(config=KubeConfig.from_file(KUBECONFIG))
25 changes: 23 additions & 2 deletions tests/integration/test_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import pytest
import requests
import yaml
from lightkube import Client
from lightkube.resources.networking_v1 import NetworkPolicy
from pytest_operator.plugin import OpsTest

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -51,7 +53,7 @@ async def test_build_and_deploy(ops_test: OpsTest) -> None:
"""
await ops_test.model.deploy(
POSTGRES,
channel="latest/edge",
channel="14/stable",
trust=True,
)
charm = await ops_test.build_charm(".")
Expand All @@ -71,7 +73,9 @@ async def test_build_and_deploy(ops_test: OpsTest) -> None:
assert ops_test.model.applications[APP_NAME].units[0].workload_status == "active"


async def test_ingress_relation(ops_test: OpsTest) -> None:
async def test_ingress_relation(ops_test: OpsTest, client: Client) -> None:
# We deploy ingress before running the database migrations to make sure that
# we don't break migrations by applying the network policies (e.g. by breaking dns).
await ops_test.model.deploy(
TRAEFIK,
application_name=TRAEFIK_PUBLIC_APP,
Expand All @@ -94,6 +98,10 @@ async def test_ingress_relation(ops_test: OpsTest) -> None:
timeout=1000,
)

# Validate network policies are created when ingress is provided
policy = client.get(NetworkPolicy, "kratos-network-policy", namespace=ops_test.model.name)
assert policy


async def test_has_public_ingress(ops_test: OpsTest) -> None:
# Get the traefik address and try to reach kratos
Expand Down Expand Up @@ -253,3 +261,16 @@ async def test_identity_schemas_config(ops_test: OpsTest) -> None:
resp = requests.get(f"http://{public_address}/{ops_test.model.name}-{APP_NAME}/schemas")

assert original_resp == resp.json()


@pytest.mark.skip(
reason=("sometimes the event hook is not fired so the resources don't get cleaned up.")
)
async def test_charm_removal(ops_test: OpsTest, client: Client) -> None:
await ops_test.model.remove_application(
APP_NAME, force=True, block_until_done=True, destroy_storage=True
)

# Validate network policies are removed when ingress is provided
policy = client.get(NetworkPolicy, "kratos-network-policy", namespace=ops_test.model.name)
assert not policy
9 changes: 8 additions & 1 deletion tests/unit/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,14 @@


@pytest.fixture()
def harness(mocked_kubernetes_service_patcher: MagicMock) -> Harness:
def mocked_kubernetes_policy_handler(mocker: MockerFixture) -> None:
return mocker.patch("charm.KubernetesNetworkPoliciesHandler")


@pytest.fixture()
def harness(
mocked_kubernetes_service_patcher: MagicMock, mocked_kubernetes_policy_handler: MagicMock
) -> Harness:
harness = Harness(KratosCharm)
harness.set_model_name("kratos-model")
harness.set_can_connect("kratos", True)
Expand Down
Loading