diff --git a/docs/COMPONENT_INTEGRATIONS.md b/docs/COMPONENT_INTEGRATIONS.md new file mode 100644 index 0000000000000000000000000000000000000000..f45ad3f935c7ed01dbee8acaeb5cba8db70dd2be --- /dev/null +++ b/docs/COMPONENT_INTEGRATIONS.md @@ -0,0 +1,1267 @@ +# OEG Component Integrations — Full Reference + +> This document explains every external component that the Open Exposure Gateway (OEG) depends on: +> **why** it exists in the architecture, **what** the OEG uses it for, **how** the integration works at +> the code and protocol level, **what the component itself does internally**, and **what breaks or is missing** +> in the current integration. +> +> Components covered: **SRM**, **Federation Manager**, **Keycloak**, **Artefact Manager**, **MongoDB (OEG)**, +> **MongoDB (SRM)**, **MongoDB (FM)**, and the **OpenVPN sidecar**. + +--- + +## Table of Contents + +1. [Component Map](#1-component-map) +2. [Service Resource Manager (SRM)](#2-service-resource-manager-srm) +3. [Federation Manager (FM)](#3-federation-manager-fm) +4. [Keycloak](#4-keycloak) +5. [Artefact Manager](#5-artefact-manager) +6. [MongoDB — OEG Instance](#6-mongodb--oeg-instance) +7. [MongoDB — SRM Instance](#7-mongodb--srm-instance) +8. [MongoDB — Federation Manager Instance](#8-mongodb--federation-manager-instance) +9. [OpenVPN Sidecar](#9-openvpn-sidecar) +10. [Component Interaction Summary](#10-component-interaction-summary) +11. [End-to-End Call Chains](#11-end-to-end-call-chains) +12. [What Is Missing / Broken in the Integrations](#12-what-is-missing--broken-in-the-integrations) +13. [FastAPI Migration — Integration Improvements](#13-fastapi-migration--integration-improvements) + +--- + +## 1. 
Component Map + +``` +╔══════════════════════════════════════════════════════════════════════════════════╗ +║ Operator Platform (k8s namespace: oop) ║ +║ ║ +║ ┌───────────────────────────────────────────────────────┐ ║ +║ │ Open Exposure Gateway (OEG) │ ← THIS REPO ║ +║ │ NodePort 32263 · Port 8080 · /oeg/1.0.0 │ ║ +║ └───────────────┬─────────────┬──────────────┬──────────┘ ║ +║ │ │ │ ║ +║ HTTP REST HTTP REST pymongo ║ +║ │ │ │ ║ +║ ┌────────▼──────┐ ┌───▼──────────┐ ┌──▼───────────────┐ ║ +║ │ SRM │ │ Federation │ │ MongoDB (OEG) │ ║ +║ │ Port 8080 │ │ Manager │ │ ClusterIP :27017 │ ║ +║ │ NodePort │ │ Port 8989 │ │ DB: oeg_storage │ ║ +║ │ 32415 │ │ NodePort │ │ zones/federations│ ║ +║ └───────┬───────┘ │ 30989 │ └──────────────────┘ ║ +║ │ └──────┬───────┘ ║ +║ ┌───────▼──────┐ ┌──────▼──────────────────────────────┐ ║ +║ │ MongoDB (SRM)│ │ k8s namespace: federation-manager │ ║ +║ │ :27017 │ │ ┌──────────────┐ ┌──────────────┐ │ ║ +║ │ 200Mi PV │ │ │ Keycloak │ │ MongoDB (FM) │ │ ║ +║ └──────────────┘ │ │ Port 8080 │ │ :27017 │ │ ║ +║ │ │ NodePort │ │ 1Gi PV │ │ ║ +║ ┌────────────┐ │ │ 30081 │ └──────────────┘ │ ║ +║ │ Artefact │ │ └──────────────┘ │ ║ +║ │ Manager │◄─────│ │ ║ +║ │ Port 8000 │ │ ┌──────────────────────────────┐ │ ║ +║ │ NodePort │ │ │ OpenVPN sidecar (optional) │ │ ║ +║ │ 30080 │ │ └──────────────────────────────┘ │ ║ +║ └────────────┘ └─────────────────────────────────────┘ ║ +║ │ ║ +╚══════════════════════════════════════════╪════════════════════════════════════════╝ + │ GSMA OPG Federation APIs + │ (via PARTNER_API_ROOT / OpenVPN) + ▼ + Partner Operator Platform + (remote FM + SRM + Infra) +``` + +--- + +## 2. Service Resource Manager (SRM) + +### 2.1 What It Is + +The SRM is the **southbound infrastructure orchestration engine** of the Operator Platform. It is a separate microservice (`ghcr.io/sunriseopenoperatorplatform/srm/srm:1.0.1`) with its own MongoDB instance and its own REST API. 
The SRM itself is not a CAMARA API — it exposes its own internal REST API, and the OEG maps incoming CAMARA requests onto it. + +The SRM is responsible for: +- **Maintaining the application catalogue** — storing app definitions (service functions) submitted by App Providers +- **Managing edge nodes** — maintaining a registry of physical/virtual compute nodes (edge zones) +- **Orchestrating deployments** — deploying application instances on those nodes by talking directly to the Kubernetes API server +- **Tracking running instances** — recording deployed service function instances and their status +- **Network function proxy** — proxying QoD session and Traffic Influence requests to underlying network infrastructure + +### 2.2 Why OEG Depends on It + +OEG is a **translation gateway**, not an infrastructure manager. Every CAMARA operation has a corresponding SRM operation: + +| CAMARA concept | SRM concept | Rationale | +|---|---|---| +| Application (AppManifest) | ServiceFunction | CAMARA models apps as rich manifests; SRM stores them as catalogue entries | +| Application Instance | DeployedServiceFunction | A running instance of a service function on a specific node | +| Edge Cloud Zone | Node | SRM's `/node` endpoint represents what CAMARA calls an Edge Cloud Zone | +| QoD Session | Session | SRM proxies QoD to the underlying network adapter | +| Traffic Influence Resource | TrafficInfluence | SRM proxies TI configuration | + +Without SRM, the OEG has no knowledge of what infrastructure exists or any ability to deploy apps on it. 
+ +### 2.3 How the Integration Works + +**Client class:** `PiEdgeAPIClient` in `edge_cloud_services.py` +**Factory:** `PiEdgeAPIClientFactory` +**Transport:** Synchronous `requests` library (blocking HTTP) +**Base URL:** `SRM_HOST` env var — e.g., `http://srm:8080/srm/1.0.0` +**Authentication:** None (commented out — see issues section) +**TLS:** `verify=False` on all calls + +The client is instantiated fresh on every controller function call: + +```python +# Every controller does this: +pi_edge_factory = PiEdgeAPIClientFactory() +api_client = pi_edge_factory.create_pi_edge_api_client() +result = api_client.some_method(...) +``` + +This means a new `PiEdgeAPIClient` Python object is created per request. Because its methods call the module-level `requests.get()` / `requests.post()` helpers rather than a shared `requests.Session`, there is no connection pooling either — each SRM call opens (and then closes) its own TCP connection. + +#### Application Catalogue Endpoints + +**`POST /serviceFunction`** — Submit app +OEG forwards the raw CAMARA `AppManifest` body directly to SRM without transformation. SRM is expected to understand and store it, returning an `appId`. The OEG does not assign UUIDs — SRM owns the `appId` namespace. + +```python +def submit_app(self, body): + url = f"{self.base_url}/serviceFunction" + response = requests.post(url, headers=self._get_headers(), verify=False, json=body) + response.raise_for_status() + return response.json() +``` + +**`GET /serviceFunction`** — List apps +Returns the full catalogue. OEG passes the raw list to the client. No pagination, no filtering. + +**`GET /serviceFunction/{appId}`** — Get app +OEG uses this internally during **federated instantiation** to retrieve the app manifest before building the GSMA artefact payload. This is an internal-to-internal call not triggered by the App Provider at that moment. + +```python +# Inside create_app_instance() — federated path: +appData = pi_edge_client.get_app(appId=app_id).get('appManifest') +``` + +The response structure from SRM is `{ appManifest: { name, version, appProvider, appRepo, componentSpec, ... 
} }`. OEG unwraps this with `.get('appManifest')`. + +**`DELETE /serviceFunction/{appId}`** — Delete app +Returns plain text, not JSON. OEG returns this text directly. + +#### Deployment Endpoints + +**`POST /deployedServiceFunction`** — Deploy app (local zones only) +This is the **local instantiation path**. OEG passes the full `create_app_instance` request body (which includes `appId` and `appZones`) directly to SRM: + +```python +response = pi_edge_client.deploy_service_function(data=body) +``` + +SRM takes the `appId`, looks it up in its catalogue, and orchestrates the actual deployment on the Kubernetes cluster by calling the Kubernetes API server directly using `KUBERNETES_MASTER_IP`, `KUBERNETES_MASTER_PORT`, and `KUBERNETES_MASTER_TOKEN`. + +**`GET /deployedServiceFunction`** — List all running instances +No filter parameters are passed from OEG to SRM, even though the `/appinstances` CAMARA endpoint accepts `appId`, `appInstanceId`, and `region` filters. + +**`DELETE /deployedServiceFunction/{appInstanceId}`** — Terminate instance +OEG passes the `appInstanceId` path parameter directly to SRM. SRM instructs Kubernetes to tear down the workload. + +#### Edge Node / Zone Endpoints + +**`GET /node`** — List all edge nodes +Called in two places: +1. At **module import time** in `edge_cloud_controller.py` — zones are fetched and inserted into OEG's MongoDB (once per process startup) +2. On every **`GET /edge-cloud-zones`** request — zones are fetched live from SRM again (ignoring MongoDB) + +This means OEG always has the freshest zone list (live SRM call), but the MongoDB copy can be stale if SRM's node list changes after startup. The MongoDB copy is only used for the `isLocal` flag during `create_app_instance`. 
+ +```python +def edge_cloud_zones(self): + url = f"{self.base_url}/node" + request_headers = self._get_headers() + response = requests.get(url, headers=request_headers, verify=False) + response.raise_for_status() + nodes = response.json() + if not nodes: + raise ValueError("No edge nodes found") + return nodes +``` + +Note: no try/except here — unlike every other method in `PiEdgeAPIClient`, this one propagates exceptions uncaught. The caller in `edge_cloud_controller.py` wraps the startup call in try/except but the live-query call in `get_local_zones()` does not. + +**`GET /node/{zoneId}`** — Zone details +Used by the `GET /edge-cloud-zones/{zoneId}` endpoint. Returns full node details from SRM. + +#### Network Function Endpoints + +**`POST /sessions`**, **`GET /sessions/{id}`**, **`DELETE /sessions/{id}`** +Pure transparent proxy. OEG forwards the body or path parameter to SRM unchanged, and returns the response. SRM internally manages QoD sessions by talking to the underlying network adapter (configured via `NETWORK_ADAPTER_NAME` and `NETWORK_ADAPTER_BASE_URL` env vars). The `SCS_AS_ID` env var identifies this Operator Platform instance to the network adapter. + +**`POST /traffic-influences`**, **`GET /traffic-influences/{id}`**, etc. +Same transparent proxy pattern for Traffic Influence resources. 
+ +### 2.4 SRM's Internal Configuration + +From the Helm values and deployment templates, SRM requires: + +| Env Variable | Value (KIND default) | Purpose | +|---|---|---| +| `KUBERNETES_MASTER_IP` | `kubernetes.default.svc.cluster.local` | K8s API server address | +| `KUBERNETES_MASTER_PORT` | `443` | K8s API server port | +| `KUBERNETES_USERNAME` | `admin` | K8s username | +| `K8S_NAMESPACE` | `oop` | Namespace where app workloads are deployed | +| `KUBERNETES_MASTER_TOKEN` | ServiceAccount JWT | Authenticates SRM to the K8s API | +| `EMP_STORAGE_URI` | `mongodb://mongosrm:27017` | SRM's own MongoDB | +| `ARTIFACT_MANAGER_ADDRESS` | `http://artefact-manager-service:8000` | Artefact Manager URL | +| `EDGE_CLOUD_ADAPTER_NAME` | `kubernetes` | Which infra adapter to use | +| `ADAPTER_BASE_URL` | K8s API server hostname | Adapter's target | +| `PLATFORM_PROVIDER` | `Local` / `ISI` | Human-readable OP name for zone metadata | +| `NETWORK_ADAPTER_NAME` | (optional) | Network function adapter name | +| `NETWORK_ADAPTER_BASE_URL` | (optional) | Network function adapter URL | +| `SCS_AS_ID` | (optional) | Application Server ID for QoD | + +SRM uses the `KUBERNETES_MASTER_TOKEN` (a long-lived ServiceAccount JWT) to authenticate `kubectl`-equivalent API calls directly against the Kubernetes API server. This is how application instances are created as Kubernetes `Deployment` or `Pod` resources in the `oop` namespace. + +### 2.5 What Is Missing in the OEG→SRM Integration + +- **No authentication:** SRM is called with no credentials. If SRM requires auth, all calls will return 401 silently. +- **No TLS verification:** `verify=False` on all calls. +- **No retry logic:** A single failed call returns an error dict or raises an exception. +- **No timeout on `edge_cloud_zones()`:** All other SRM calls have a `requests.Timeout` catch but this one has no try/except at all. +- **No request correlation:** The `x-correlator` from the App Provider is not forwarded to SRM. 
+- **Filter parameters not forwarded:** `GET /appinstances?appId=...&region=...` receives filters from the App Provider, but SRM is called as `GET /deployedServiceFunction` with no query parameters. +- **Response not transformed:** SRM response structures are returned raw to the App Provider without CAMARA formatting. + +--- + +## 3. Federation Manager (FM) + +### 3.1 What It Is + +The Federation Manager is the **inter-operator protocol gateway**. It implements the GSMA Operator Platform Group (OPG) federation API, which defines how two independent Operator Platforms can trust each other, exchange zone availability information, and allow applications from one OP to be deployed on the infrastructure of another OP. + +It is a separate microservice (`ghcr.io/sunriseopenoperatorplatform/federation-manager:0.0.1`) deployed in its **own Kubernetes namespace** (`federation-manager`), with its own MongoDB and its own Keycloak instance co-located in the same pod. + +The FM sits at the boundary between operators. It: +- Accepts federation establishment requests from peer OPs +- Maintains the list of available zones it is willing to share with partner OPs +- Authenticates and authorises inter-OP API calls via OAuth2 (Keycloak) +- Receives artefacts (app packages) from originating OPs +- Manages the full lifecycle of cross-OP app deployments (onboard → deploy → terminate) +- Optionally uses OpenVPN to create a secure network tunnel to the partner OP + +The OEG does **not** call the partner OP directly — it calls the **local FM**, which handles all communication with the partner. This is a deliberate isolation: the OEG only speaks local platform semantics; the FM speaks the inter-OP protocol. + +### 3.2 Why OEG Depends on It + +The OEG needs the FM for two distinct scenarios: + +1. **Federation lifecycle management** — when an operator admin establishes, queries, or tears down a federation relationship with a partner OP. 
This is a control-plane operation that populates the zone registry with partner zones. + +2. **Federated application instantiation** — when an App Provider requests deployment to a zone that belongs to a partner OP. The OEG must perform a multi-step protocol (artefact upload → onboarding → deployment) against the local FM, which relays everything to the partner FM. + +Without the FM, OEG can only deploy apps on its own local zones. + +### 3.3 How the Integration Works + +**Client class:** `FederationManagerClient` in `federation_services.py` +**Factory:** `FederationManagerClientFactory` +**Transport:** Synchronous `requests` library +**Base URL:** `FEDERATION_MANAGER_HOST` env var — e.g., `http://federation-manager:8989/operatorplatform/federation/v1` +**Authentication:** OAuth2 Bearer token from Keycloak (stored in OEG's MongoDB) +**TLS:** Varies (no `verify=False` flag set explicitly in FM client methods — so uses system default) + +Unlike the SRM client, FM clients are instantiated at **module level** (once per process) rather than per-request: + +```python +# At module import time in both app_controllers.py and federation_manager_controller.py: +factory = FederationManagerClientFactory() +federation_client = factory.create_federation_client() +``` + +This is slightly better for performance but means the `base_url` is fixed for the process lifetime. + +Every FM call includes three custom headers: + +```python +def _get_headers(self, token): + return { + 'Authorization': 'Bearer ' + token, + 'X-Partner-API-Root': self.partner_root, # PARTNER_API_ROOT env var + 'X-Internal': 'true', + 'Content-Type': 'application/json', + 'Accept': 'application/json', + } +``` + +- `Authorization`: The OAuth2 Bearer token obtained from Keycloak. FM validates this against Keycloak before processing the request. +- `X-Partner-API-Root`: Tells FM the base URL of the partner OP's API endpoint. FM uses this to know where to relay outbound calls. 
This value comes from the `PARTNER_API_ROOT` env var (e.g., `http://10.8.0.1:31002`). +- `X-Internal: true`: Signals to FM that this is an internal OEG→FM call, not a call coming in from the partner OP's FM. FM uses this to route the request correctly (act as originating OP, not as partner OP). + +#### Federation Establishment + +**`POST /partner`** — called by `FederationManagerClient.post_partner()` + +The OEG sends a `FederationRequestData` payload: + +```json +{ + "origOPFederationId": "my-operator-platform-id", + "origOPCountryCode": "GR", + "origOPMobileNetworkCodes": { "mcc": "202", "mncs": ["10"] }, + "initialDate": "2025-01-01T00:00:00Z", + "partnerStatusLink": "http://oeg-callback/status", + "partnerCallbackCredentials": { "tokenUrl": "...", "clientId": "...", "clientSecret": "..." } +} +``` + +The local FM, upon receiving this: +1. Validates the Bearer token with Keycloak +2. Reads `X-Partner-API-Root` to know the partner's address +3. Contacts the partner FM at `{PARTNER_API_ROOT}/partner` with the same payload +4. The partner FM responds with its own `FederationResponseData`: + - `federationContextId` — a unique ID for this bilateral federation relationship + - `offeredAvailabilityZones` — the list of zones the partner is willing to share + - `edgeDiscoveryServiceEndPoint` — where App Providers can discover edge info at the partner + - `lcmServiceEndPoint` — where lifecycle management requests should be sent for the partner + - `partnerOPFederationId`, `partnerOPCountryCode`, network codes + +The OEG then: +1. Stores each offered zone in MongoDB `zones` collection with `isLocal: 'false'` and `fedContextId` +2. Stores `{_id: federationContextId, token: access_token}` in MongoDB `federations` collection + +This is the key seeding step that makes partner zones visible to the `GET /edge-cloud-zones` endpoint (in principle — though `get_federated_zones()` is currently a stub that returns `[]`). 
+ +#### Federation Query + +**`GET /{fedContextId}/partner`** — called by `FederationManagerClient.get_partner()` + +OEG retrieves the stored token from MongoDB for the given `fedContextId`, then calls the local FM. FM returns the current state of the federation context — updated zone lists, endpoint changes, network code updates. This is primarily for monitoring and re-synchronisation. + +#### Federation Teardown + +**`DELETE /{fedContextId}/partner`** — called by `FederationManagerClient.delete_partner()` + +FM instructs the partner OP to tear down the federation context. On success, OEG: +1. Removes the federation record from MongoDB `federations` +2. Removes all zones with `isLocal: 'false'` from MongoDB `zones` + +The zone cleanup is currently a blanket delete of all partner zones regardless of which federation they belong to — a correctness bug in multi-federation scenarios. + +#### Artefact Upload (Federated Instantiation Step 1) + +**`POST /{fedContextId}/artefact`** — called by `FederationManagerClient.create_artefact()` + +Before an app can be deployed at a partner OP, the app's artefact (Helm chart, container image reference, etc.) must be sent to that partner. 
OEG constructs the GSMA artefact payload by translating the CAMARA `AppManifest` fields: + +```python +artefact = { + 'artefactId': app_id, # reuse the OEG app_id as artefact_id + 'appProviderId': appData.get('appProvider'), + 'artefactName': appData.get('name'), + 'artefactVersionInfo': appData.get('version'), + 'artefactDescription': '', # always empty — not available in CAMARA manifest + 'repoType': repoInfo.get('type'), # PRIVATEREPO / PUBLICREPO + 'artefactRepoLocation': { + 'repoURL': repoInfo.get('imagePath'), + 'userName': repoInfo.get('userName'), + 'password': repoInfo.get('credentials'), + 'token': '' # always empty + }, + 'componentSpec': [{ + 'componentName': appData.get('name'), + 'numOfInstances': 0, # always 0 + 'restartPolicy': 'RESTART_POLICY_ALWAYS', + 'exposedInterfaces': [ + # translated from CAMARA networkInterfaces + {'interfaceId': '', 'commProtocol': ni.get('protocol'), 'commPort': ni.get('port'), + 'visibilityType': ni.get('visibilityType'), 'network': '', 'InterfaceName': ''} + for ni in networkInterfaces + ], + 'compEnvParams': [], + 'persistentVolumes': [] + }] +} +``` + +Several fields are always empty or hardcoded (`artefactDescription`, `token`, `numOfInstances`, `interfaceId`, `network`, `InterfaceName`), meaning the partner OP receives an incomplete artefact record. + +FM relays this to the partner FM at `{PARTNER_API_ROOT}/{fedContextId}/artefact`. The partner FM registers the artefact in its own Artefact Manager. + +The response is returned as a raw `requests.Response` object. OEG checks `response.status_code == 200 or 409` — 409 means the artefact already exists at the partner (idempotent re-submission). 
+ +#### Application Onboarding at Partner (Step 2) + +**`POST /{fedContextId}/application/onboarding`** — called by `FederationManagerClient.onboard_application()` + +Once the artefact exists at the partner, the app itself must be "onboarded" — registered in the partner OP's app catalogue, referencing the previously uploaded artefact: + +```python +onboard_app = { + 'appId': app_id, + 'appProviderId': appData.get('appProvider'), + 'appDeploymentZones': [], # always empty — zone given at deploy time + 'appMetaData': { + 'appName': appData.get('name'), + 'version': appData.get('version') + }, + 'appComponentSpecs': [{ + 'serviceNameNB': appData.get('name'), # NB = northbound service name + 'serviceNameEW': appData.get('name'), # EW = east-west service name (same value) + 'componentName': appData.get('name'), + 'artefactId': app_id # links to the uploaded artefact + }] +} +``` + +FM relays to the partner FM at `{PARTNER_API_ROOT}/{fedContextId}/application/onboarding`. The partner FM stores the app definition in its own SRM's catalogue. + +This method returns `response.json()` (unlike `create_artefact` which returns the raw `Response` object) — one of the inconsistencies in the FM client implementation. + +#### Application Deployment at Partner (Step 3) + +**`POST /{fedContextId}/application/lcm`** — called by `FederationManagerClient.deploy_app_partner()` + +The final step requests the partner OP to instantiate the application: + +```python +deploy_app = { + 'appId': app_id, + 'appVersion': appData.get('version'), + 'appProviderId': appData.get('appProvider'), + 'zoneInfo': { + 'zoneId': zone.get('edgeCloudZoneId') # the specific partner zone requested + } +} +``` + +FM relays to the partner at `{PARTNER_API_ROOT}/{fedContextId}/application/lcm`. The partner FM instructs its own SRM to deploy the workload on the specified zone using its own Kubernetes adapter. 
+ +This method returns the raw `requests.Response` object and the caller (`create_app_instance`) returns it directly to Connexion — which cannot serialise a `requests.Response` object. This is a bug that causes a 500 on the happy path. + +#### Zone Synchronisation (Unexposed in OEG API) + +The FM client also has methods for zone sync that are implemented but not exposed through the OpenAPI spec: + +- `request_zone_sync(fedContextId, body, token)` → `POST /{fedContextId}/zones` +- `get_zone_resource_info(fedContextId, zoneId, token)` → `GET /{fedContextId}/zones/{zoneId}` +- `remove_zone_sync(fedContextId, zoneId, token)` → `DELETE /{fedContextId}/zones/{zoneId}` + +These would allow the OEG to subscribe to zone status updates from the partner OP, get detailed resource metrics for a specific partner zone, and unsubscribe from zone updates. These are part of the GSMA OPG Zone Availability API and are wired in `federation_manager_controller.py` but absent from `openapi.yaml`. + +### 3.4 FM's Internal Configuration + +FM is configured via a `config.cfg` file mounted as a Kubernetes Secret. This file is generated from Helm values and contains: + +```ini +[keycloak] +client1_id = originating-op-1 +client1_secret = dd7vNwFqjNpYwaghlEwMbw10g0klWDHb +client2_id = originating-op-2 +client2_secret = 2mhznERfWclLDuVojY77Lp4Qd2r4e8Ms +scope = fed-mgmt + +[server] +host = 127.0.0.1 +port = 8989 +prefix = api +version = v1.0 +protocol = http + +[mongodb] +host = mongodb # the FM's own MongoDB +port = 27017 + +[i2edge] +host = 192.168.123.237 # i2CAT's edge platform (test partner) +port = 30760 + +[op_data] +partnerOPFederationId = i2cat +partnerOPCountryCode = ES +# ... MCC, MNC, fixed network codes +# ... 
service endpoint details (edge discovery, LCM) + +[partner_op] +role = partner_op # or originating_op +host = 127.0.0.1 +server = /operatorplatform/federation/v1 +port = 8992 +``` + +Key observations: +- FM has **two Keycloak clients** (`client1` and `client2`) supporting the scenario where it acts as both originating and partner OP simultaneously +- `[i2edge]` hardcodes an i2CAT test partner address — this should be configurable per federation context +- The `role` field (`originating_op` / `partner_op`) determines how FM processes incoming requests +- The `[partner_op]` section is the default partner endpoint before federation establishment fills in the runtime address via `X-Partner-API-Root` + +### 3.5 FM Deployment Detail + +FM is deployed in the same pod as Keycloak (multi-container pod), not as separate pods: + +``` +Pod: federation-manager +├── Container: keycloak (quay.io/keycloak/keycloak:26.1.4) +│ ├── Port: 8080 +│ ├── Startup: --start-dev --import-realm +│ └── Volume: realm-import.json (from ConfigMap) +├── Container: federation-manager (ghcr.io/sunriseopenoperatorplatform/federation-manager:0.0.1) +│ ├── Port: 8989 +│ └── Volume: config.cfg (from Secret) +└── Container: openvpn (alpine:3.20) [optional, disabled by default] + ├── Capabilities: NET_ADMIN + └── Volume: /vpn/client.ovpn + /dev/net/tun +``` + +Collocating Keycloak inside the FM pod means they share the pod's network namespace — FM can reach Keycloak at `localhost:8080` without cluster DNS resolution. This is convenient but means Keycloak is not independently scalable and cannot be shared across other services. + +--- + +## 4. Keycloak + +### 4.1 What It Is + +Keycloak is an open-source Identity and Access Management (IAM) server. In this platform it is used **exclusively for machine-to-machine (M2M) authentication** between OPs using the OAuth2 Client Credentials grant. No end-user authentication flows are used. 
+ +**Image:** `quay.io/keycloak/keycloak:26.1.4` +**Port:** 8080 (internal to the FM pod) / NodePort 30081 (external) +**Mode:** `start-dev` (development mode — no TLS, embedded H2 database for Keycloak's own session state) + +### 4.2 Realm and Client Configuration + +The Keycloak realm is imported at startup from a JSON file generated by the Helm ConfigMap template. The realm definition: + +```json +{ + "realm": "federation", + "enabled": true, + "clientScopes": [{ + "id": "439d9c71-8a8a-469c-9280-058016000cc2", + "name": "fed-mgmt", + "protocol": "openid-connect", + "description": "fed-mgmt" + }], + "clients": [{ + "clientId": "originating-op-1", + "enabled": true, + "clientAuthenticatorType": "client-secret", + "secret": "dd7vNwFqjNpYwaghlEwMbw10g0klWDHb", + "publicClient": false, + "directAccessGrantsEnabled": true, + "serviceAccountsEnabled": true, // enables client credentials flow + "defaultClientScopes": ["fed-mgmt"], + "webOrigins": ["*"] + }] +} +``` + +`serviceAccountsEnabled: true` is the critical setting that allows the client credentials grant — the OAuth2 flow where the client (OEG) authenticates using `client_id` + `client_secret` without any user interaction. + +### 4.3 How OEG Interacts with Keycloak + +OEG calls Keycloak **directly** (not via FM) to obtain a token before every `POST /partner` call: + +```python +# In federation_manager_controller.create_federation(): +token_headers = { + 'Authorization': 'Basic b3JpZ2luYXRpbmctb3AtMTpkZDd2TndGcWpOcFl3YWdobEV3TWJ3MTBnMGtsV0RIYg==', + 'Content-Type': 'application/x-www-form-urlencoded' +} +data = {'grant_type': 'client_credentials', 'scope': 'fed-mgmt'} +TOKEN_ENDPOINT = config.TOKEN_ENDPOINT +# e.g. 
http://federation-manager.federation-manager.svc.cluster.local:8080/realms/federation/protocol/openid-connect/token + +token = requests.post(TOKEN_ENDPOINT, headers=token_headers, data=data).json().get('access_token') +``` + +The Basic auth header encodes `client_id:client_secret` in Base64: +- Raw: `originating-op-1:dd7vNwFqjNpYwaghlEwMbw10g0klWDHb` +- Base64: `b3JpZ2luYXRpbmctb3AtMTpkZDd2TndGcWpOcFl3YWdobEV3TWJ3MTBnMGtsV0RIYg==` + +This is hardcoded directly in the source file, not read from the environment. + +The token endpoint is accessed via cross-namespace Kubernetes DNS: `federation-manager.federation-manager.svc.cluster.local:8080`. This means OEG (in namespace `oop`) must have network access to the `federation-manager` namespace — which works by default in Kubernetes unless NetworkPolicies restrict it. + +### 4.4 Token Lifetime and Caching + +Keycloak default token expiry: **300 seconds (5 minutes)**. + +OEG's strategy: +- On `POST /partner`: fetch a fresh token, use it immediately, store in MongoDB alongside the `federationContextId` +- On subsequent calls (`GET /{id}/partner`, `DELETE /{id}/partner`): retrieve the stored token from MongoDB + +There is **no token refresh**. If more than 5 minutes pass between the `POST /partner` call and any subsequent FM call, the stored token will be expired and FM will return 401. OEG will propagate this 401 to the caller with no helpful error message. + +### 4.5 What Keycloak Does for FM + +FM uses Keycloak to: +1. Validate Bearer tokens on all incoming requests from OEG (`X-Internal: true` path) +2. Validate Bearer tokens on all incoming requests from the partner OP +3. Issue tokens to both originating and partner OPs (two clients configured) + +This provides the cryptographic trust boundary between OPs. When a partner OP calls this FM, FM validates the token against its own Keycloak, ensuring only authorised OPs can trigger federation operations. + +--- + +## 5. 
Artefact Manager + +### 5.1 What It Is + +The Artefact Manager is a lightweight service (`ghcr.io/sunriseopenoperatorplatform/artefactmanager:0.5`) that stores and manages application artefact metadata and binary references. It is part of the SRM sub-chart and deployed in the `oop` namespace alongside SRM. + +**Port:** 8000 (internal) / NodePort 30080 (external) +**Language:** Python (from `PYTHONPATH=/app` env var) + +### 5.2 What It Does + +The Artefact Manager handles the actual storage and retrieval of application packages (Helm charts, container image references, VM images). SRM delegates artefact operations to it via `http://artefact-manager-service:8000`. + +When an app is submitted (`POST /serviceFunction`), SRM stores the app metadata in its MongoDB and instructs the Artefact Manager to register or download the artefact from the `appRepo.imagePath` URI. This allows SRM to deploy the app later without re-fetching from the original source. + +### 5.3 OEG's Relationship with Artefact Manager + +OEG does **not call the Artefact Manager directly**. The integration is: + +``` +OEG → POST /serviceFunction → SRM → POST /artefact → Artefact Manager +``` + +OEG is unaware of the Artefact Manager's existence. This is an internal SRM concern. + +However, the FM artefact upload (`POST /{fedContextId}/artefact` in the federated path) is a different artefact concept — this is the FM's own artefact store, not the Artefact Manager. The naming is confusing but they are separate systems: + +| Term | System | What it stores | +|---|---|---| +| Artefact Manager | SRM sub-component | Local app packages used by SRM for local deployment | +| FM Artefact (`/artefact` endpoint) | Federation Manager | App metadata sent to partner OPs for cross-OP deployment | + +--- + +## 6. MongoDB — OEG Instance + +### 6.1 What It Is + +A dedicated MongoDB instance (`mongo:latest`) deployed exclusively for the OEG, in the same Kubernetes namespace (`oop`). 
+ +**Service name:** `oegmongo` (ClusterIP :27017) +**Database:** `oeg_storage` +**Collections:** `zones`, `federations` +**Persistence:** 50Mi PV on hostPath `/mnt/data/mongodb_oeg` +**Strategy:** `Recreate` (pod is killed before the new one starts — ensures no dual-write on upgrade) + +### 6.2 What OEG Stores in It + +#### Collection: `zones` + +Every edge cloud zone known to the platform is stored here. A zone document looks like: + +```json +{ + "_id": "046b6c7f-0b8a-43b9-b35d-6489e6daee91", + "edgeCloudZoneId": "046b6c7f-0b8a-43b9-b35d-6489e6daee91", + "edgeCloudZoneName": "zone-athens-north", + "edgeCloudZoneStatus": "active", + "edgeCloudProvider": "Local", + "edgeCloudRegion": "GR-ATTICA", + "isLocal": "true" +} +``` + +For a partner zone (added by `create_federation`): + +```json +{ + "_id": "partner-zone-001", + "edgeCloudZoneId": "partner-zone-001", + "edgeCloudZoneName": "Barcelona Edge", + "edgeCloudZoneStatus": "unknown", + "edgeCloudProvider": "i2cat", + "isLocal": "false", + "fedContextId": "abc-123-fed-context" +} +``` + +**Purpose:** The only reason OEG stores zones in MongoDB is to answer one question: _"Is this zone local or federated?"_ during `create_app_instance`. If `isLocal == 'false'`, the federated 8-step flow is used; otherwise the direct SRM call is used. + +**Write paths:** +1. Startup (module import of `edge_cloud_controller`) → `insert_zones()` for local zones +2. `create_federation` success → `insert_zones()` for partner zones + +**Read paths:** +1. `create_app_instance` → `get_zone(edgeCloudZoneId)` to determine routing + +**Delete paths:** +1. `delete_federation` → `delete_partner_zones()` removes all `isLocal:'false'` zones + +#### Collection: `federations` + +Stores the OAuth2 token for each active federation context: + +```json +{ + "_id": "abc-123-fed-context", + "token": "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9..." +} +``` + +**Purpose:** Avoids re-fetching a Keycloak token on every FM call. 
The token obtained during `POST /partner` is reused for all subsequent calls to that federation context. + +**Write paths:** +1. `create_federation` success → `insert_federation()` + +**Read paths:** +1. `get_federation` → `get_fed(fedContextId)` to retrieve token +2. `delete_federation` → `get_fed(fedContextId)` to retrieve token +3. `get_federation_context_ids` → `get_all_feds()` to get last known token +4. `create_app_instance` (federated path) → `get_fed(fedContextId)` to retrieve token + +**Delete paths:** +1. `delete_federation` → `delete_fed(fedContextId)` + +### 6.3 How OEG Talks to MongoDB + +Via `storage_service.py` using raw `pymongo`. A new `MongoClient` is created on **every function call**: + +```python +def get_zone(zone_id: str): + myclient = pymongo.MongoClient(storage_url) # new connection every call + mydbmongo = myclient['oeg_storage'] + col = mydbmongo['zones'] + zone = col.find_one({'_id': zone_id}) + return zone + # myclient is never closed — garbage collected eventually +``` + +There are 8 functions in `storage_service.py`, each repeating this pattern. The well-designed `MongoManager` class in `db_manager.py` (which uses `maxPoolSize=50` and a context manager) is **not used** in any of these paths — it exists only in commented-out code in the controllers. + +### 6.4 OEG MongoDB Persistence + +The PV is 50Mi on `hostPath: /mnt/data/mongodb_oeg`. On KIND, this maps from the host at `/tmp/kind-oop/mongodb_oeg`. The Deployment strategy is `Recreate`, which means that on a rollout (upgrade) the old pod is terminated before the new one starts — avoiding write conflicts on the shared volume — at the cost of a brief downtime window. + +**Critical issue:** `insert_zones` uses `insert_many` with no upsert. On every pod restart, local zones are re-inserted, creating duplicate `_id` conflicts in MongoDB (MongoDB will reject duplicates on `_id` but not on other fields). 
The current code attempts to use `edgeCloudZoneId` as `_id`: + +```python +zone['_id'] = zone.get('edgeCloudZoneId') +insert_zones(zones) # col.insert_many(zone_list) +``` + +MongoDB will raise `BulkWriteError` on duplicate `_id` on restart, but this is caught by the surrounding try/except and silently swallowed. + +--- + +## 7. MongoDB — SRM Instance + +### 7.1 What It Is + +A separate MongoDB instance (`mongo:latest`) dedicated to SRM, deployed in namespace `oop`. + +**Service name:** `mongosrm` (ClusterIP :27017) +**Persistence:** 200Mi PV on hostPath `/mnt/data/mongodb_srm` +**Connected to:** SRM only — OEG has no direct access + +### 7.2 What SRM Stores in It + +SRM uses its MongoDB (via `EMP_STORAGE_URI = mongodb://mongosrm:27017`) to persist: + +- **Service function catalogue** — all app manifests submitted via `POST /serviceFunction` +- **Deployed service functions** — running application instances with status and endpoint info +- **Node registry** — the list of edge nodes/zones managed by this OP +- **Session state** — QoD sessions and their parameters +- **Traffic influence resources** — TI configurations + +OEG never accesses SRM's MongoDB directly. All communication goes through SRM's REST API. This is the correct architectural boundary — SRM is the authoritative source for its data. + +### 7.3 Why This Is a Separate Instance + +SRM and OEG have completely different data models and lifecycle requirements. SRM's database is the **source of truth for infrastructure state**; OEG's database is a **lightweight state cache** for federation routing decisions. Sharing them would create coupling and make independent scaling and backup impossible. + +--- + +## 8. MongoDB — Federation Manager Instance + +### 8.1 What It Is + +A MongoDB instance (`mongo:latest`) dedicated to FM, deployed in namespace `federation-manager`. 
+ +**Service name:** `mongodb` (ClusterIP :27017, accessed as `mongodb` from within the namespace) +**Persistence:** 1Gi PV on `storageClass: mongodb-fm-storage` +**Storage path:** `/mnt/data/mongodb_fm` + +This is a significantly larger allocation than OEG's (1Gi vs 50Mi) because FM needs to persist full federation context records, zone databases, artefact metadata, and onboarding records for all active federation relationships. + +### 8.2 What FM Stores in It + +Based on the FM's role in the GSMA OPG protocol: + +- **Federation contexts** — active bilateral federation records, including both OPs' network codes, endpoint URLs, offered zones, status +- **Partner zone catalogue** — zones offered by partner OPs, with resource availability data +- **Artefact registry** — artefacts received from partner OPs (app packages to be deployed locally, or metadata of artefacts sent to partner OPs) +- **Application onboarding records** — apps onboarded from partner OPs, mapping `appId` to `artefactId` and component specs +- **LCM records** — application lifecycle management records (pending, deployed, terminating instances at partner OPs) +- **Zone subscription records** — subscriptions for zone status update notifications + +### 8.3 OEG's Indirect Dependency on FM's MongoDB + +OEG does not access FM's MongoDB directly, but the data stored there governs what FM returns to OEG on queries. For example: +- When OEG calls `GET /{fedContextId}/partner`, FM reads its MongoDB to return the current federation state +- When OEG calls `POST /{fedContextId}/application/onboarding`, FM persists the onboarding record in its MongoDB, which the partner FM later reads when processing the deployment + +--- + +## 9. OpenVPN Sidecar + +### 9.1 What It Is + +An optional Alpine Linux container running OpenVPN, co-located in the FM pod. It is **disabled by default** (`openvpn.enabled: false`). 
+ +**Image:** `alpine:3.20` +**Capability:** `NET_ADMIN` (required for TUN/TAP device manipulation) +**Volumes:** `/vpn/client.ovpn` + `/vpn/auth.txt` (from a Kubernetes Secret) + +### 9.2 Why It Exists + +In real inter-operator federation scenarios, the two OPs are in different networks with no direct IP connectivity. The partner OP's FM is not publicly reachable. OpenVPN provides an encrypted L3 tunnel between the two OP networks, allowing the local FM to call the partner FM's private IP address as if it were on the same network. + +When OpenVPN is active: +- FM's pod gets a virtual `tun0` network interface +- All traffic destined for the partner's network range routes through the VPN tunnel +- `PARTNER_API_ROOT` is set to the partner's private IP address reachable only via VPN +- The partner FM's `X-Partner-API-Root` header value becomes a VPN-routable address + +### 9.3 Readiness Probe + +The OpenVPN sidecar has a readiness probe that checks for a live TUN interface and route: + +```bash +ip link show dev tun0 >/dev/null 2>&1 || exit 1 +ip route | grep -q 'tun0' || exit 1 +``` + +This prevents Kubernetes from marking the pod as ready until the VPN tunnel is established, ensuring no federation traffic is attempted before connectivity is confirmed. + +### 9.4 Impact on OEG + +OEG itself is unaffected by whether OpenVPN is running. OEG calls the local FM, and FM handles the routing. If FM cannot reach the partner (VPN down, wrong `PARTNER_API_ROOT`), the FM calls will time out or return connection errors, which OEG propagates as 408/504. + +--- + +## 10. 
Component Interaction Summary + +### 10.1 Who Calls What + +``` + OEG + │ + ┌─────────────────┼─────────────────┐ + │ │ │ + ▼ ▼ ▼ + pymongo SRM REST FM REST + (direct) (HTTP) (HTTP + Bearer) + │ │ │ + ▼ ▼ │ + oeg_storage SRM REST API Keycloak REST + (zones/feds) routes to: (token fetch) + │ │ + ┌─────┼────┐ ▼ + │ │ Keycloak validates + MongoDB Artefact Bearer on all FM + (mongosrm) Manager calls from OEG + │ + then SRM calls: FM then calls: + K8s API server Partner FM REST + (deploy/delete (via PARTNER_API_ROOT + workloads) or OpenVPN tunnel) +``` + +### 10.2 Who Owns What Data + +| Data | Owner | OEG Access | +|---|---|---| +| App manifests (catalogue) | SRM (mongosrm) | Via SRM REST API | +| Running instances | SRM (mongosrm) | Via SRM REST API | +| Edge nodes/zones | SRM (mongosrm) | Via SRM REST API | +| QoD sessions | SRM (mongosrm) | Via SRM REST API | +| Zone routing flags (`isLocal`) | OEG (oeg_storage) | Direct pymongo | +| Federation context + token | OEG (oeg_storage) | Direct pymongo | +| Active federation records | FM (mongodb-fm) | Via FM REST API | +| Partner zones (full detail) | FM (mongodb-fm) | Via FM REST API | +| Artefact registry (local) | Artefact Manager | Via SRM (indirect) | +| Identity & access | Keycloak (embedded H2) | Direct token POST | + +### 10.3 Namespace Boundaries + +| Component | Namespace | Reachable from OEG? | +|---|---|---| +| OEG | `oop` | Self | +| SRM | `oop` | Yes (same namespace) | +| MongoDB (OEG) | `oop` | Yes (same namespace) | +| MongoDB (SRM) | `oop` | Yes (but never used) | +| Artefact Manager | `oop` | Yes (but never called) | +| Federation Manager | `federation-manager` | Yes (cross-namespace via FQDN) | +| Keycloak | `federation-manager` | Yes (cross-namespace via FQDN) | +| MongoDB (FM) | `federation-manager` | No (OEG never calls it) | + +Cross-namespace access works via Kubernetes FQDN: `service.namespace.svc.cluster.local`. + +--- + +## 11. 
End-to-End Call Chains + +### 11.1 Submit App + List Zones + Deploy Locally + +``` +App Provider: POST /apps → OEG + OEG: POST http://srm:8080/srm/1.0.0/serviceFunction {AppManifest} + SRM: stores in mongosrm.serviceFunctions + SRM: POST http://artefact-manager-service:8000/artefact {imagePath, credentials} + Artefact Manager: registers artefact + SRM → OEG: {appId: "uuid-xxx"} + OEG → App Provider: {appId: "uuid-xxx"} [should be 201, currently 200] + +App Provider: GET /edge-cloud-zones → OEG + OEG: GET http://srm:8080/srm/1.0.0/node + SRM: queries mongosrm.nodes + SRM → OEG: [{edgeCloudZoneId, edgeCloudZoneName, ...}] + OEG → App Provider: 200 [{EdgeCloudZone}] + +App Provider: POST /appinstances {appId: "uuid-xxx", appZones: [{EdgeCloudZone: {edgeCloudZoneId: "local-zone-id"}}]} + OEG: GET oeg_storage.zones where _id = "local-zone-id" + MongoDB: {isLocal: 'true'} + OEG: POST http://srm:8080/srm/1.0.0/deployedServiceFunction {appId, appZones} + SRM: looks up app in mongosrm.serviceFunctions + SRM: GET http://kubernetes.default.svc.cluster.local:443/apis/apps/v1/namespaces/oop/deployments (K8s API) + SRM: POST http://kubernetes.default.svc.cluster.local:443/apis/apps/v1/namespaces/oop/deployments (creates Deployment) + K8s → SRM: Deployment created + SRM → OEG: {appInstanceId, status: "instantiating"} + OEG → App Provider: 202 {appInstanceId, status} +``` + +### 11.2 Establish Federation + Deploy to Partner Zone + +``` +Admin: POST /partner {origOPFederationId: "my-op", partnerStatusLink: "..."} → OEG + OEG: POST http://federation-manager.federation-manager.svc.cluster.local:8080/realms/federation/protocol/openid-connect/token + {grant_type: client_credentials, client_id: originating-op-1, client_secret: hardcoded} + Keycloak → OEG: {access_token: "eyJ...", expires_in: 300} + OEG: POST http://federation-manager:8989/operatorplatform/federation/v1/partner + Bearer: "eyJ..." 
X-Partner-API-Root: "http://10.8.0.1:31002" X-Internal: true + FM: validates Bearer against Keycloak + FM: POST http://10.8.0.1:31002/partner {FederationRequestData} [via OpenVPN if active] + PartnerFM → FM: {federationContextId: "ctx-abc", offeredZones: [{zoneId: "pz-1", ...}], ...} + FM → OEG: {FederationResponseData} + OEG: oeg_storage.zones.insert_many([{_id:"pz-1", isLocal:'false', fedContextId:'ctx-abc'}]) + OEG: oeg_storage.federations.insert_one({_id:'ctx-abc', token:'eyJ...'}) + OEG → Admin: 200 {FederationResponseData} + +App Provider: POST /appinstances {appId: "uuid-xxx", appZones: [{EdgeCloudZone: {edgeCloudZoneId: "pz-1"}}]} + OEG: GET oeg_storage.zones where _id = "pz-1" + MongoDB: {isLocal:'false', fedContextId:'ctx-abc'} + OEG: GET http://srm:8080/srm/1.0.0/serviceFunction/uuid-xxx + SRM → OEG: {appManifest: {name, version, appRepo, componentSpec}} + OEG: POST http://federation-manager:8989/.../ctx-abc/artefact {GSMA artefact payload} + Bearer: token from oeg_storage.federations X-Partner-API-Root: "http://10.8.0.1:31002" + FM: POST http://10.8.0.1:31002/ctx-abc/artefact (relayed to partner) + PartnerFM → FM: 200 + FM → OEG: 200 (raw Response object) + OEG [if 200 or 409]: + POST http://federation-manager:8989/.../ctx-abc/application/onboarding {GSMA onboard payload} + FM: POST http://10.8.0.1:31002/ctx-abc/application/onboarding + PartnerFM: stores app in its own SRM catalogue + PartnerFM → FM: 200 + FM → OEG: response.json() + OEG [if 200]: + POST http://federation-manager:8989/.../ctx-abc/application/lcm {GSMA deploy payload} + FM: POST http://10.8.0.1:31002/ctx-abc/application/lcm + PartnerFM: instructs its own SRM to deploy on zone "pz-1" + PartnerSRM: calls K8s API on partner cluster + PartnerFM → FM: 202 Accepted + FM → OEG: raw requests.Response object ← BUG: cannot be serialised + OEG → App Provider: 500 Internal Server Error ← actual behaviour +``` + +--- + +## 12. 
What Is Missing / Broken in the Integrations + +### SRM Integration + +| Issue | Detail | +|---|---| +| No authentication | All SRM calls have `Authorization` commented out. If SRM enforces auth, all calls fail. | +| No timeout on zone fetch | `edge_cloud_zones()` has no try/except and no timeout. A slow SRM blocks indefinitely. | +| Filters not forwarded | `GET /appinstances?appId=X` does not pass `appId` to `GET /deployedServiceFunction?appId=X`. | +| No correlation ID forwarding | `x-correlator` received from client is never added to SRM call headers. | +| Response not validated | SRM responses are returned raw without validating against CAMARA schema. | +| Status code mismatch | SRM returns 200 for submit; OEG should return 201 per CAMARA spec. | +| Duplicate zone insert | `insert_many` on startup without upsert causes `BulkWriteError` on pod restart. | +| Zone live-query vs. cache | `get_edge_cloud_zones` queries SRM live, whereas `create_app_instance` reads zones from the MongoDB cache — the two views can drift apart. | + +### FM Integration + +| Issue | Detail | +|---|---| +| Hardcoded credentials | Keycloak `client_id:client_secret` hardcoded in source, not env vars. | +| Token not refreshed | 5-minute token stored in MongoDB, never re-fetched. All FM calls fail after expiry. | +| Raw response returned | `deploy_app_partner` returns `requests.Response`; Connexion cannot serialise it → 500. | +| `create_artefact` vs. `onboard_application` inconsistency | Former returns raw Response, latter returns `.json()` — inconsistent handling in caller. | +| Empty artefact fields | `artefactDescription`, `token`, `numOfInstances`, `interfaceId`, `network` always empty. | +| Single componentSpec | Only `componentSpec[0]` is translated — multi-component apps lose their other components. | +| Unexposed zone sync API | `request_zone_sync`, `get_zone_resource_info`, `remove_zone_sync` implemented but not in OpenAPI. 
| +| Blanket zone delete | `delete_partner_zones()` deletes ALL partner zones, not just those of the deleted federation. | +| No retry on FM timeout | Default timeout is 10–20s. A single timeout returns error dict immediately. | +| No correlation ID | `x-correlator` not forwarded to FM calls. | + +### Keycloak Integration + +| Issue | Detail | +|---|---| +| Hardcoded credentials in source | Client secret committed to git in both `values.yaml` and `federation_manager_controller.py`. | +| `start-dev` mode | Keycloak runs in development mode with embedded H2 database — not suitable for production. | +| No token introspection cache | Each FM validation requires a round-trip to Keycloak. No caching of valid tokens by FM. | +| admin:admin credentials | Default Keycloak admin credentials in `values.yaml`. | + +### MongoDB (OEG) Integration + +| Issue | Detail | +|---|---| +| No connection pooling | New `MongoClient` per call; connections are not closed. | +| `MongoManager` unused | Proper pooled connection manager exists but is bypassed. | +| `isLocal` as string | Stored as `'true'`/`'false'` string instead of BSON boolean. | +| No secondary indexes on `zones` | MongoDB's default `_id` index keeps lookups by ID fast, but there are no secondary or compound indexes for status/provider filtering. | +| No TTL on federation tokens | Expired tokens accumulate in the `federations` collection forever. | +| No schema validation | No MongoDB validator on the collections — any dict can be inserted. | + +--- + +## 13. 
FastAPI Migration — Integration Improvements + +### SRM Client (async) + +```python +# services/srm_client.py +import httpx +from fastapi import Depends, Request +from config import settings +import logging + +logger = logging.getLogger(__name__) + +class SRMClient: + def __init__(self, request: Request): + self._client: httpx.AsyncClient = request.app.state.srm_client + self._correlation_id: str = getattr(request.state, "correlation_id", "") + + def _headers(self) -> dict: + return { + "Content-Type": "application/json", + "x-correlator": self._correlation_id, + # "Authorization": f"Bearer {await token_service.get_srm_token()}" + } + + async def submit_app(self, body: dict) -> httpx.Response: + return await self._client.post( + "/serviceFunction", + json=body, + headers=self._headers(), + timeout=30.0, + ) + + async def get_zones(self) -> list[dict]: + resp = await self._client.get("/node", headers=self._headers(), timeout=10.0) + resp.raise_for_status() + return resp.json() + + async def deploy(self, body: dict, filters: dict | None = None) -> httpx.Response: + return await self._client.post( + "/deployedServiceFunction", + json=body, + headers=self._headers(), + timeout=60.0, # deployment can take time + ) +``` + +### FM Client (async, with token refresh) + +```python +# services/fm_client.py +import httpx +from services.token_service import token_service +from config import settings + +class FMClient: + def __init__(self, request: Request): + self._client: httpx.AsyncClient = request.app.state.fm_client + self._correlation_id: str = getattr(request.state, "correlation_id", "") + + async def _headers(self) -> dict: + token = await token_service.get_token() # cached, auto-refreshed + return { + "Authorization": f"Bearer {token}", + "X-Partner-API-Root": settings.PARTNER_API_ROOT, + "X-Internal": "true", + "Content-Type": "application/json", + "Accept": "application/json", + "x-correlator": self._correlation_id, + } + + async def post_partner(self, body: dict) -> 
tuple[dict, int]: + headers = await self._headers() + resp = await self._client.post("/partner", json=body, headers=headers, timeout=30.0) + return resp.json(), resp.status_code + + async def create_artefact(self, fed_ctx_id: str, artefact: dict) -> httpx.Response: + headers = await self._headers() + return await self._client.post( + f"/{fed_ctx_id}/artefact", + json=artefact, + headers=headers, + timeout=30.0, + ) + + async def onboard_application(self, fed_ctx_id: str, body: dict) -> httpx.Response: + headers = await self._headers() + return await self._client.post( + f"/{fed_ctx_id}/application/onboarding", + json=body, + headers=headers, + timeout=30.0, + ) + + async def deploy_app_partner(self, fed_ctx_id: str, body: dict) -> httpx.Response: + headers = await self._headers() + # Returns httpx.Response — caller must extract status_code and .json() + return await self._client.post( + f"/{fed_ctx_id}/application/lcm", + json=body, + headers=headers, + timeout=60.0, + ) +``` + +### MongoDB (async, pooled, typed) + +```python +# storage/zone_repo.py +from motor.motor_asyncio import AsyncIOMotorCollection +from fastapi import Depends +from deps import get_db + +class ZoneRepo: + def __init__(self, db=Depends(get_db)): + self._col: AsyncIOMotorCollection = db["zones"] + + async def get(self, zone_id: str) -> dict | None: + return await self._col.find_one({"_id": zone_id}) + + async def upsert_many(self, zones: list[dict]) -> None: + for zone in zones: + await self._col.update_one( + {"_id": zone["_id"]}, + {"$set": zone}, + upsert=True, + ) + + async def get_partner_zones(self) -> list[dict]: + cursor = self._col.find({"isLocal": False}) + return await cursor.to_list(length=None) + + async def delete_federation_zones(self, fed_context_id: str) -> int: + result = await self._col.delete_many({ + "isLocal": False, + "fedContextId": fed_context_id, # scoped to this federation only + }) + return result.deleted_count + +# storage/federation_repo.py +class FederationRepo: + 
def __init__(self, db=Depends(get_db)): + self._col: AsyncIOMotorCollection = db["federations"] + + async def get(self, fed_context_id: str) -> dict | None: + return await self._col.find_one({"_id": fed_context_id}) + + async def insert(self, record: dict) -> None: + await self._col.replace_one({"_id": record["_id"]}, record, upsert=True) + + async def delete(self, fed_context_id: str) -> None: + await self._col.delete_one({"_id": fed_context_id}) +``` + +### Keycloak Token Service (async, cached) + +```python +# services/token_service.py +import time +import httpx +from config import settings + +class TokenService: + def __init__(self): + self._token: str | None = None + self._expires_at: float = 0.0 + + async def get_token(self) -> str: + if self._token and time.monotonic() < (self._expires_at - 30): + return self._token # return cached token with 30s buffer + async with httpx.AsyncClient(verify=settings.SSL_CA_BUNDLE or True) as client: + resp = await client.post( + settings.TOKEN_ENDPOINT, + data={ + "grant_type": "client_credentials", + "scope": settings.KEYCLOAK_SCOPE, + }, + auth=(settings.KEYCLOAK_CLIENT_ID, settings.KEYCLOAK_CLIENT_SECRET), + timeout=10.0, + ) + resp.raise_for_status() + data = resp.json() + self._token = data["access_token"] + self._expires_at = time.monotonic() + data.get("expires_in", 300) + return self._token + +token_service = TokenService() # singleton, shared across all requests +``` + +### Federated Deploy — Corrected Flow + +```python +# In routers/apps.py +async def _create_federated_instance( + body: CreateAppInstanceRequest, + zone: dict, + srm: SRMClient, + fm: FMClient, + fed_repo: FederationRepo, +) -> list[AppInstanceInfo]: + app_data_resp = await srm.get_app(body.appId) + app_manifest = AppManifest(**app_data_resp["appManifest"]) + fed_ctx_id = zone["fedContextId"] + + artefact = build_gsma_artefact(body.appId, app_manifest) + artefact_resp = await fm.create_artefact(fed_ctx_id, artefact) + if artefact_resp.status_code not 
in (200, 409): + raise HTTPException(status_code=artefact_resp.status_code, + detail=artefact_resp.json()) + + onboard = build_gsma_onboard(body.appId, app_manifest) + onboard_resp = await fm.onboard_application(fed_ctx_id, onboard) + if onboard_resp.status_code != 200: + raise HTTPException(status_code=onboard_resp.status_code, + detail=onboard_resp.json()) + + deploy = build_gsma_deploy(body.appId, app_manifest, zone) + deploy_resp = await fm.deploy_app_partner(fed_ctx_id, deploy) + if deploy_resp.status_code not in (200, 202): + raise HTTPException(status_code=deploy_resp.status_code, + detail=deploy_resp.json()) + + # Parse partner response into CAMARA AppInstanceInfo + return [AppInstanceInfo( + appInstanceId=deploy_resp.json().get("instanceId", str(uuid4())), + status=AppInstanceStatus.instantiating, + edgeCloudZone=EdgeCloudZone(**zone), + )] +``` + +--- + +*This document is companion to `OEG_ANALYSIS.md`. Together they cover all implementation details, integration patterns, known issues, and the FastAPI migration path.* diff --git a/docs/OEG_ANALYSIS.md b/docs/OEG_ANALYSIS.md new file mode 100644 index 0000000000000000000000000000000000000000..bd5a3baf76aae7e36bfbfd10a3a2b027fa67ffe9 --- /dev/null +++ b/docs/OEG_ANALYSIS.md @@ -0,0 +1,1415 @@ +# Open Exposure Gateway (OEG) — Full Technical Analysis + +> **Version analysed:** `1.0.1-wip` · **Branch:** `feature/helm` +> **Date:** February 2026 +> **Framework:** Connexion 3.1 + Flask 3.1 + uvicorn 0.32 + Python 3.12 +> **Standard:** GSMA Operator Platform Group (OPG) / CAMARA EdgeCloud API + +--- + +## Table of Contents + +1. [What the OEG Is](#1-what-the-oeg-is) +2. [Repository Layout](#2-repository-layout) +3. [Technology Stack](#3-technology-stack) +4. [Architecture Overview](#4-architecture-overview) +5. [API Surface](#5-api-surface) +6. [Data Models](#6-data-models) +7. 
[Implementation Deep-Dive — Layer by Layer](#7-implementation-deep-dive--layer-by-layer) + - 7.1 [Application Entry Point](#71-application-entry-point) + - 7.2 [Configuration](#72-configuration) + - 7.3 [Controllers](#73-controllers) + - 7.4 [Service Clients](#74-service-clients) + - 7.5 [Storage Service](#75-storage-service) + - 7.6 [MongoDB Manager](#76-mongodb-manager) + - 7.7 [Security Controller](#77-security-controller) + - 7.8 [Logging](#78-logging) +8. [Key Functional Flows](#8-key-functional-flows) + - 8.1 [Application Submit (POST /apps)](#81-application-submit-post-apps) + - 8.2 [Application Instantiation — Local Zone](#82-application-instantiation--local-zone) + - 8.3 [Application Instantiation — Federated Zone](#83-application-instantiation--federated-zone) + - 8.4 [Federation Lifecycle](#84-federation-lifecycle) + - 8.5 [Edge Cloud Zone Discovery](#85-edge-cloud-zone-discovery) + - 8.6 [QoD Session and Traffic Influence](#86-qod-session-and-traffic-influence) +9. [Integration with External Components](#9-integration-with-external-components) +10. [Deployment and Infrastructure](#10-deployment-and-infrastructure) +11. [Testing](#11-testing) +12. [CI/CD](#12-cicd) +13. [What Is Good](#13-what-is-good) +14. [What Is Bad — Detailed Issue Catalogue](#14-what-is-bad--detailed-issue-catalogue) +15. [FastAPI Migration — Full Specification](#15-fastapi-migration--full-specification) +16. [Diagram Index](#16-diagram-index) + +--- + +## 1. What the OEG Is + +The **Open Exposure Gateway** implements the *Northbound API* role of the GSMA Operator Platform Group (OPG) architecture. 
It is the single entry point for **Application Providers** (developers) wishing to: + +- Submit application metadata (manifests) to the operator platform +- Instantiate those applications on local or partner-operator edge zones +- Discover available edge cloud zones +- Manage cross-operator federation +- Request Quality-on-Demand (QoD) and Traffic Influence network resources + +The OEG itself is a **translation and orchestration layer**: it exposes standardised CAMARA-compatible REST APIs northbound, and proxies requests southbound to three internal services — the **Service Resource Manager (SRM)**, the **Federation Manager (FM)**, and its own **MongoDB** instance (used as state store for zone mappings and federation contexts). + +It does **not** directly manage any infrastructure. All compute, storage and networking actions happen inside SRM or the partner OP's FM. + +``` +Application Provider + │ + │ CAMARA REST + ▼ + ┌─────────────────────────────────────────┐ + │ Open Exposure Gateway │ + │ (Connexion/Flask · Port 8080 · /oeg) │ + └────┬──────────────┬────────────┬────────┘ + │ │ │ + HTTP REST HTTP REST MongoDB (oeg_storage) + │ │ │ + ▼ ▼ zones / federations + SRM Federation + /serviceFunction Manager + /deployedSF /partner + /node /artefact + /sessions /application/lcm + /traffic-inf. +``` + +--- + +## 2. 
Repository Layout + +``` +open-exposure-gateway/ +├── edge_cloud_management_api/ # Main Python package +│ ├── __main__.py # Module entry point +│ ├── app.py # Connexion FlaskApp factory +│ ├── configs/ +│ │ └── env_config.py # Pydantic-based env configuration +│ ├── controllers/ +│ │ ├── app_controllers.py # /apps, /appinstances handlers +│ │ ├── edge_cloud_controller.py # /edge-cloud-zones handlers +│ │ ├── federation_manager_controller.py # /partner, /fed-context-id +│ │ ├── network_functions_controller.py # /sessions, /traffic-influences +│ │ └── security_controller.py # JWT/OAuth2 (currently unused) +│ ├── managers/ +│ │ ├── db_manager.py # MongoManager context manager (unused in prod paths) +│ │ └── log_manager.py # Logger singleton +│ ├── models/ +│ │ ├── application_models.py # AppManifest, AppInstance, ComponentSpec, etc. +│ │ ├── edge_cloud_models.py # EdgeCloudZone, EdgeCloudZoneStatus +│ │ ├── federation_manager_models.py # FederationRequestData, FederationResponseData, etc. +│ │ └── error_models.py # ErrorInfo +│ ├── services/ +│ │ ├── edge_cloud_services.py # PiEdgeAPIClient + PiEdgeAPIClientFactory +│ │ ├── federation_services.py # FederationManagerClient + Factory +│ │ └── storage_service.py # Raw pymongo CRUD for zones/federations +│ └── specification/ +│ └── openapi.yaml # OpenAPI 3.0.3 — the authoritative contract +├── tests/ +│ ├── unit/ +│ │ └── controllers/ +│ │ ├── test_edge_cloud_controller.py +│ │ └── test_federation_manager_controller.py +│ ├── component/ +│ │ └── controllers/ +│ │ └── test_app_controllers.py +│ └── fixtures/ +│ ├── edge-cloud-zones.json +│ ├── submit-app-sample.json +│ └── mongo-apps-test-collection-dump.json +├── deploy/ +│ └── oeg.yaml # Raw Kubernetes manifests (Deployment + Service + Ingress) +├── helm/ +│ ├── kind-oop-config.yaml # KIND cluster config (port-mappings + host mounts) +│ ├── deploy-on-kind.sh # Automated KIND + Helm deployment script +│ └── oop-platform-chart/ # Umbrella Helm chart +│ ├── Chart.yaml +│ 
├── values.yaml # Unified configuration for all sub-charts +│ └── charts/ +│ ├── oeg/ # OEG sub-chart +│ ├── srm/ # SRM sub-chart +│ └── federation-manager/ # FM sub-chart (includes Keycloak) +├── Dockerfile +├── pyproject.toml +├── requirements.txt +├── tox.ini +└── docs/ + └── diagrams/ # Generated architecture + sequence PNGs +``` + +--- + +## 3. Technology Stack + +| Component | Technology | Version | +|---|---|---| +| Web framework | Connexion (OpenAPI-first) on top of Flask | 3.1.0 / 3.1.0 | +| ASGI server | uvicorn + uvloop | 0.32.1 / 0.21.0 | +| Python | CPython | 3.12 | +| Data validation | Pydantic v2 (with v1 compat shim for config) | 2.10.3 | +| HTTP client | requests (sync, blocking) | 2.32.3 | +| Database | MongoDB via pymongo | 4.10.1 | +| Auth primitives | python-jose[cryptography] | 3.5.0 | +| OpenAPI spec | OpenAPI 3.0.3 YAML | — | +| Container | Docker, python:3.12-alpine base | — | +| Orchestration | Kubernetes + Helm v3 | — | +| Local k8s | KIND (Kubernetes-in-Docker) | — | +| Linting | ruff | 0.8.2 | +| Type checking | mypy | 1.14.1 | +| Testing | pytest + mongomock | 8.3.4 / 4.3.0 | +| Env management | uv + python-dotenv | — | + +**Key architectural decision:** Connexion is used in "OpenAPI-first" mode — the `openapi.yaml` file is the single source of truth for routing. Each `operationId` in the YAML maps directly to a Python function using dot-notation paths (e.g., `edge_cloud_management_api.controllers.app_controllers.submit_app`). Connexion performs request body validation against the spec automatically before the handler is ever called. + +--- + +## 4. 
Architecture Overview + +The platform has three tiers: + +### Tier 1 — Northbound (OEG) +- Accepts CAMARA-format REST requests from Application Providers +- Validates request bodies against the OpenAPI spec (via Connexion) +- Routes to the appropriate controller function +- Translates CAMARA semantics into internal SRM or GSMA-OPG FM semantics +- Responds with CAMARA-format JSON + +### Tier 2 — Platform Services +- **SRM (Service Resource Manager):** Manages the catalogue of app definitions, deployed instances, and the list of edge nodes. Exposes `/serviceFunction`, `/deployedServiceFunction`, `/node`, `/sessions`, `/traffic-influences`. +- **Federation Manager (FM):** Implements the GSMA inter-OP federation protocol. Manages federation contexts, artefact exchange, app onboarding across OPs, and LCM at partner OPs. Protected by a Keycloak OAuth2 instance (client credentials flow, scope `fed-mgmt`). +- **Artefact Manager:** Manages Helm chart and container image artefacts consumed by SRM. + +### Tier 3 — Infrastructure Adapters (inside SRM) +- Kubernetes Adapter (via `kubectl`/API server) +- VM / OpenStack Adapter +- Edge Nodes (physical/virtual compute) + +### State Store (OEG-local) +MongoDB (`oeg_storage` DB, `zones` and `federations` collections) holds: +- `zones`: a flat list of all known edge cloud zones with an `isLocal` flag and, for partner zones, a `fedContextId` +- `federations`: federation context records with `_id = federationContextId` and a cached OAuth2 `token` + +--- + +## 5. 
API Surface + +### 5.1 Application Management (`/apps`, `/appinstances`) + +| Method | Path | Handler | Description | +|---|---|---|---| +| `POST` | `/apps` | `app_controllers.submit_app` | Submit app manifest → forward to SRM `/serviceFunction` | +| `GET` | `/apps` | `app_controllers.get_apps` | List all apps → SRM `/serviceFunction` | +| `GET` | `/apps/{appId}` | `app_controllers.get_app` | Get single app → SRM `/serviceFunction/{appId}` | +| `DELETE` | `/apps/{appId}` | `app_controllers.delete_app` | Remove app → SRM `/serviceFunction/{appId}` | +| `POST` | `/appinstances` | `app_controllers.create_app_instance` | Instantiate app (local or federated) | +| `GET` | `/appinstances` | `app_controllers.get_app_instance` | List instances → SRM `/deployedServiceFunction` | +| `DELETE` | `/appinstances/{appInstanceId}` | `app_controllers.delete_app_instance` | Terminate instance → SRM | + +### 5.2 Edge Cloud Discovery (`/edge-cloud-zones`) + +| Method | Path | Handler | Description | +|---|---|---|---| +| `GET` | `/edge-cloud-zones` | `edge_cloud_controller.get_edge_cloud_zones` | List all zones (local + federated) | +| `GET` | `/edge-cloud-zones/{zoneId}` | `edge_cloud_controller.edge_cloud_zone_details` | Zone details from SRM `/node/{id}` | + +### 5.3 Federation Management (`/partner`, `/{fedContextId}/partner`, `/fed-context-id`) + +| Method | Path | Handler | Description | +|---|---|---|---| +| `POST` | `/partner` | `federation_manager_controller.create_federation` | Establish uni-directional federation | +| `GET` | `/{fedContextId}/partner` | `federation_manager_controller.get_federation` | Query federation context | +| `DELETE` | `/{fedContextId}/partner` | `federation_manager_controller.delete_federation` | Tear down federation | +| `GET` | `/fed-context-id` | `federation_manager_controller.get_federation_context_ids` | List known federation context IDs | + +### 5.4 Network Functions (`/sessions`, `/traffic-influences`) + +| Method | Path | Handler | Description 
| +|---|---|---|---| +| `POST` | `/sessions` | `network_functions_controller.create_qod_session` | Create QoD session → SRM `/sessions` | +| `GET` | `/sessions/{sessionId}` | `network_functions_controller.get_qod_session` | Get QoD session | +| `DELETE` | `/sessions/{sessionId}` | `network_functions_controller.delete_qod_session` | Delete QoD session | +| `POST` | `/traffic-influences` | `network_functions_controller.create_traffic_influence_resource` | Create TI resource | +| `GET` | `/traffic-influences` | `network_functions_controller.get_all_traffic_influence_resources` | List all TI resources | +| `GET` | `/traffic-influences/{id}` | `network_functions_controller.get_traffic_influence_resource` | Get TI resource | +| `DELETE` | `/traffic-influences/{id}` | `network_functions_controller.delete_traffic_influence_resource` | Delete TI resource | + +**Note:** There are additional controller functions in `federation_manager_controller.py` that are **implemented but not exposed in the OpenAPI spec**: `onboard_application_to_partner`, `get_onboarded_app`, `delete_onboarded_app`, `request_zone_synch`, `get_zone_resource_info`, `remove_zone_sync`. These are dead code from the public API perspective. + +### 5.5 Infrastructure Paths + +- **Base URL (spec):** `http://vitrualserver:8080/oeg/1.0.0` *(note: "virtual" is misspelled in the spec)* +- **Swagger UI:** served at `/docs` +- **Kubernetes ingress:** `/oeg` prefix via Traefik at `isiath.duckdns.org` + +--- + +## 6. Data Models + +### 6.1 AppManifest (request body for `POST /apps`) + +The richest model in the system. Defined both in `openapi.yaml` (as a JSON Schema) and mirrored in `models/application_models.py` (as Pydantic). The two are **not kept in sync** — the Pydantic model has stricter regex patterns on `name` and `appProvider` that the OpenAPI schema has commented out. 
+ +```python +class AppManifest(BaseModel): + name: str = Field(..., pattern="^[A-Za-z][A-Za-z0-9_]{1,63}$") + appProvider: str = Field(..., pattern="^[A-Za-z][A-Za-z0-9_]{7,63}$") + version: str + packageType: PackageType # QCOW2 | OVA | CONTAINER | HELM + operatingSystem: Optional[OperatingSystem] + appRepo: AppRepo # type, imagePath, userName, credentials, authType, checksum + requiredResources: Optional[Any] # discriminated union — see below + componentSpec: List[ComponentSpec] # list of {componentName, networkInterfaces[]} +``` + +**`requiredResources` discriminated union** (in OpenAPI spec, not reflected in the Python model): + +| `infraKind` value | Schema | +|---|---| +| `kubernetes` | `KubernetesResources` — cpuPool, gpuPool, topology, networking (CNI, SR-IOV), addons (monitoring, ingress) | +| `virtualMachine` | `VmResources` — numCPU, memory, additionalStorages, gpu | +| `container` | `ContainerResources` — numCPU (in millivcpu format), memory, storage | +| `dockerCompose` | `DockerComposeResources` — numCPU, memory, storage, gpu | + +The Pydantic model uses `Optional[Any]` for `requiredResources`, completely losing the discriminated union validation at the Python layer. Connexion validates it against the spec, but errors produce generic 400 responses. 
+ +### 6.2 EdgeCloudZone + +```python +class EdgeCloudZone(BaseModel): + edgeCloudZoneId: UUID4 + edgeCloudZoneName: str + edgeCloudZoneStatus: Optional[EdgeCloudZoneStatus] # active | inactive | unknown + edgeCloudProvider: str + edgeCloudRegion: Optional[str] +``` + +Stored in MongoDB `zones` collection with two additional fields added at runtime: +- `isLocal: 'true' | 'false'` (string, not boolean — a type inconsistency) +- `fedContextId: str` (only present for non-local/partner zones) + +### 6.3 Federation Models + +```python +class FederationRequestData(BaseModel): + origOPFederationId: str # pattern: ^[A-Za-z0-9][A-Za-z0-9-]*$ + origOPCountryCode: Optional[str] # ISO 3166-1 Alpha-2 + origOPMobileNetworkCodes: Optional[MobileNetworkIds] + origOPFixedNetworkCodes: Optional[List[str]] + initialDate: str # date-time format + partnerStatusLink: str # URI for callbacks + partnerCallbackCredentials: Optional[CallbackCredentials] + +class FederationResponseData(BaseModel): + federationContextId: str + partnerOPFederationId: str + partnerOPCountryCode: Optional[str] + offeredAvailabilityZones: Optional[List[ZoneDetails]] + platformCaps: List[str] + edgeDiscoveryServiceEndPoint: Optional[ServiceEndpoint] + lcmServiceEndPoint: Optional[ServiceEndpoint] +``` + +The `FixedNetworkIds` model uses the Pydantic v1 `__root__` pattern, which is **broken in Pydantic v2** (should use `RootModel`). + +--- + +## 7. 
Implementation Deep-Dive — Layer by Layer + +### 7.1 Application Entry Point + +**`app.py`** — The Connexion `FlaskApp` factory: + +```python +def get_app_instance() -> FlaskApp: + file_path = Path(__file__).resolve().parent + swagger_options = SwaggerUIOptions(swagger_ui_path="/docs") + app = FlaskApp(__name__, specification_dir=file_path / "specification") + app.add_api( + "openapi.yaml", + swagger_ui_options=swagger_options, + strict_validation=True, # ← validates request bodies against schema + ) + return app +``` + +`strict_validation=True` means Connexion will reject requests that have unknown extra fields. The `resolver` is the default Connexion resolver: it reads `operationId` from the YAML (e.g., `edge_cloud_management_api.controllers.app_controllers.submit_app`) and dynamically imports and calls that function. + +**`__main__.py`** runs the app on `0.0.0.0:8080` via uvicorn (Connexion's default ASGI adapter). + +### 7.2 Configuration + +**`configs/env_config.py`** uses Pydantic **v1 compat** (`from pydantic.v1 import BaseSettings`), not the current Pydantic v2 `BaseSettings` from `pydantic-settings`. This is a deprecated shim. + +```python +class Configuration(BaseSettings): + MONGO_URI: str = os.getenv("MONGO_URI") # mongodb://oegmongo:27017 + SRM_HOST: str = os.getenv("SRM_HOST") # http://srm:8080/srm/1.0.0 + PI_EDGE_USERNAME: str = os.getenv("PI_EDGE_USERNAME") + PI_EDGE_PASSWORD: str = os.getenv("PI_EDGE_PASSWORD") + HTTP_PROXY: str = os.getenv("HTTP_PROXY") + FEDERATION_MANAGER_HOST = os.getenv("FEDERATION_MANAGER_HOST") + TOKEN_ENDPOINT = os.getenv('TOKEN_ENDPOINT') + PARTNER_API_ROOT = os.getenv('PARTNER_API_ROOT') +``` + +Issues: +- `os.getenv()` is called at class definition time, **before** `BaseSettings` can pull from `.env`. This means `.env` is loaded by `load_dotenv()` first, but `os.getenv()` in the class body runs during module import — correct only by accident. 
+- `FEDERATION_MANAGER_HOST`, `TOKEN_ENDPOINT`, and `PARTNER_API_ROOT` have **no type annotation**, so Pydantic does not validate them. +- `HTTP_PROXY` and `PI_EDGE_USERNAME/PASSWORD` are defined but `HTTP_PROXY` is never used (it's passed to `proxies` dict in `edge_cloud_services.py` but that code path is commented out). + +### 7.3 Controllers + +#### `app_controllers.py` + +The most complex controller — handles both local and federated instantiation in a single function. + +**`submit_app(body: dict)`** +The `body` argument is injected by Connexion after schema validation. The function creates a new `PiEdgeAPIClientFactory` on every single invocation, then calls `api_client.submit_app(body)` which POSTs directly to SRM `/serviceFunction`. The raw SRM response (a `dict`) is returned directly without any CAMARA-format wrapping. Consequently, if SRM returns a 201 with `{appId: ...}`, the OEG returns that same dict with an implicit 200, not the 201 the spec promises. + +**`create_app_instance()`** +This is the most complex function in the codebase. It reads the JSON body directly via `request.get_json()` (bypassing Connexion's injection, which already validated it). The bifurcation logic: + +```python +zone = get_zone(app_zones[0].get('EdgeCloudZone').get('edgeCloudZoneId')) +if zone.get('isLocal') == 'false': + # FEDERATED PATH — 8 steps + ... +# LOCAL PATH +response = pi_edge_client.deploy_service_function(data=body) +``` + +The federated path is an **8-step orchestration**: +1. Fetch app metadata from SRM +2. Build GSMA artefact payload (translating CAMARA fields to GSMA-OPG fields) +3. POST artefact to FM `/{fedContextId}/artefact` +4. If artefact creation returns 200 or 409 (already exists), proceed +5. Build GSMA onboard-app payload +6. POST to FM `/{fedContextId}/application/onboarding` +7. If onboarding returns 200, build GSMA deploy payload +8. 
POST to FM `/{fedContextId}/application/lcm` + +The local path has debug `print()` statements left in production code and uses a try/except that catches `Exception` and returns `202` with a warning — meaning errors in SRM communication are silently swallowed and reported as accepted. + +**`get_app_instance(app_id=None, x_correlator=None, app_instance_id=None, region=None)`** +Only handles the case `app_id is None AND app_instance_id is None`. All other filter combinations (specific appId, specific instanceId, by region) hit the `if not instances` branch and return 404. The filtering logic is entirely missing. + +#### `edge_cloud_controller.py` + +Has a **module-level side effect on import**: + +```python +try: + pi_edge_factory = PiEdgeAPIClientFactory() + api_client = pi_edge_factory.create_pi_edge_api_client() + zones = api_client.edge_cloud_zones() # HTTP call to SRM at import time + for zone in zones: + zone['_id'] = zone.get('edgeCloudZoneId') + zone['isLocal'] = 'true' + insert_zones(zones) # MongoDB write at import time +except Exception as e: + logger.error(e.args) # silently swallowed +``` + +This fires every time the module is imported (including during tests). If SRM is not available at startup, the `zones` collection will be empty but the application continues — leading to every instantiation request for "local" zones failing to find a zone record. 
+ +**`get_edge_cloud_zones`** defines filter helpers that are never applied: + +```python +def query_region_matches(zone: EdgeCloudZone) -> bool: + return query_params.region is None or zone.edgeCloudRegion == query_params.region + +def query_status_matches(zone: EdgeCloudZone) -> bool: + return query_params.status is None or zone.edgeCloudZoneStatus == query_params.status + +response = [EdgeCloudZone(**zone).model_dump() for zone in get_all_cloud_zones()] +# ↑ filter functions are defined above but never called here +``` + +The `region` and `status` query parameters are accepted, validated, but then completely ignored. + +**`get_federated_zones`** is a stub returning an empty list — federated zones from the MongoDB `zones` collection are never surfaced through this path. + +#### `federation_manager_controller.py` + +The hardcoded Basic-auth header is the most serious security issue in the entire codebase: + +```python +token_headers = { + 'Authorization': 'Basic b3JpZ2luYXRpbmctb3AtMTpkZDd2TndGcWpOcFl3YWdobEV3TWJ3MTBnMGtsV0RIYg==', + 'Content-Type': 'application/x-www-form-urlencoded' +} +data = {'grant_type': 'client_credentials', 'scope': 'fed-mgmt'} +``` + +The Base64 value decodes to `originating-op-1:dd7vNwFqjNpYwaghlEwMbw10g0klWDHb` — hardcoded client credentials for the Keycloak realm. + +`create_federation()` fetches a fresh token from Keycloak, stores it in MongoDB alongside the `federationContextId`. Subsequent calls (`get_federation`, `delete_federation`) retrieve this stored token from MongoDB. There is no token refresh mechanism — the stored token will expire (Keycloak default: 300 seconds) and all subsequent federation calls will fail with 401 until a new `POST /partner` is issued. + +**`__get_token()`** (used by the unexposed functions) reads the token from the incoming request's `Authorization` header — an entirely different strategy than the rest of the controller. 
+ +#### `network_functions_controller.py` + +The simplest controller — pure proxy for QoD and Traffic Influence. Each function creates a new `PiEdgeAPIClientFactory` per call and forwards to SRM. No state management. Commented-out Pydantic validation code is left in all functions. + +#### `security_controller.py` + +Implements JWT decode and OAuth2 scope validation, but **never called**. The `security:` blocks in `openapi.yaml` are all commented out: + +```yaml +# security: +# - openId: +# - edge-application-management:apps:write +``` + +And the security schemes are commented out too: + +```yaml +# securitySchemes: +# jwt: +# type: http +# scheme: bearer +# x-bearerInfoFunc: edge_cloud_management_api.controllers.security_controller.decode_token +``` + +### 7.4 Service Clients + +#### `PiEdgeAPIClient` (`edge_cloud_services.py`) + +The SRM proxy. Wraps `requests` with per-method error handling. Notable issues: + +- **`_authenticate()` is never called.** The method exists, logs in via `POST /authentication`, and stores a token. But `_get_headers()` has the authentication call commented out — every request goes to SRM without any bearer token: + +```python +def _get_headers(self): + # if not self.token: + # self._authenticate() + return { + # "Authorization": f"Bearer {self.token}", + "Content-Type": "application/json", + } +``` + +- **`verify=False` everywhere:** All `requests.get/post/delete` calls pass `verify=False`, disabling TLS certificate verification. + +- **`requests_session` referenced but never set:** `_authenticate()` uses `self.requests_session.post(...)` but `__init__` never creates `self.requests_session`. This would raise `AttributeError` if authentication were ever called. + +- **`edge_cloud_zones()` has no try/except:** Unlike every other method, this one lets exceptions propagate unhandled (noted by the `#try:` comment). 
+ +- **Error return type mixing:** Most methods return `{"error": ..., "status_code": ...}` dicts on failure but callers check `isinstance(response, dict) and "error" in response` inconsistently. + +#### `FederationManagerClient` (`federation_services.py`) + +The FM proxy. More consistent error handling than `PiEdgeAPIClient`. Adds three custom headers to every request: + +```python +headers['Authorization'] = 'Bearer ' + token +headers['X-Partner-API-Root'] = self.partner_root # from PARTNER_API_ROOT env var +headers['X-Internal'] = 'true' +headers['Content-Type'] = 'application/json' +headers['Accept'] = 'application/json' +``` + +`X-Partner-API-Root` is passed down to FM, which uses it to know the partner OP's API base URL. `X-Internal` signals to FM that the request is coming from OEG (not from a partner OP). + +**`deploy_app_partner`** returns the raw `requests.Response` object (not `.json()`), while all other methods return parsed JSON or error dicts. This makes `create_app_instance` return the raw response object directly to Connexion, which then tries to serialise it and fails silently. + +**`onboard_application_to_partners`** is defined as a static-like method **inside the `FederationManagerClientFactory` class** — a design mistake. It has its own SRM client creation logic, its own federation client, and its own error handling, but is never called from anywhere. 
+ +### 7.5 Storage Service + +**`storage_service.py`** — creates a new `pymongo.MongoClient` on every function call: + +```python +def get_zone(zone_id: str): + myclient = pymongo.MongoClient(storage_url) # new TCP connection per call + mydbmongo = myclient[mydb_mongo] + col = mydbmongo["zones"] + zone = col.find_one({'_id': zone_id}) + return zone + # connection is never explicitly closed +``` + +This happens for every `get_zone`, `insert_zones`, `get_fed`, `insert_federation`, `get_all_feds`, `delete_fed`, and `delete_partner_zones` call — potentially dozens of open MongoDB connections per request in the federated instantiation flow. + +The connection is never closed (no `myclient.close()`). pymongo will garbage-collect them eventually but under load this causes connection pool exhaustion. + +The `isLocal` field is stored as the **string** `'true'` or `'false'`, not as a Python/BSON boolean. Comparisons throughout use `zone.get('isLocal') == 'false'` (string comparison). + +`insert_zones` uses `insert_many` with no upsert logic. If called twice (e.g., server restart), it will insert duplicate zone records, causing `find_one` to return the first match non-deterministically. + +### 7.6 MongoDB Manager + +**`managers/db_manager.py`** — a well-designed context manager with a connection pool (`maxPoolSize=50`) that is almost entirely unused. The app uses `storage_service.py` instead: + +```python +class MongoManager: + def __init__(self, mongo_uri=config.MONGO_URI): + self.client = MongoClient(mongo_uri, maxPoolSize=50) + ... + def __enter__(self): return self + def __exit__(self, ...): self.close_connection() +``` + +There are commented-out usages in controllers (e.g., `with MongoManager() as db: db.insert_document(...)`) suggesting a refactoring was started and abandoned midway. 
+ +### 7.7 Security Controller + +**`controllers/security_controller.py`** — fully implemented but dead code: + +```python +def decode_token(token: str): + JWT_ISSUER = os.environ.get('JWT_ISSUER') + PUBLIC_KEY = os.environ.get('JWT_PUBLIC_KEY') + try: + return jwt.decode(token, key=PUBLIC_KEY, algorithms=["RS256"], issuer=JWT_ISSUER) + except JWTError as e: + raise Unauthorized from e + +def check_oAuth2ClientCredentials(token): + return {'scopes': ['fed-mgmt'], 'uid': 'test_value'} # always returns success + +def validate_scope_oAuth2ClientCredentials(required_scopes, token_scopes): + return set(required_scopes).issubset(set(token_scopes)) +``` + +`check_oAuth2ClientCredentials` **always returns success** with a hardcoded `test_value` uid — this was likely written for local testing and never replaced with a real implementation. + +### 7.8 Logging + +**`managers/log_manager.py`** creates a logger but **never attaches the handler**: + +```python +logger = logging.getLogger("Edge Cloud Management API") +logger.setLevel(logging.INFO) +formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") +handler = logging.StreamHandler(sys.stdout) +handler.setFormatter(formatter) +# logger.addHandler(handler) ← MISSING +``` + +The `handler` is created and formatted but the `addHandler` call is absent. All `logger.info/error/debug` calls across the codebase produce no output unless the root logger is configured elsewhere (which it is not). + +--- + +## 8. 
Key Functional Flows + +### 8.1 Application Submit (`POST /apps`) + +``` +Client → POST /oeg/1.0.0/apps {AppManifest JSON} + → Connexion validates body against OpenAPI schema (strict_validation=True) + → app_controllers.submit_app(body: dict) + → PiEdgeAPIClientFactory().create_pi_edge_api_client() + → PiEdgeAPIClient.submit_app(body) + → requests.post(SRM_HOST/serviceFunction, json=body, verify=False) + → response.raise_for_status() + → return response.json() + ← raw SRM response dict (not CAMARA-wrapped) +← 200 OK (spec says 201 — status code mismatch) +``` + +The `appId` returned is generated by SRM, not by OEG. No UUID is generated at the OEG layer. + +### 8.2 Application Instantiation — Local Zone + +``` +Client → POST /oeg/1.0.0/appinstances {appId, appZones: [{EdgeCloudZone: {edgeCloudZoneId}}]} + → Connexion validates body + → app_controllers.create_app_instance() + → body = request.get_json() # manual body re-read (unnecessary) + → get_zone(edgeCloudZoneId) # MongoDB lookup + → zone['isLocal'] == 'true' # string comparison + → PiEdgeAPIClient.deploy_service_function(data=body) + → requests.post(SRM_HOST/deployedServiceFunction, json=body) + → response.raise_for_status() + → return response.json() + → if error dict: return 202 with warning + → else: return SRM response directly +← 202 (best case) or 202 with warning (SRM unavailable) +``` + +### 8.3 Application Instantiation — Federated Zone + +``` +Client → POST /oeg/1.0.0/appinstances {appId, appZones: [{EdgeCloudZone: {edgeCloudZoneId}}]} + → get_zone(edgeCloudZoneId) → {isLocal:'false', fedContextId} + → [Step 1] PiEdgeAPIClient.get_app(appId) + → GET SRM_HOST/serviceFunction/{appId} → appManifest + → [Step 2] Build GSMA artefact: + artefact = { + artefactId: app_id, + appProviderId: appData['appProvider'], + artefactName: appData['name'], + artefactVersionInfo: appData['version'], + repoType: appData['appRepo']['type'], + artefactRepoLocation: {repoURL, userName, password, token:''}, + componentSpec: 
[{ + componentName, numOfInstances:0, + restartPolicy:'RESTART_POLICY_ALWAYS', + exposedInterfaces: [{interfaceId:'', commProtocol, commPort, visibilityType, ...}], + compEnvParams: [], persistentVolumes: [] + }] + } + → [Step 3] get_fed(fedContextId) → {token} + → [Step 4] FederationManagerClient.create_artefact(artefact, fedContextId, token) + → POST FM/{fedContextId}/artefact (Bearer token, X-Partner-API-Root, X-Internal:true) + → returns raw requests.Response + → [Step 5] if status 200 or 409: + onboard_app = { + appId, appProviderId, + appDeploymentZones: [], + appMetaData: {appName, version}, + appComponentSpecs: [{serviceNameNB, serviceNameEW, componentName, artefactId}] + } + → [Step 6] FederationManagerClient.onboard_application(fedContextId, onboard_app, token) + → POST FM/{fedContextId}/application/onboarding + → returns response.json() (NOT raw response — different from create_artefact) + → [Step 7] if onboard status 200: + deploy_app = { + appId, appVersion, appProviderId, + zoneInfo: {zoneId: zone['edgeCloudZoneId']} + } + → [Step 8] FederationManagerClient.deploy_app_partner(fedContextId, deploy_app, token) + → POST FM/{fedContextId}/application/lcm + → returns raw requests.Response ← returned directly to Connexion (bug) +← raw requests.Response object (Connexion cannot serialise this) +``` + +### 8.4 Federation Lifecycle + +**Establish (`POST /partner`):** +``` +Admin → POST /partner {origOPFederationId, initialDate, partnerStatusLink} + → federation_manager_controller.create_federation() + → requests.post(TOKEN_ENDPOINT, headers=hardcoded_basic_auth, data=client_credentials) + → Keycloak /realms/federation/protocol/openid-connect/token + → access_token + → FederationManagerClient.post_partner(body, token) + → POST FM/partner (Bearer + X-Partner-API-Root + X-Internal:true) + → FM relays to Partner OP + → {federationContextId, offeredAvailabilityZones, partnerOPFederationId, ...} + → insert_zones(offeredAvailabilityZones) — partner zones marked 
isLocal:'false' + → insert_federation({_id: federationContextId, token: access_token}) +← 200 {FederationResponseData} +``` + +**Query (`GET /{fedContextId}/partner`):** +``` +Admin → GET /{fedContextId}/partner + → get_fed(fedContextId) → {token} # may be expired + → FederationManagerClient.get_partner(fedContextId, token) + → GET FM/{fedContextId}/partner +← 200 {zone details, network codes, endpoints} +``` + +**Terminate (`DELETE /{fedContextId}/partner`):** +``` +Admin → DELETE /{fedContextId}/partner + → get_fed(fedContextId) → {token} + → FederationManagerClient.delete_partner(fedContextId, token) + → DELETE FM/{fedContextId}/partner + → if response has content: + delete_fed(fedContextId) # remove from MongoDB + delete_partner_zones() # remove ALL isLocal:'false' zones +← 200 +``` + +Note: `delete_partner_zones()` deletes **all** non-local zones, regardless of `fedContextId`. In a multi-federation scenario, tearing down one federation would delete all other partners' zones too. + +### 8.5 Edge Cloud Zone Discovery + +``` +Client → GET /edge-cloud-zones?region=X&status=active + → edge_cloud_controller.get_edge_cloud_zones(x_correlator, region, status) + → EdgeCloudQueryParams validation (region/status validated) + → get_all_cloud_zones() + → get_local_zones() + → PiEdgeAPIClient.edge_cloud_zones() → GET SRM/node → list + → get_federated_zones() → [] (stub, always empty) + → [EdgeCloudZone(**zone).model_dump() for zone in all_zones] + → region/status filters defined but never applied +← 200 [list of all zones, unfiltered] +``` + +At startup (module import), local zones are fetched from SRM and inserted into MongoDB. But `get_edge_cloud_zones` **ignores MongoDB** entirely — it calls SRM live every time. The MongoDB zone store is only used by `create_app_instance` to determine `isLocal`. + +### 8.6 QoD Session and Traffic Influence + +Pure transparent proxies to SRM. 
No state management, no transformation: + +``` +Client → POST /sessions {device, qosProfile, duration, sink} + → network_functions_controller.create_qod_session(body) + → PiEdgeAPIClient.create_qod_session(body) + → POST SRM/sessions {body} + → if 200: return response.json() + → if 500: return response.content ← bytes, not JSON +← SRM response (structure unknown without SRM spec) +``` + +--- + +## 9. Integration with External Components + +### 9.1 Service Resource Manager (SRM) + +| OEG endpoint | SRM endpoint | Method | Notes | +|---|---|---|---| +| `POST /apps` | `/serviceFunction` | POST | Direct forward of full body | +| `GET /apps` | `/serviceFunction` | GET | Returns list | +| `GET /apps/{id}` | `/serviceFunction/{id}` | GET | Returns `{appManifest: {...}}` | +| `DELETE /apps/{id}` | `/serviceFunction/{id}` | DELETE | Returns text | +| `POST /appinstances` | `/deployedServiceFunction` | POST | Local zones only | +| `GET /appinstances` | `/deployedServiceFunction` | GET | No filter support | +| `DELETE /appinstances/{id}` | `/deployedServiceFunction/{id}` | DELETE | Returns raw response | +| `GET /edge-cloud-zones` | `/node` | GET | Live per request | +| `GET /edge-cloud-zones/{id}` | `/node/{id}` | GET | Live per request | +| `POST /sessions` | `/sessions` | POST | QoD | +| `GET /sessions/{id}` | `/sessions/{id}` | GET | QoD | +| `DELETE /sessions/{id}` | `/sessions/{id}` | DELETE | QoD | +| `POST /traffic-influences` | `/traffic-influences` | POST | TI | +| `GET /traffic-influences` | `/traffic-influences/` | GET | Trailing slash | +| `GET /traffic-influences/{id}` | `/traffic-influences/{id}` | GET | TI | +| `DELETE /traffic-influences/{id}` | `/traffic-influences/{id}` | DELETE | TI | + +All calls use `Content-Type: application/json` with **no authentication** and `verify=False`. 
+ +### 9.2 Federation Manager (FM) + +| OEG call | FM endpoint | Purpose | +|---|---|---| +| `post_partner` | `POST /partner` | Establish federation | +| `get_partner` | `GET /{id}/partner` | Query federation | +| `delete_partner` | `DELETE /{id}/partner` | Tear down federation | +| `get_federation_context_ids` | `GET /fed-context-id` | List context IDs | +| `create_artefact` | `POST /{id}/artefact` | Upload app artefact | +| `onboard_application` | `POST /{id}/application/onboarding` | Register app at partner | +| `deploy_app_partner` | `POST /{id}/application/lcm` | Deploy at partner | +| `request_zone_sync` | `POST /{id}/zones` | Zone sync (unexposed) | +| `get_zone_resource_info` | `GET /{id}/zones/{zoneId}` | Zone info (unexposed) | +| `remove_zone_sync` | `DELETE /{id}/zones/{zoneId}` | Zone remove (unexposed) | + +FM authentication: Bearer token obtained from Keycloak (stored in MongoDB, not refreshed). +Custom headers: `X-Partner-API-Root` (partner's base URL), `X-Internal: true`. + +### 9.3 Keycloak + +- Realm: `federation` +- Flow: Client Credentials (`grant_type=client_credentials`, `scope=fed-mgmt`) +- Client: `originating-op-1` / `dd7vNwFqjNpYwaghlEwMbw10g0klWDHb` (hardcoded) +- Endpoint: `TOKEN_ENDPOINT` env var (typically `http://federation-manager.federation-manager.svc.cluster.local:8080/realms/federation/protocol/openid-connect/token`) + +--- + +## 10. Deployment and Infrastructure + +### 10.1 Docker + +```dockerfile +FROM python:3.12-alpine +RUN mkdir -p /usr/src/app +WORKDIR /usr/src/app +COPY requirements.txt /usr/src/app/ +RUN pip install --no-cache-dir --trusted-host pypi.org --trusted-host files.pythonhosted.org -r requirements.txt +COPY . /usr/src/app +EXPOSE 8080 +ENTRYPOINT ["python"] +CMD ["-m", "edge_cloud_management_api"] +``` + +Issues: `--trusted-host` flags disable pip's TLS verification for PyPI; no `.dockerignore` so tests, git history, and local configs are copied into the image. 
+ +### 10.2 Kubernetes (Raw Manifests — `deploy/oeg.yaml`) + +Deploys: +- `oegmongo` — MongoDB Deployment + Service (ClusterIP:27017) + PV/PVC (50Mi, hostPath `/mnt/data/mongodb_oeg`) +- `oegcontroller` — OEG Deployment (1 replica, `imagePullPolicy: Always`) + Service (ClusterIP:80→8080) +- Ingress via Traefik at `isiath.duckdns.org/oeg` + +Environment variables in the Deployment: +```yaml +MONGO_URI: mongodb://oegmongo:27017 +SRM_HOST: http://srm:8080/srm/1.0.0 +FEDERATION_MANAGER_HOST: http://federation-manager.federation-manager.svc.cluster.local:8989/operatorplatform/federation/v1 +PARTNER_API_ROOT: http://10.8.0.1:31002 +TOKEN_ENDPOINT: http://federation-manager.federation-manager.svc.cluster.local:8080/realms/federation/protocol/openid-connect/token +``` + +No liveness or readiness probes configured. + +### 10.3 Helm Chart + +Umbrella chart `oop-platform` with three sub-charts: `oeg`, `srm`, `federation-manager`. + +**KIND cluster config** (`kind-oop-config.yaml`): +- Single control-plane node +- Port mappings: 32263 (OEG), 32415 (SRM), 30080 (Artefact Manager), 30081 (Keycloak), 30989 (FM) +- Host mounts: `/tmp/kind-oop/mongodb_srm` and `/tmp/kind-oop/mongodb_oeg` + +**OEG sub-chart** (`charts/oeg/templates/oegcontroller-deployment.yaml`): +- RollingUpdate strategy (`maxSurge:1`, `maxUnavailable:0`) +- Liveness and readiness probes are **commented out** +- No Kubernetes Secret for credentials — all env vars are plaintext in `values.yaml` + +**Serious issue in values.yaml:** A long-lived Kubernetes ServiceAccount JWT for the SRM (`kubernetesMasterToken`) is **committed in plaintext** in the default values file. This token has a 1-year expiry from when it was generated. 
+ +### 10.4 Service Topology (Kubernetes) + +``` +Namespace: oop + oegcontroller (NodePort 32263) ─┬→ srm (ClusterIP :8080) + └→ mongodb (ClusterIP :27017) + + oegcontroller talks to FM via: federation-manager.federation-manager.svc.cluster.local + +Namespace: federation-manager + federation-manager (NodePort 30989) + keycloak (NodePort 30081) + mongodb (ClusterIP :27017) +``` + +--- + +## 11. Testing + +### Structure + +``` +tests/ +├── unit/ — pure Python, no external deps +│ └── controllers/ +│ ├── test_edge_cloud_controller.py (7 parametrized test cases) +│ └── test_federation_manager_controller.py (4 test cases) +├── component/ — requires Flask app context, mocks external services +│ └── controllers/ +│ └── test_app_controllers.py (4 test cases) +└── fixtures/ + ├── edge-cloud-zones.json (3 sample zones) + ├── submit-app-sample.json (complete AppManifest) + └── mongo-apps-test-collection-dump.json +``` + +### What Is Tested + +- `get_edge_cloud_zones` — parametrized with region/status combos; validates that unknown status returns 400; validates that all other combos return 200 with all zones (the filter is not implemented, so every combination returns the full zone list) +- `create_federation`, `get_federation`, `delete_federation`, `get_federation_context_ids` — basic happy-path mocking of factory client +- `delete_app`, `create_app_instance`, `get_app_instance`, `delete_app_instance` — basic happy-path mocking + +### What Is NOT Tested + +- Federated instantiation path (the 8-step orchestration) +- Error paths (SRM timeout, MongoDB failure) +- Schema validation (no end-to-end Connexion tests against the YAML) +- The `network_functions_controller` +- `storage_service.py` +- `edge_cloud_services.py` (the HTTP client layer) +- `federation_services.py` +- The security controller + +`tests/unit/managers/test_db_manager.py.old` — the MongoDB manager tests were disabled (renamed `.old`) and never replaced.
+ +### Test Configuration + +```ini +# tox.ini +[tox] +env_list = lint, type, 3.1{2} + +[testenv] +passenv = MONGO_URI +commands = + pytest --cov=edge_cloud_management_api --cov-report=xml --cov-report=term-missing tests/ +``` + +`mongomock` is listed as a dependency but is not actively used in any current test — tests mock the factory directly rather than the DB layer. + +--- + +## 12. CI/CD + +### GitHub Actions (`.github/workflows/edge-cloud-management-api.yml`) + +Triggers on push to `main` / `releases/**` and PRs to `main`. + +```yaml +services: + mongodb: + image: mongo:6.0 + ports: + - 27017:27017 +env: + MONGO_URI: "mongodb://test_admin:test_password@localhost:27017/test_db?authSource=admin" +``` + +Runs `tox -e 3.12` (lint + type check + tests). No Docker build, no image push in this pipeline. + +### GitLab CI (`.gitlab-ci.yml`) + +Two stages, only on `main`: +1. `build` — `docker build -t $IMAGE_NAME:$IMAGE_TAG .` +2. `push` — `docker login` + `docker tag` + `docker push` to GitLab registry + +No test step in the GitLab pipeline — the assumption is tests are covered by GitHub Actions. + +--- + +## 13. 
What Is Good + +| Area | Positive | +|---|---| +| **API design** | Full CAMARA EdgeCloud API coverage — GSMA-compliant AppManifest with discriminated union for 4 infra types | +| **OpenAPI-first** | Connexion routes from YAML spec; `strict_validation=True`; Swagger UI auto-generated at `/docs` | +| **Data model richness** | KubernetesResources with CPU pool, GPU pool, SR-IOV networking, addons; complete federation model | +| **Factory pattern** | `PiEdgeAPIClientFactory` and `FederationManagerClientFactory` make mocking clean in tests | +| **Pydantic v2** | Used for request/response validation; `model_dump()` works correctly | +| **Test structure** | Clear unit/component separation; `@pytest.mark.unit` / `@pytest.mark.component` markers | +| **Helm umbrella** | Full OOP platform deployable as one `helm install`; sub-chart isolation | +| **RollingUpdate** | `maxSurge:1, maxUnavailable:0` configured for zero-downtime deploys | +| **Correlation ID** | `x-correlator` header defined and wired through all API endpoints | +| **Error model** | Consistent `ErrorInfo {status, code, message}` schema with CAMARA error codes | +| **uv** | Modern Python dependency management; `uv.lock` for reproducible installs | +| **ruff + mypy** | Static analysis infrastructure in place | +| **Federation flow** | The 8-step GSMA OPG federation-based deployment is architecturally correct | + +--- + +## 14. What Is Bad — Detailed Issue Catalogue + +### 14.1 Security (Critical) + +**SEC-1: No authentication enforced** +All `security:` blocks in `openapi.yaml` are commented out. Every endpoint is publicly accessible. The `security_controller.py` with JWT/OAuth2 logic exists but is never wired in. + +**SEC-2: Hardcoded client credentials** +`federation_manager_controller.py` line 13–14 has a hardcoded Base64 Basic-auth header (`b3JpZ2luYXRpbmctb3AtMTpkZDd2TndGcWpOcFl3YWdobEV3TWJ3MTBnMGtsV0RIYg==` = `originating-op-1:dd7vNwFqjNpYwaghlEwMbw10g0klWDHb`). 
These are OAuth2 client credentials for Keycloak committed to source code. + +**SEC-3: Kubernetes ServiceAccount token in Helm values** +`values.yaml` contains a long-lived (1-year) Kubernetes ServiceAccount JWT under `srm.srmcontroller.env.kubernetesMasterToken`. This grants cluster API access. + +**SEC-4: TLS verification disabled** +All `requests` calls use `verify=False`, disabling certificate validation on every outbound connection to SRM and FM. Susceptible to MITM. + +**SEC-5: pip `--trusted-host` in Dockerfile** +Disables TLS verification for PyPI during Docker builds. + +**SEC-6: `check_oAuth2ClientCredentials` always succeeds** +The OAuth2 token check stub always returns `{'scopes': ['fed-mgmt'], 'uid': 'test_value'}` regardless of the actual token, meaning if security were re-enabled, any token would pass. + +### 14.2 Correctness + +**BUG-1: `get_edge_cloud_zones` filter is dead code** +`query_region_matches` and `query_status_matches` are defined as inner functions but never called. The `region` and `status` query parameters are accepted and validated but silently ignored. + +**BUG-2: `get_app_instance` ignores all filters** +The function only handles the `app_id is None AND app_instance_id is None` case and returns 404 for all other filter combinations. + +**BUG-3: `create_app_instance` returns raw `requests.Response` for federated path** +`deploy_app_partner` returns a raw `requests.Response` object, which Connexion cannot serialise, causing an internal server error on the federated deploy path. + +**BUG-4: `get_federated_zones` is a stub returning `[]`** +Federated zones are in MongoDB but never returned by `get_edge_cloud_zones`. The endpoint only shows local zones. + +**BUG-5: Status code mismatch** +`POST /apps` (spec: 201) returns whatever SRM returns (200). `POST /appinstances` (spec: 202) returns 200 from SRM or 202 with warning. Clients relying on HTTP status codes for flow control will break. 
+ +**BUG-6: `insert_zones` on startup creates duplicates** +Module-level code in `edge_cloud_controller.py` calls `insert_zones` without checking for existing records. Every pod restart inserts duplicate zone documents. + +**BUG-7: Token expiry not handled** +Keycloak tokens (default 300s) are stored in MongoDB but never refreshed. All FM operations fail silently after the token expires, returning 401 upstream. + +**BUG-8: `delete_partner_zones` deletes all partner zones** +When a single federation is torn down, all zones with `isLocal:'false'` are deleted, affecting other active federation contexts. + +**BUG-9: `requests_session` never initialised** +`PiEdgeAPIClient._authenticate()` references `self.requests_session` which is never set in `__init__`. If authentication were enabled, this would raise `AttributeError`. + +**BUG-10: `isLocal` stored as string not boolean** +`isLocal='true'` / `isLocal='false'` — string comparison throughout. One typo would make zones permanently appear as remote or local. + +**BUG-11: `FixedNetworkIds` uses Pydantic v1 `__root__`** +`class FixedNetworkIds(BaseModel): __root__: List[str]` — this pattern is removed in Pydantic v2 (should be `RootModel[List[str]]`). This model will fail silently. + +**BUG-12: Logger handler never attached** +`log_manager.py` creates and formats a `StreamHandler` but never calls `logger.addHandler(handler)`. All logging statements throughout the codebase are silent. + +### 14.3 Architecture / Design + +**ARCH-1: No MongoDB connection pooling** +`storage_service.py` opens a new `MongoClient` TCP connection per function call, while the proper pooled `MongoManager` class is unused. Under load this causes connection exhaustion. + +**ARCH-2: Module-level side effects** +`edge_cloud_controller.py` and the top of `federation_manager_controller.py` perform HTTP calls and DB writes at import time. 
This causes startup failures when dependencies are unavailable, and makes testing hard (module import triggers real network calls). + +**ARCH-3: Mixed return types from controllers** +Some controllers return `(jsonify(dict), int)`, others return `(dict, int)`, others return a raw `requests.Response`. Connexion handles these inconsistently. + +**ARCH-4: Duplicate body parsing** +`create_app_instance` re-reads `request.get_json()` even though Connexion already parsed and validated the body and would inject it as a parameter if the function signature matched the operationId's expected parameters. + +**ARCH-5: Business logic in factory class** +`FederationManagerClientFactory.onboard_application_to_partners` is a full business logic function placed inside a factory class, with unreachable code. + +**ARCH-6: Sync Flask under uvicorn** +All request handlers are synchronous, blocking I/O. uvicorn is running them as sync functions, meaning the event loop is blocked during every HTTP call to SRM/FM. FastAPI with async handlers would allow genuine concurrency. + +**ARCH-7: `x-correlator` not propagated** +The correlation ID is accepted from the client but never forwarded in outbound HTTP headers to SRM or FM, breaking distributed tracing. + +**ARCH-8: Pydantic models disconnected from OpenAPI spec** +`application_models.py` has stricter field patterns than `openapi.yaml` (e.g., `name` regex). They can diverge silently. + +**ARCH-9: `print()` statements in production** +`create_app_instance` has three `print()` calls dumping base URLs, headers, and full request bodies to stdout. Sensitive data (credentials in the body) would appear in logs. + +**ARCH-10: No health/readiness endpoint** +No `/health` or `/ready` route. Kubernetes liveness and readiness probes are commented out in the Helm chart, meaning unhealthy pods receive traffic indefinitely. 
+ +**ARCH-11: Trailing slash inconsistency** +`get_all_traffic_influence_resources` calls `GET {base_url}/traffic-influences/` (trailing slash) while the spec defines `/traffic-influences`. SRM may return 301 or 404. + +**ARCH-12: `HTTP_PROXY` configured but unused** +The proxy is set up in `edge_cloud_services.py` as a `proxies` dict but never passed to any `requests` call. + +--- + +## 15. FastAPI Migration — Full Specification + +### 15.1 Project Structure + +``` +oeg_fastapi/ +├── main.py # FastAPI app + lifespan +├── config.py # Pydantic-settings v2 BaseSettings +├── deps.py # Shared FastAPI dependencies +├── routers/ +│ ├── apps.py # /apps, /appinstances +│ ├── zones.py # /edge-cloud-zones +│ ├── federation.py # /partner, /fed-context-id +│ └── network.py # /sessions, /traffic-influences +├── models/ +│ ├── app_models.py # AppManifest, AppInstance, AppInstanceInfo +│ ├── zone_models.py # EdgeCloudZone, EdgeCloudZoneStatus +│ ├── federation_models.py # FederationRequestData, FederationResponseData +│ ├── network_models.py # QoDSchema, TrafficInfluenceSchema +│ └── errors.py # ErrorInfo, CAMARA error codes +├── services/ +│ ├── srm_client.py # Async SRM client (httpx.AsyncClient) +│ ├── fm_client.py # Async FM client (httpx.AsyncClient) +│ └── token_service.py # Keycloak token fetch + refresh +├── storage/ +│ ├── zone_repo.py # Async zone CRUD (motor) +│ └── federation_repo.py # Async federation CRUD (motor) +├── middleware/ +│ ├── correlation_id.py # x-correlator propagation middleware +│ └── logging.py # Structured request/response logging +└── security/ + └── auth.py # OAuth2/JWT FastAPI Security dependency +``` + +### 15.2 Core Application (`main.py`) + +```python +from contextlib import asynccontextmanager +from fastapi import FastAPI +from motor.motor_asyncio import AsyncIOMotorClient +import httpx + +@asynccontextmanager +async def lifespan(app: FastAPI): + # Startup + app.state.mongo = AsyncIOMotorClient(settings.MONGO_URI) + app.state.srm_client = 
httpx.AsyncClient( + base_url=settings.SRM_HOST, + verify=settings.SSL_CA_BUNDLE or True, + headers={"Content-Type": "application/json"}, + ) + app.state.fm_client = httpx.AsyncClient( + base_url=settings.FEDERATION_MANAGER_HOST, + verify=settings.SSL_CA_BUNDLE or True, + ) + await seed_local_zones(app) # replaces module-level side effect + yield + # Shutdown + app.state.mongo.close() + await app.state.srm_client.aclose() + await app.state.fm_client.aclose() + +app = FastAPI( + title="Open Exposure Gateway", + version="1.0.1", + lifespan=lifespan, +) +app.add_middleware(CorrelationIdMiddleware) +app.add_middleware(RequestLoggingMiddleware) +app.include_router(apps_router, prefix="/oeg/1.0.0") +app.include_router(zones_router, prefix="/oeg/1.0.0") +app.include_router(federation_router, prefix="/oeg/1.0.0") +app.include_router(network_router, prefix="/oeg/1.0.0") +``` + +### 15.3 Configuration (`config.py`) + +```python +from pydantic_settings import BaseSettings, SettingsConfigDict + +class Settings(BaseSettings): + model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8") + + MONGO_URI: str + SRM_HOST: str + FEDERATION_MANAGER_HOST: str + TOKEN_ENDPOINT: str + PARTNER_API_ROOT: str + SSL_CA_BUNDLE: str | None = None # path to CA bundle; None = system default + + # Keycloak — loaded from env/secrets, not hardcoded + KEYCLOAK_CLIENT_ID: str + KEYCLOAK_CLIENT_SECRET: str + KEYCLOAK_SCOPE: str = "fed-mgmt" + + # JWT validation + JWT_ISSUER: str | None = None + JWT_PUBLIC_KEY: str | None = None +``` + +### 15.4 Dependency Injection (`deps.py`) + +```python +from fastapi import Depends, Request +from motor.motor_asyncio import AsyncIOMotorDatabase +import httpx + +def get_db(request: Request) -> AsyncIOMotorDatabase: + return request.app.state.mongo["oeg_storage"] + +def get_srm_client(request: Request) -> httpx.AsyncClient: + return request.app.state.srm_client + +def get_fm_client(request: Request) -> httpx.AsyncClient: + return 
request.app.state.fm_client +``` + +### 15.5 Example Router (`routers/apps.py`) + +```python +from fastapi import APIRouter, Depends, Header, HTTPException, status +from models.app_models import AppManifest, SubmittedApp, AppInstanceInfo, CreateAppInstanceRequest +from services.srm_client import SRMClient +from services.fm_client import FMClient +from storage.zone_repo import ZoneRepo +from storage.federation_repo import FederationRepo +from security.auth import require_scope +import httpx + +router = APIRouter(tags=["Application"]) + +@router.post( + "/apps", + response_model=SubmittedApp, + status_code=status.HTTP_201_CREATED, + dependencies=[Depends(require_scope("edge-application-management:apps:write"))], +) +async def submit_app( + body: AppManifest, + srm: SRMClient = Depends(), + x_correlator: str | None = Header(None), +) -> SubmittedApp: + result = await srm.post("/serviceFunction", json=body.model_dump(mode="json")) + if result.status_code != 201: + raise HTTPException(status_code=result.status_code, detail=result.json()) + return SubmittedApp(**result.json()) + +@router.post( + "/appinstances", + response_model=list[AppInstanceInfo], + status_code=status.HTTP_202_ACCEPTED, +) +async def create_app_instance( + body: CreateAppInstanceRequest, + srm: SRMClient = Depends(), + fm: FMClient = Depends(), + zone_repo: ZoneRepo = Depends(), + fed_repo: FederationRepo = Depends(), +) -> list[AppInstanceInfo]: + zone = await zone_repo.get(body.appZones[0].EdgeCloudZone.edgeCloudZoneId) + if zone is None: + raise HTTPException(status_code=404, detail="Zone not found") + if not zone.isLocal: + return await _create_federated_instance(body, zone, srm, fm, fed_repo) + return await _create_local_instance(body, srm) +``` + +### 15.6 Fix: MongoDB Connection Pooling (`storage/zone_repo.py`) + +```python +from motor.motor_asyncio import AsyncIOMotorDatabase +from fastapi import Depends +from deps import get_db + +class ZoneRepo: + def __init__(self, db: AsyncIOMotorDatabase =
Depends(get_db)): + self.col = db["zones"] + + async def get(self, zone_id: str) -> dict | None: + return await self.col.find_one({"_id": zone_id}) + + async def upsert_many(self, zones: list[dict]) -> None: + for zone in zones: + await self.col.update_one( + {"_id": zone["_id"]}, + {"$set": zone}, + upsert=True, + ) + + async def delete_partner_zones(self, fed_context_id: str) -> None: + # Scoped deletion — only delete zones for THIS federation + await self.col.delete_many({ + "isLocal": False, + "fedContextId": fed_context_id, + }) +``` + +### 15.7 Fix: Token Service (`services/token_service.py`) + +```python +import time +import httpx +from config import settings + +class TokenService: + def __init__(self): + self._token: str | None = None + self._expires_at: float = 0.0 + + async def get_token(self) -> str: + if self._token and time.monotonic() < self._expires_at - 30: + return self._token + async with httpx.AsyncClient() as client: + resp = await client.post( + settings.TOKEN_ENDPOINT, + data={ + "grant_type": "client_credentials", + "scope": settings.KEYCLOAK_SCOPE, + "client_id": settings.KEYCLOAK_CLIENT_ID, + "client_secret": settings.KEYCLOAK_CLIENT_SECRET, + }, + ) + resp.raise_for_status() + data = resp.json() + self._token = data["access_token"] + self._expires_at = time.monotonic() + data.get("expires_in", 300) + return self._token + +token_service = TokenService() +``` + +### 15.8 Fix: Correlation ID Middleware + +```python +from starlette.middleware.base import BaseHTTPMiddleware +from starlette.requests import Request +import uuid + +class CorrelationIdMiddleware(BaseHTTPMiddleware): + async def dispatch(self, request: Request, call_next): + correlation_id = request.headers.get("x-correlator") or str(uuid.uuid4()) + request.state.correlation_id = correlation_id + # Propagate to all outbound clients via context var or header injection + response = await call_next(request) + response.headers["x-correlator"] = correlation_id + return response +``` + 
+### 15.9 Fix: Zone Seeding in Lifespan (replaces import-time side effects) + +```python +async def seed_local_zones(app: FastAPI): + try: + srm = app.state.srm_client + resp = await srm.get("/node") + resp.raise_for_status() + zones = resp.json() + repo = ZoneRepo(app.state.mongo["oeg_storage"]) + for zone in zones: + zone["_id"] = zone["edgeCloudZoneId"] + zone["isLocal"] = True # boolean, not string + await repo.upsert_many(zones) # upsert, not insert_many + except Exception as exc: + logger.warning("Could not seed zones from SRM at startup: %s", exc) +``` + +### 15.10 Fix: Filtering in `get_edge_cloud_zones` + +```python +@router.get("/edge-cloud-zones", response_model=list[EdgeCloudZone]) +async def get_edge_cloud_zones( + region: str | None = Query(None), + status: EdgeCloudZoneStatus | None = Query(None), + zone_repo: ZoneRepo = Depends(), + srm: SRMClient = Depends(), +) -> list[EdgeCloudZone]: + # Live from SRM + local = await srm.get("/node") + # Partner zones from MongoDB + partner = await zone_repo.get_partner_zones() + all_zones = local.json() + partner + + if region is not None: + all_zones = [z for z in all_zones if z.get("edgeCloudRegion") == region] + if status is not None: + all_zones = [z for z in all_zones if z.get("edgeCloudZoneStatus") == status.value] + + return [EdgeCloudZone(**z) for z in all_zones] +``` + +### 15.11 Complete Issue → Fix Mapping + +| Issue | Fix | +|---|---| +| SEC-1: No auth | FastAPI `Security(require_scope(...))` dependency on every route | +| SEC-2: Hardcoded credentials | `KEYCLOAK_CLIENT_ID` / `KEYCLOAK_CLIENT_SECRET` via env/k8s Secret | +| SEC-3: JWT in values.yaml | Use `kubectl create secret` + `secretKeyRef` in Helm; never commit tokens | +| SEC-4: verify=False | `SSL_CA_BUNDLE` setting; default to system CA bundle | +| SEC-5: pip trusted-host | Use official PyPI, or internal mirror with proper TLS | +| BUG-1: Dead filter | Implement filter in `get_edge_cloud_zones` | +| BUG-2: get_app_instance | Add filter 
branches for appId, appInstanceId, region | +| BUG-3: Raw response returned | Return parsed `AppInstanceInfo` Pydantic models always | +| BUG-4: Federated zones missing | `get_partner_zones()` from MongoDB, merged with SRM response | +| BUG-5: Status code mismatch | `status_code=201` / `202` on route decorators | +| BUG-6: Duplicate zone inserts | `upsert_many` with `$set` + `upsert=True` | +| BUG-7: Token expiry | `TokenService` with in-memory cache and 30s pre-expiry refresh | +| BUG-8: delete all partner zones | Scope deletion to `fedContextId` | +| BUG-10: isLocal as string | Use Python `bool` stored as BSON `true`/`false` | +| BUG-12: Logger not attached | Structured logging with `structlog` or Python `logging.config.dictConfig` | +| ARCH-1: No DB pooling | `motor.AsyncIOMotorClient` shared via `lifespan` + `Depends` | +| ARCH-2: Import side effects | Move all startup I/O into `lifespan` hook | +| ARCH-3: Mixed return types | All routes return Pydantic `response_model` | +| ARCH-4: Double body parse | FastAPI injects validated Pydantic model directly | +| ARCH-6: Sync under uvicorn | `async def` handlers + `httpx.AsyncClient` | +| ARCH-7: No correlator propagation | `CorrelationIdMiddleware` captures `x-correlator`; the value is forwarded as a header on every outbound SRM/FM request | +| ARCH-9: print() statements | Remove; use `logger.debug()` with `%s` formatting | +| ARCH-10: No health endpoint | `GET /health` returning `{"status":"ok"}` with DB ping | +| ARCH-11: Trailing slash | Consistent URL construction without trailing slash | + +--- + +## 16.
Diagram Index + +All diagrams are in `docs/diagrams/`: + +| File | Description | +|---|---| +| `architecture_diagram.png` | Full system architecture — OEG, SRM, FM, MongoDB, Ingress, Partner OP, Infrastructure layers | +| `sequence_app_local.png` | Submit app + instantiate on a local edge zone (SRM path) | +| `sequence_app_federated.png` | 8-step federated instantiation (artefact → onboard → deploy via FM) | +| `sequence_federation.png` | Federation lifecycle: establish with Keycloak token, query, terminate with zone cleanup | +| `sequence_qod.png` | Quality-on-Demand session + Traffic Influence resource lifecycle | +| `api_map.png` | Full API endpoint map with known issues and FastAPI migration checklist | + +--- + +*Analysis performed on the `feature/helm` branch. All line references are to the files as they exist at time of writing.* diff --git a/docs/SOFTWARE_QUALITY_ASSESSMENT.md b/docs/SOFTWARE_QUALITY_ASSESSMENT.md new file mode 100644 index 0000000000000000000000000000000000000000..f870dcec912361c06fdf0f669017195ada044bd2 --- /dev/null +++ b/docs/SOFTWARE_QUALITY_ASSESSMENT.md @@ -0,0 +1,1379 @@ +# Open Exposure Gateway — Software Engineering Quality Assessment + +> **Assessed version:** `1.0.1-wip` · `feature/helm` branch +> **Date:** February 2026 +> **Assessor methodology:** Static analysis (ruff, AST inspection), code review, architectural review, +> security review, operations review. All metrics are measured from the actual source tree. + +--- + +## Table of Contents + +1. [Executive Summary](#1-executive-summary) +2. [Measured Metrics](#2-measured-metrics) +3. [Code Quality](#3-code-quality) +4. [Architecture Quality](#4-architecture-quality) +5. [Security Engineering](#5-security-engineering) +6. [Error Handling and Resilience](#6-error-handling-and-resilience) +7. [Testability and Test Coverage](#7-testability-and-test-coverage) +8. [Observability and Operability](#8-observability-and-operability) +9. 
[Performance Engineering](#9-performance-engineering) +10. [Maintainability and Technical Debt](#10-maintainability-and-technical-debt) +11. [Deployment and Operations Quality](#11-deployment-and-operations-quality) +12. [Scoring Summary](#12-scoring-summary) +13. [Improvement Roadmap](#13-improvement-roadmap) +14. [Concrete Code Improvements](#14-concrete-code-improvements) + +--- + +## 1. Executive Summary + +The Open Exposure Gateway is a **functionally correct prototype** that implements a non-trivial +inter-operator edge cloud management workflow. The CAMARA API surface is well-designed, the OpenAPI +specification is rich and accurate, and the federated deployment orchestration logic is architecturally +sound. However, the implementation has accumulated a significant number of engineering quality issues +that make it **unsuitable for production** in its current state. + +The most critical cluster of issues is **security** — authentication is entirely disabled and +credentials are hardcoded in source. The second critical cluster is **resilience** — the error +handling style produces a false sense of safety (exceptions are caught but the resulting error +states are not properly propagated). The third cluster is **observability** — the logger is +misconfigured and produces no output, making the system invisible to operators. + +All of these issues are **fixable** through the FastAPI rewrite combined with the improvements +catalogued in this document. 
+ +| Category | Score | Verdict | +|---|---|---| +| Code Quality | 5 / 10 | Needs work | +| Architecture | 6 / 10 | Reasonable, fixable flaws | +| Security Engineering | 2 / 10 | Critical — do not deploy | +| Error Handling | 3 / 10 | Systematically broken | +| Test Coverage | 4 / 10 | Insufficient | +| Observability | 2 / 10 | Effectively dark | +| Performance | 4 / 10 | Blocking I/O, no pooling | +| Maintainability | 5 / 10 | Growing debt | +| Deployment | 5 / 10 | Works, but credentials exposed | +| **Overall** | **4 / 10** | **Not production-ready** | + +--- + +## 2. Measured Metrics + +The following metrics were measured directly from the source tree using static analysis. + +### 2.1 Codebase Size + +| Metric | Value | +|---|---| +| Python source files | 22 | +| Total lines | 1,728 | +| Code lines | 1,385 (80%) | +| Comment lines | 53 (3%) | +| Blank lines | 290 (17%) | +| Functions defined | 91 | + +At 1,385 lines of code, this is a small service. The low comment ratio (3%) is notable but acceptable +given that most logic is self-explanatory when the architecture is understood. The concern is not the +count but the quality and accuracy of the comments that do exist (see §3.5). + +### 2.2 Complexity + +| Metric | Value | +|---|---| +| Functions with CC > 8 (high complexity) | 2 | +| Highest CC function | `create_app_instance` (CC ≈ 16) | +| Second highest | `get_edge_cloud_zones` (CC ≈ 12) | +| Functions > 30 lines | 4 | +| Longest function | `create_app_instance` (116 lines) | + +A Cyclomatic Complexity of 16 for `create_app_instance` means there are at minimum 16 independent +paths through that function. Testing all paths requires at minimum 16 test cases; the current +codebase has 0 tests for this function's internals. 
+ +### 2.3 Type Safety + +| Metric | Value | +|---|---| +| Functions with full argument annotations | 56 / 91 (61%) | +| Functions with return type annotations | 7 / 91 (8%) | +| Pydantic v1 compat shim usage | 1 file (`env_config.py`) | +| `Optional[Any]` usage (type erasure) | 2 fields in `AppManifest` | + +The near-total absence of return type annotations (92% missing) means mypy cannot perform flow +analysis across function boundaries. Type errors are invisible until runtime. + +### 2.4 Exception Handling + +| Metric | Value | +|---|---| +| Total `except Exception` blocks | 35 | +| Total `except Timeout` blocks | 17 | +| Total `except ConnectionError` blocks | 17 | +| Total `except HTTPError` blocks | 18 | +| Total exception handler blocks | 87 | +| Exception handlers that log | ~25 (29%) | +| Exception handlers that re-raise | 0 | +| Exception handlers that produce typed errors | ~15 (17%) | + +87 exception handlers for 91 functions means almost every function is wrapped in try/except. +The problem is what these handlers *do* — they almost universally swallow the exception and return +an error dict that callers are not required to check. + +### 2.5 Test Coverage + +| Metric | Value | +|---|---| +| Test files | 5 | +| Test cases | 15 | +| Source modules with any test coverage | 2 / 9 (22%) | +| Source modules with NO test coverage | 7 (78%) | +| Untested functions | ~65 / 91 (71%) | +| Disabled test files | 1 (`test_db_manager.py.old`) | + +### 2.6 Code Hygiene + +| Metric | Value | +|---|---| +| `print()` statements in production code | 5 | +| `verify=False` (TLS disabled) | 16 occurrences | +| Unused imports (ruff F401) | 6 | +| Hardcoded credential strings in source | 3 | +| Magic HTTP status code literals | 8 | +| Docstring coverage | 39 / 91 functions (42%) | +| Duplicate exception handler patterns | 43 in `federation_services.py` alone | + +--- + +## 3. 
Code Quality + +### 3.1 Function Design + +**`create_app_instance` — the worst function in the codebase** + +This single function (116 lines, CC=16) does all of the following: +- Reads and re-parses the HTTP request body (already parsed by Connexion) +- Performs a MongoDB lookup +- Makes a conditional branch decision (local vs. federated) +- Executes an 8-step cross-system orchestration on the federated branch +- Makes 4 different HTTP calls to 2 different services on the federated branch +- Makes 1 HTTP call on the local branch +- Has 8 separate exception handlers with different error formats +- Contains 4 `print()` debug statements +- Returns different types depending on path (dict, Response object, jsonify tuple) + +**The principle violated:** Single Responsibility Principle (SRP). This function should be +decomposed into at minimum: + +``` +create_app_instance() ← routing only: read body, lookup zone, dispatch + ├─ _deploy_local() ← local SRM deployment + └─ _deploy_federated() ← federated path coordinator + ├─ _upload_artefact() ← step 1-2 + ├─ _onboard_app() ← step 3-4 + └─ _deploy_at_partner()← step 5-6 +``` + +**`get_edge_cloud_zones` — dead filter code (CC=12)** + +This function validates `region` and `status` query parameters and defines two inner filter functions, +but then calls neither of them. The validation is wasted work and the user gets no filtering. +The complexity score of 12 comes from the dead code paths. With those dead paths removed it would drop to ~4.
+ +### 3.2 Naming Inconsistencies + +The codebase mixes naming conventions within the same layer: + +| Context | Inconsistency | +|---|---| +| `isLocal` field | Stored as `'true'`/`'false'` (string) but semantically a boolean | +| `PiEdgeAPIClient` | Named after "PiEdge" (internal project name), not its function | +| `get_service_functions_catalogue()` | Returns a list, not a catalogue object | +| `requests_session` | Attribute defined nowhere but referenced in `_authenticate()` | +| `FederationManagerClientFactory.onboard_application_to_partners()` | Business logic inside a factory | +| `check_oAuth2ClientCredentials` | Named as a check but always returns success | +| `inline_response_200`, `inline_response_200_1`, `inline_response_200_2` | Unnamed schema references in OpenAPI spec — auto-generated names that convey no meaning | + +### 3.3 Dead Code + +| Location | Dead Code | +|---|---| +| `security_controller.py` | Entire file — never imported in active code paths | +| `managers/db_manager.py` | `MongoManager` — never used in production paths | +| `edge_cloud_controller.py` | `query_region_matches()`, `query_status_matches()` — defined, never called | +| `edge_cloud_controller.py` | `get_federated_zones()` — stub returning `[]` | +| `federation_services.py:258` | `onboard_application_to_partners()` — entire function, never called | +| `app_controllers.py:199` | Commented-out return statement `# return jsonify(...)` | +| `edge_cloud_services.py:20` | `_get_proxy_session()` — proxy dict defined but never passed to requests | +| `edge_cloud_services.py:25` | `_authenticate()` — authentication entirely bypassed | +| `openapi.yaml:161` | `# security:` blocks on every endpoint — 12 commented-out security definitions | +| `openapi.yaml:893` | `# callbacks:` block on `/partner` — 140 lines of commented-out callback spec | +| Pydantic models in `models/` | Defined but never imported or used by controllers — controllers use raw dicts | + +The ratio of dead to live code is 
unusually high for a service of this size. Dead code creates +cognitive overhead for new contributors and can cause confusion about intended behaviour. + +### 3.4 Inconsistent Return Contracts + +Controllers return values in at least 5 different formats: + +```python +# Format 1: (jsonify(dict), int) — most common in app_controllers +return jsonify({"error": "..."}), 400 + +# Format 2: raw dict — from SRM client +return response.json() # submit_app, get_app, get_apps + +# Format 3: raw string — from SRM client +return response.text # delete_app + +# Format 4: (dict, int) — in federation_manager_controller +return response, code # where response is already a dict + +# Format 5: raw requests.Response object — deploy_app_partner +return deploy_app_response # BUG: Connexion cannot serialise this +``` + +Connexion handles formats 1 and 4 correctly. Formats 2, 3, and 5 produce unpredictable results +depending on whether Connexion's response normaliser can infer the correct HTTP status code. + +### 3.5 Comment Quality + +The 53 comment lines that exist are mostly one of three types: + +1. **Commented-out code** — security blocks, old implementations, debug calls +2. **Step labels** — `# Step 1: retrieve app metadata`, `# Step 2: compose GSMA artefact payload` + (useful, but should be function names, not comments) +3. **Wrong/misleading** — `# Creates a new QoD session` as docstring for `delete_qod_session` and + `get_qod_session` (copy-pasted and never updated) + +```python +def delete_qod_session(sessionId: str): + """ + Creates a new QoD session ← WRONG: this deletes, not creates + """ + ... + +def get_qod_session(sessionId: str): + """ + Creates a new QoD session ← WRONG: this gets, not creates + """ + ... 
+``` + +### 3.6 Import and Module Organisation + +- 6 unused imports detected by ruff (all fixable automatically with `ruff --fix`) +- Circular dependency risk: `federation_services.py` imports from `edge_cloud_services.py` + and `storage_service.py`; controllers import from both services and storage — a flat + dependency graph with no clear layering enforcement +- `__init__.py` files in all packages are empty — no public API surface defined per package + +--- + +## 4. Architecture Quality + +### 4.1 Layer Violations + +The intended layering is: + +``` +Controllers → Services → (HTTP clients / Storage) +``` + +But the actual layering is: + +``` +Controllers → Services +Controllers → Storage (direct) ← violation +Controllers → HTTP clients (direct, via factory) ← acceptable but verbose +Services → Storage (direct) ← violation in federation_services.py +``` + +`app_controllers.py` calls `get_zone()` and `get_fed()` directly from `storage_service`, +bypassing any service layer. `federation_services.py` calls `delete_fed()` and +`delete_partner_zones()` directly from storage, mixing service and persistence concerns. + +### 4.2 Module-Level Side Effects + +Two modules execute real I/O at import time: + +**`edge_cloud_controller.py` (lines 11–20):** +```python +# This runs when Python first imports this module +pi_edge_factory = PiEdgeAPIClientFactory() +api_client = pi_edge_factory.create_pi_edge_api_client() +zones = api_client.edge_cloud_zones() # HTTP call to SRM +for zone in zones: + zone['_id'] = zone.get('edgeCloudZoneId') + zone['isLocal'] = 'true' +insert_zones(zones) # MongoDB write +``` + +**`federation_manager_controller.py` (lines 22–23):** +```python +factory = FederationManagerClientFactory() +federation_client = factory.create_federation_client() # reads config +``` + +Consequences: +1. **Slow startup** — any latency in SRM adds directly to startup time +2. 
**Startup failure** — if SRM is not available, the zone-seeding call raises during import (the exception + is caught silently, so the process still starts, but zones are never seeded) +3. **Test pollution** — importing the module in tests triggers live HTTP calls unless mocked + at import time, which is notoriously fragile +4. **No retry** — if SRM is temporarily unavailable at the exact startup moment, zones are never + loaded until the next restart + +### 4.3 Dependency Injection Anti-Patterns + +Every controller function creates its own service client: + +```python +def submit_app(body: dict): + pi_edge_factory = PiEdgeAPIClientFactory() # new factory + api_client = pi_edge_factory.create_pi_edge_api_client() # new client + return api_client.submit_app(body) +``` + +```python +def get_app(appId, x_correlator=None): + pi_edge_factory = PiEdgeAPIClientFactory() # another new factory + api_client = pi_edge_factory.create_pi_edge_api_client() # another new client + return api_client.get_app(appId) +``` + +This creates: +- Unnecessary object allocation on every request +- No shared connection pooling (a `requests.Session` pools connections, but the module-level `requests.get/post/delete` calls used here create a throwaway session per call) +- No ability to inject test doubles at the function level without patching the class + +The factory pattern was added (good intention), but the factories are not shared — they are +created anew inside each function call. + +### 4.4 Synchronous Blocking in Async Context + +The application is served by **uvicorn** (an async ASGI server) but all request handlers are +**synchronous Flask functions**. Connexion wraps them using `a2wsgi`, converting ASGI↔WSGI. 
+ +This means: +- Each request blocks an entire thread +- During an SRM call (which can take 1–60s for deployment), the thread is idle but held +- Under concurrent requests, threads pile up quickly +- The process cannot service other requests during long-running FM federation calls + +With 4 `requests` calls in the federated path (each with 10–30s timeout), a single federated +instantiation can block a thread for up to 70 seconds. If the default worker count is 1 (the +Connexion/uvicorn default for sync apps), the server is completely unresponsive for that duration. + +### 4.5 Configuration Coupling + +The `config` object is a module-level singleton imported directly everywhere: + +```python +from edge_cloud_management_api.configs.env_config import config +... +proxies = {"http": config.HTTP_PROXY, "https": config.HTTP_PROXY} +``` + +This makes configuration impossible to override in tests without monkeypatching the module. +The proper pattern is dependency injection — pass config to classes through constructors. + +--- + +## 5. Security Engineering + +### 5.1 Authentication: Completely Disabled (Critical) + +Every single `security:` block in `openapi.yaml` is commented out. The security controller +(`security_controller.py`) is implemented but never invoked. The result is that **every API +endpoint is publicly accessible with no authentication**. + +The code that *should* be active: +```yaml +# IN openapi.yaml — ALL 17 ROUTES HAVE THIS PATTERN: +# security: +# - openId: +# - edge-application-management:apps:write +``` + +The implementation that exists but never runs: +```python +# security_controller.py +def decode_token(token: str): + JWT_ISSUER = os.environ.get('JWT_ISSUER') + PUBLIC_KEY = os.environ.get('JWT_PUBLIC_KEY') + return jwt.decode(token, key=PUBLIC_KEY, algorithms=["RS256"], issuer=JWT_ISSUER) +``` + +**Risk:** Any network-accessible actor can submit arbitrary app manifests, trigger deployments, +establish federations, and access all platform data. 
+ +### 5.2 Hardcoded Credentials (Critical) + +Three separate locations contain hardcoded secrets: + +**Location 1:** `federation_manager_controller.py:13-14` +```python +token_headers = { + 'Authorization': 'Basic b3JpZ2luYXRpbmctb3AtMTpkZDd2TndGcWpOcFl3YWdobEV3TWJ3MTBnMGtsV0RIYg==' +} +# Decodes to: originating-op-1:dd7vNwFqjNpYwaghlEwMbw10g0klWDHb +``` + +**Location 2:** `helm/.../federation-manager/values.yaml:49` +```yaml +secret: "dd7vNwFqjNpYwaghlEwMbw10g0klWDHb" # Keycloak client secret in Helm values +``` + +**Location 3:** `helm/.../srm/values.yaml:72` (and umbrella `values.yaml:74`) +```yaml +kubernetesMasterToken: "eyJhbGciOiJSUzI1NiIsImtpZCI6..." # 1-year K8s ServiceAccount JWT +``` + +Once committed to git, these secrets are permanently in git history regardless of future deletion. +A `git log -S "dd7vNwFqjNpYwaghlEwMbw10g0klWDHb"` will always find them. + +### 5.3 TLS Certificate Verification Disabled (High) + +All 16 outbound HTTP calls to SRM disable TLS verification: +```python +requests.post(url, json=body, verify=False) +``` + +This is present on every single `requests.get/post/delete` call in `edge_cloud_services.py`. +The FM client does not explicitly set `verify=False` but also does not set a CA bundle, so +it uses the system default (which may or may not include the correct CA for the FM endpoint). + +**Risk:** MITM attack — an adversary on the network can intercept all OEG↔SRM traffic, +read app manifests (including repository credentials), modify deployment targets, or inject +malicious responses. + +### 5.4 OAuth2 Stub Always Authorises (High) + +```python +def check_oAuth2ClientCredentials(token): + return {'scopes': ['fed-mgmt'], 'uid': 'test_value'} # always passes +``` + +If security were re-enabled (comments removed from openapi.yaml), this function would accept +*any* token string — including empty strings, invalid JWTs, or tokens from unrelated services — +because it never actually validates the token. 
+ +### 5.5 App Repository Credentials in Plaintext (Medium) + +`AppManifest.appRepo` accepts: +```json +{ + "userName": "helm-user", + "credentials": "secure-token", + "authType": "DOCKER" +} +``` + +These credentials are: +- Sent from App Provider to OEG in HTTP body +- Stored verbatim in SRM's MongoDB (no encryption at rest) +- Included in OEG→SRM forwarded request bodies +- Logged by the `print()` statements in `create_app_instance` (leaked to stdout) +- Included in GSMA artefact payloads sent to partner OPs during federation + +The `credentials` field has a `maxLength: 128` constraint but no encryption, hashing, or +masking at any layer. + +### 5.6 Sensitive Data in Logs (Medium) + +```python +# app_controllers.py:176-177 +print(f" Headers: {pi_edge_client._get_headers()}") +print(f"Payload: {body}") # body contains appRepo credentials +``` + +If stderr/stdout is collected by a log aggregator (as it typically is in Kubernetes), app +repository credentials appear in plaintext in the log stream. + +### 5.7 `start-dev` Keycloak Mode (Medium) + +Keycloak is started with the `start-dev` command (development mode), which: +- Disables TLS on the Keycloak HTTP endpoint +- Uses the embedded H2 development database (not production-grade; state is lost on pod restart unless it is kept on a persistent volume) +- Enables verbose debug logging +- Is explicitly not supported by Keycloak for production use + +### 5.8 Default Admin Credentials on Keycloak (Medium) + +```yaml +admin: + username: admin + password: admin +``` + +The Keycloak admin interface is accessible at NodePort 30081 with `admin:admin`. + +--- + +## 6. Error Handling and Resilience + +### 6.1 The Swallowing Pattern + +The dominant error handling pattern throughout the codebase is: + +```python +except Exception as e: + return jsonify({"error": "An unexpected error occurred", "details": str(e)}), 500 +``` + +This pattern has several problems: + +1. 
**`str(e)` leaks internal details** — Python exception messages often contain internal paths, + stack frame information, connection strings, and implementation details. Returning these to + API clients violates the principle of least information. + +2. **No logging** — because the logger has no handler attached, the exception is silently consumed. + Operators see no trace of what went wrong. + +3. **No differentiation** — a `requests.ConnectionError` (SRM unreachable), a `ValueError` + (bad data from SRM), and a `pymongo.errors.OperationFailure` (DB write failure) all produce + the same generic 500 response. Callers cannot distinguish transient from permanent failures. + +4. **No re-raising** — the exception terminates here. Upstream callers that might handle it + differently never see it. + +### 6.2 Inconsistent Error Dict vs. HTTP Response + +SRM client methods return error dicts on failure: +```python +except Timeout: + return {"error": "The request to the external API timed out."} +``` + +But the controllers check for these inconsistently: +```python +# app_controllers.submit_app — does NOT check for error dict: +response = api_client.submit_app(body) +return response # if SRM timed out, returns {"error": "..."} as 200 OK + +# app_controllers.create_app_instance — DOES check: +if isinstance(response, dict) and "error" in response: + return jsonify({"warning": "Deployment not completed"}), 202 +``` + +A timeout on `submit_app` silently becomes `200 OK {"error": "The request timed out"}` because +the controller returns the error dict without checking it. The client gets a success-coded response +with an error body. + +### 6.3 Cascade Failure in Federated Path + +The 8-step federated instantiation has no rollback logic. 
If step 6 (onboarding) succeeds +but step 8 (deployment) fails: +- The artefact is registered at the partner (step 4 succeeded) +- The app is onboarded at the partner (step 6 succeeded) +- No deployment was started (step 8 failed) +- OEG has no record of what was partially completed +- The next retry will attempt to re-register the artefact (409 conflict is handled) and + re-onboard (behaviour unknown — may create duplicate records at partner) + +There is no compensating transaction, no saga pattern, no idempotency key. + +### 6.4 No Timeout on `edge_cloud_zones()` + +```python +def edge_cloud_zones(self): + url = f"{self.base_url}/node" + request_headers = self._get_headers() + response = requests.get(url, headers=request_headers, verify=False) + response.raise_for_status() + return nodes +``` + +No `timeout=` parameter. If SRM is slow, this blocks indefinitely. Every other SRM method +has timeout handling, but this one — called both at startup and on every `GET /edge-cloud-zones` +request — does not. + +### 6.5 Silent Startup Failure + +```python +# edge_cloud_controller.py — module level +try: + zones = api_client.edge_cloud_zones() + insert_zones(zones) +except Exception as e: + logger.error(e.args) # logger has no handler → silent +``` + +If this fails: +- The `zones` collection is empty +- `create_app_instance` will call `get_zone(id)` and get `None` +- `None.get('isLocal')` raises `AttributeError` +- The `AttributeError` is caught by `create_app_instance`'s outer `except Exception` +- The user gets `500 {"error": "An unexpected error occurred", "details": "'NoneType' has no attribute 'get'"}` + +There is no operator alert, no health check failure, no startup probe failure — just a cryptic +500 on the first instantiation request. + +--- + +## 7. 
Testability and Test Coverage + +### 7.1 Coverage by Layer + +| Layer | Files | Test Coverage | +|---|---|---| +| Controllers | 4 files | Partial (2 of 4 controllers tested) | +| Services — SRM client | 1 file | None | +| Services — FM client | 1 file | None | +| Storage service | 1 file | None | +| Configuration | 1 file | None | +| Models | 4 files | None | +| Managers | 2 files | None (test file disabled) | + +The most business-critical code path — the 8-step federated instantiation in `create_app_instance` +— has **zero tests**. The only test for `create_app_instance` mocks the entire factory and tests +that a 202 is returned for the *local* path only. + +### 7.2 Test Anti-Patterns + +**Wrong assertions (tests that can't fail):** + +```python +# test_app_controllers.py:30-32 +def test_delete_app(mock_factory_class, test_app): + ... + result = app_controllers.delete_app(app_id) + assert isinstance(result, dict) # this will ALWAYS pass + assert result == {"result": "deleted"} # only valid if mock is correct +``` + +The test is asserting on mock output, not on the actual function's transformation of that output. +Because `delete_app` returns `api_client.delete_app(appId)` directly, this test only verifies +that Python's mock system works correctly, not that `delete_app` does the right thing. + +**Stale test expectations:** + +```python +# test_app_controllers.py:53-54 +data = response.get_json() +assert "Application mock-app-id instantiation accepted" in data["message"] +``` + +Looking at the actual `create_app_instance` implementation, it does NOT return a +`{"message": "Application ... instantiation accepted"}` response — that was the old implementation +(the commented-out `return jsonify({"message": ...})` at line 199). This test is testing +**deleted code** and would fail if actually run against the current implementation. 
+ +**Module-level side effects break test isolation:** + +Importing `edge_cloud_controller` in tests triggers a live HTTP call to SRM and a MongoDB write. +The unit test for `get_edge_cloud_zones` patches `get_all_cloud_zones` but the module-level +startup code runs first, before the patch is applied. If SRM is not available in the test +environment, the import raises (caught silently), leaving `zones` empty. + +### 7.3 Missing Test Categories + +| Test Category | Status | +|---|---| +| Unit tests for SRM client methods | Missing | +| Unit tests for FM client methods | Missing | +| Unit tests for storage service | Missing | +| Integration tests (OEG + real MongoDB) | Missing | +| Contract tests (OEG validates against SRM API) | Missing | +| Error path tests (SRM timeout, FM 4xx) | Missing | +| Federated instantiation flow | Missing | +| Security tests (auth rejection) | Missing | +| Schema validation tests | Missing | +| Performance / load tests | Missing | + +### 7.4 Test Infrastructure + +What exists is well-structured: +- `@pytest.mark.unit` / `@pytest.mark.component` markers are correct and useful +- `mongomock` is installed but unused — tests mock the factory instead of the DB layer +- Fixtures in `tests/fixtures/` are well-formed and realistic +- GitHub Actions runs tests with a real MongoDB container — good practice +- `tox` configuration is complete and runs lint + type + tests + +The infrastructure is good. The test cases themselves are inadequate. + +--- + +## 8. 
Observability and Operability + +### 8.1 Logging: Completely Non-Functional + +The most fundamental observability failure: + +```python +# managers/log_manager.py +logger = logging.getLogger("Edge Cloud Management API") +logger.setLevel(logging.INFO) +formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") +handler = logging.StreamHandler(sys.stdout) +handler.setFormatter(formatter) +# ← logger.addHandler(handler) IS MISSING +``` + +The handler is created and formatted, but never attached. Every `logger.info()`, `logger.error()`, +`logger.debug()` across the codebase produces **no output**. The root Python logger may emit +some output with the default format, but the custom formatter and level settings are inert. + +This means: +- Operators cannot tell what requests are being processed +- Exceptions are invisible (the logger.error calls in except blocks produce nothing) +- The 8-step federation orchestration is invisible +- Startup failures are invisible + +### 8.2 No Structured Logging + +Even if the handler were attached, the log format is unstructured text: + +``` +2025-01-01 12:00:00 - Edge Cloud Management API - ERROR - Connection refused +``` + +In a Kubernetes environment with log aggregation (Loki, Elasticsearch, CloudWatch), structured +JSON logs are required for effective querying: + +```json +{"timestamp": "2025-01-01T12:00:00Z", "level": "ERROR", "service": "oeg", + "correlation_id": "abc-123", "event": "srm_connection_failed", + "endpoint": "/serviceFunction", "app_id": "uuid-xxx"} +``` + +### 8.3 No Health or Readiness Endpoints + +The OpenAPI spec defines no `/health`, `/ready`, or `/live` endpoints. 
The Kubernetes Deployment +manifests have liveness and readiness probes **commented out**: + +```yaml +# livenessProbe: +# httpGet: +# path: / +# port: http +# initialDelaySeconds: 30 +``` + +Consequences: +- Kubernetes cannot detect when OEG is unhealthy and needs restart +- A pod where SRM is unreachable (zones not seeded, all deployments will return 500) appears + healthy to Kubernetes and continues to receive traffic +- Rolling upgrades have no readiness gate + +### 8.4 No Metrics + +No Prometheus metrics, no StatsD counters, no request duration histograms. There is no way to: +- Know how many requests per second OEG is handling +- Know the P99 latency of SRM calls +- Know how many federated deployments have succeeded vs. failed +- Alert on error rate increases + +### 8.5 No Distributed Tracing + +The `x-correlator` header is accepted from clients but: +1. Not propagated to SRM in outbound request headers +2. Not propagated to FM in outbound request headers +3. Not included in log messages (even if logging worked) + +This means a single request trace cannot be reconstructed across OEG + SRM + FM logs. + +### 8.6 No Request/Response Logging Middleware + +There is no middleware that logs: +- Inbound request method, path, and query parameters +- Response status code and duration +- Client IP address + +The only visibility into request processing is the Connexion access log (if enabled) and +the occasional `logger.info()` inside handlers (which produces no output). + +--- + +## 9. Performance Engineering + +### 9.1 Synchronous I/O Architecture + +The fundamental performance bottleneck is the use of synchronous `requests` inside a synchronous +Flask application: + +``` +Request arrives → Thread assigned → Thread blocks on SRM HTTP call (1-60s) → Thread released +``` + +Maximum throughput = `number_of_threads / average_request_duration`. 
+ +For a federated instantiation (4 sequential HTTP calls, each up to 30s): +- Best case: 4 × 1s = 4s → ~15 req/minute on a single thread +- Worst case: 4 × 30s = 120s → 0.5 req/minute + +For local instantiation (1 HTTP call): +- Best case: 1s → ~60 req/minute +- Worst case: 60s timeout → 1 req/minute + +This is acceptable for a low-traffic internal platform but degrades quickly under concurrent usage. + +### 9.2 MongoDB: No Connection Pooling + +Every storage operation opens and abandons a MongoDB connection: + +```python +def get_zone(zone_id: str): + myclient = pymongo.MongoClient(storage_url) # new TCP connection + ... + # connection abandoned (GC will close eventually) +``` + +PyMongo's default `maxPoolSize` of 100 applies per `MongoClient`, so creating a new client on every +call means the pool is never shared: each call opens its own connection, which stays open until +GC closes the abandoned client. Under a burst of requests, open connections accumulate and can hit +the MongoDB server's connection limit, causing new operations to stall or fail. + +The `MongoManager` class already exists with `maxPoolSize=50` — it just needs to be used. + +### 9.3 Zone Fetch: Live on Every Request + +`GET /edge-cloud-zones` calls SRM live on every request. There is no caching: + +```python +def get_local_zones(): + api_client = pi_edge_factory.create_pi_edge_api_client() + result = api_client.edge_cloud_zones() # HTTP to SRM every time + return result +``` + +Zones are essentially static data — they change only when new infrastructure is added. Fetching +them live on every discovery request adds SRM latency to what should be a fast, read-only query. +A 5-minute TTL cache would eliminate this overhead for 99% of requests. + +### 9.4 No HTTP Connection Reuse + +Each `requests.get/post/delete` creates a new HTTP session: + +```python +response = requests.get(url, ...) 
# no session — no connection reuse +``` + +Using `requests.Session` would allow TCP connection reuse (keepalive), saving the TCP handshake +overhead on every call. 
On a local cluster network this is ~1ms per call, but it adds up across +the many calls in the federated instantiation path. + +--- + +## 10. Maintainability and Technical Debt + +### 10.1 Technical Debt Inventory + +| Debt Item | Impact | Effort to Fix | +|---|---|---| +| Authentication commented out | Critical | Medium (re-enable + test) | +| Hardcoded credentials | Critical | Small (env vars) | +| Logger handler missing | High | Trivial (1 line) | +| `create_app_instance` monolith | High | High (decompose + test) | +| `MongoManager` unused | High | Medium (migrate storage_service) | +| Dead filter code | Medium | Small (implement or remove) | +| `isLocal` as string | Medium | Small (change to bool, migrate data) | +| Pydantic v1 compat shim | Medium | Small (upgrade to pydantic-settings) | +| `FixedNetworkIds.__root__` | Medium | Small (migrate to RootModel) | +| Token refresh absent | High | Medium (add refresh logic) | +| No zone seed retry | Medium | Small (retry with backoff) | +| `verify=False` everywhere | High | Small (add SSL_CA_BUNDLE config) | +| Duplicate zone inserts | Medium | Small (upsert) | +| Blanket zone delete | Medium | Small (scope to fedContextId) | +| Stale/wrong docstrings | Low | Small (fix copy-paste) | +| Unused imports (6) | Low | Trivial (ruff --fix) | + +**Estimated total backlog:** ~15 items, ranging from trivial (minutes) to high (days). + +### 10.2 Dependency Freshness + +| Package | Pinned version | Latest (Feb 2026) | Status | +|---|---|---|---| +| connexion | 3.1.0 | 3.1.x | Current | +| flask | 3.1.0 | 3.1.x | Current | +| pydantic | 2.10.3 | 2.10.x | Current | +| pymongo | 4.10.1 | 4.10.x | Current | +| requests | 2.32.3 | 2.32.x | Current | +| uvicorn | 0.32.1 | 0.34.x | One minor behind | +| pydantic.v1 shim | — | Should use pydantic-settings | Deprecated pattern | + +Dependencies are reasonably current. 
The main concern is the `pydantic.v1` compat shim in +`env_config.py` — this will break when the shim is removed in a future Pydantic release. + +### 10.3 OpenAPI Spec Drift + +The OpenAPI spec and Python models have diverged in multiple places: + +| Field | Spec | Python model | +|---|---|---| +| `AppManifest.name` pattern | commented out (`# pattern:`) | `^[A-Za-z][A-Za-z0-9_]{1,63}$` enforced | +| `AppManifest.appProvider` pattern | commented out | `^[A-Za-z][A-Za-z0-9_]{7,63}$` enforced | +| `requiredResources` | discriminated union (4 schemas) | `Optional[Any]` | +| `AppZones.additionalProperties` | `false` | not modelled | +| `FixedNetworkIds` | array of strings | `__root__: List[str]` (v1 pattern) | + +Validation behaviour differs: Connexion validates against the YAML spec, Pydantic validates against +the Python models. The models are used in some code paths but not in the main request handlers +(controllers receive raw dicts from Connexion, not Pydantic models). This dual-validation is +inconsistent and creates confusion about which validation rules apply. + +--- + +## 11. Deployment and Operations Quality + +### 11.1 Kubernetes Deployment Gaps + +| Missing Feature | Impact | +|---|---| +| Liveness probe (commented out) | Unhealthy pods serve traffic indefinitely | +| Readiness probe (commented out) | Traffic sent before OEG is ready | +| Resource limits not set on OEG pod | Potential OOM on adjacent pods | +| No `PodDisruptionBudget` | Cluster operations can take down OEG completely | +| No `HorizontalPodAutoscaler` | Cannot scale under load | +| MongoDB without auth | Anyone in-cluster can access oeg_storage | +| `mongo:latest` image tag | Non-reproducible builds; silent breaking changes | +| No NetworkPolicy | Any pod in cluster can call OEG, SRM, or FM | +| Secrets as plain env vars | Visible in `kubectl describe pod` output | + +### 11.2 Image Build Quality + +```dockerfile +FROM python:3.12-alpine +... 
+RUN pip install --trusted-host pypi.org --trusted-host files.pythonhosted.org -r requirements.txt +COPY . /usr/src/app +``` + +Issues: +- `--trusted-host` disables pip TLS verification — packages can be tampered with in transit +- No `.dockerignore` — `.git/`, `tests/`, `docs/`, `helm/`, and local `.env` files are all + copied into the image, increasing size and potentially leaking secrets +- The `pip install` layer is only cached across source changes if `requirements.txt` alone is copied before it; + if the elided steps copy the whole source tree first, **every source change rebuilds all dependencies** + — slow CI pipelines. The correct pattern is to copy dependency files first, then install, then copy source. +- No image vulnerability scanning in CI + +### 11.3 CI/CD Gaps + +| Gap | Impact | +|---|---| +| GitLab CI has no test step | Code pushed to GitLab main is not tested | +| GitHub Actions has no Docker build | Image quality not verified in PR checks | +| No image signing | Cannot verify image provenance in Kubernetes | +| No SBOM generation | Cannot audit transitive dependencies | +| No staging environment deploy | Changes go straight from PR to whatever environment uses the image | +| `docker push :latest` (non-semantic tag) | Cannot roll back to a specific version | + +### 11.4 Helm Chart Quality + +Good parts: +- RollingUpdate strategy (`maxSurge: 1, maxUnavailable: 0`) +- Sub-chart structure is clean +- VALUES are all documented + +Gaps: +- Secrets stored as plaintext values (should use `secretKeyRef` from Kubernetes Secrets) +- `mongo:latest` and similar `:latest` tags +- No `helm test` hooks defined +- No default resource limits on OEG controller + +--- + +## 12. 
Scoring Summary + +``` +╔══════════════════════════════════════════════════════════════════╗ +║ Quality Scorecard (max 10 per category) ║ +╠════════════════════════╦═════════════╦════════════════════════════╣ +║ Category ║ Score ║ Key Issues ║ +╠════════════════════════╬═════════════╬════════════════════════════╣ +║ Code Quality ║ █████░░░░░ ║ Dead code, monolith fn, ║ +║ ║ 5 / 10 ║ wrong docstrings ║ +╠════════════════════════╬═════════════╬════════════════════════════╣ +║ Architecture ║ ██████░░░░ ║ Layer violations, sync I/O,║ +║ ║ 6 / 10 ║ startup side effects ║ +╠════════════════════════╬═════════════╬════════════════════════════╣ +║ Security Engineering ║ ██░░░░░░░░ ║ NO AUTH, hardcoded creds, ║ +║ ║ 2 / 10 ║ TLS disabled everywhere ║ +╠════════════════════════╬═════════════╬════════════════════════════╣ +║ Error Handling ║ ███░░░░░░░ ║ Swallowing pattern, no ║ +║ ║ 3 / 10 ║ typed errors, no rollback ║ +╠════════════════════════╬═════════════╬════════════════════════════╣ +║ Test Coverage ║ ████░░░░░░ ║ 22% module coverage, stale ║ +║ ║ 4 / 10 ║ test assertions ║ +╠════════════════════════╬═════════════╬════════════════════════════╣ +║ Observability ║ ██░░░░░░░░ ║ Logger broken, no health, ║ +║ ║ 2 / 10 ║ no metrics, no tracing ║ +╠════════════════════════╬═════════════╬════════════════════════════╣ +║ Performance ║ ████░░░░░░ ║ Sync blocking I/O, no ║ +║ ║ 4 / 10 ║ pooling, no caching ║ +╠════════════════════════╬═════════════╬════════════════════════════╣ +║ Maintainability ║ █████░░░░░ ║ Drift between spec and ║ +║ ║ 5 / 10 ║ models, growing debt ║ +╠════════════════════════╬═════════════╬════════════════════════════╣ +║ Deployment & Ops ║ █████░░░░░ ║ No probes, :latest tags, ║ +║ ║ 5 / 10 ║ secrets in values.yaml ║ +╠════════════════════════╬═════════════╬════════════════════════════╣ +║ OVERALL ║ ████░░░░░░ ║ Functional prototype, ║ +║ ║ 4 / 10 ║ not production-ready ║ +╚════════════════════════╩═════════════╩════════════════════════════╝ 
+``` + +--- + 
+## 13. Improvement Roadmap + +### Phase 0 — Immediate Fixes (< 1 day, no architecture change) + +These are trivial changes that fix real bugs with zero risk of regression: + +| # | Fix | File | Effort | +|---|---|---|---| +| P0-1 | Add `logger.addHandler(handler)` | `log_manager.py` | 1 line | +| P0-2 | Remove 5 `print()` statements | `app_controllers.py`, `federation_services.py` | 5 lines | +| P0-3 | Remove 6 unused imports | ruff `--fix` | automated | +| P0-4 | Fix wrong docstrings on `get_qod_session`, `delete_qod_session` | `network_functions_controller.py` | 2 lines | +| P0-5 | Add `timeout=10.0` to `edge_cloud_zones()` | `edge_cloud_services.py` | 1 line | +| P0-6 | Move Keycloak client credentials to env vars | `federation_manager_controller.py` | 3 lines | +| P0-7 | Fix `FixedNetworkIds` Pydantic v2 | `federation_manager_models.py` | 2 lines | + +### Phase 1 — Bug Fixes (1–3 days) + +| # | Fix | Files | Effort | +|---|---|---|---| +| P1-1 | Actually apply region/status filters in `get_edge_cloud_zones` | `edge_cloud_controller.py` | Small | +| P1-2 | Fix `get_app_instance` to handle all filter combos | `app_controllers.py` | Small | +| P1-3 | Fix `deploy_app_partner` to return parsed JSON, not raw Response | `federation_services.py`, `app_controllers.py` | Small | +| P1-4 | Fix `insert_zones` to upsert (not insert_many) | `storage_service.py` | Small | +| P1-5 | Fix `delete_partner_zones` to scope by `fedContextId` | `storage_service.py` | Small | +| P1-6 | Change `isLocal` to Python bool | `edge_cloud_controller.py`, `federation_manager_controller.py`, `storage_service.py` | Small | +| P1-7 | Move startup zone seeding to a startup hook, not module-level | `edge_cloud_controller.py` | Medium | +| P1-8 | Add `try/except` to `edge_cloud_zones()` in service | `edge_cloud_services.py` | Small | + +### Phase 2 — Security (2–5 days) + +| # | Fix | Files | Effort | +|---|---|---|---| +| P2-1 | Re-enable `security:` blocks in OpenAPI spec | `openapi.yaml` | Small 
| +| P2-2 | Fix `check_oAuth2ClientCredentials` to actually validate | `security_controller.py` | Medium | +| P2-3 | Move all secrets to env vars + K8s Secrets | `values.yaml`, `.env` | Medium | +| P2-4 | Add `SSL_CA_BUNDLE` env var, pass to all requests | `edge_cloud_services.py`, `federation_services.py` | Medium | +| P2-5 | Rotate committed secrets | Keycloak + K8s admin | Immediate | +| P2-6 | Mask credentials fields in log output | All loggers | Small | +| P2-7 | Add `.dockerignore` | repo root | Small | + +### Phase 3 — Architecture (FastAPI rewrite, 2–4 weeks) + +The Connexion/Flask foundation has structural limitations (sync I/O, no native DI, awkward +response handling) that make implementing many of the above fixes difficult to do cleanly. +The FastAPI rewrite addresses all categories simultaneously: + +| Goal | How FastAPI Addresses It | +|---|---| +| Async I/O | `async def` handlers + `httpx.AsyncClient` | +| Typed responses | `response_model=` on every route | +| Dependency injection | `Depends()` for DB, clients, auth | +| Auth enforcement | `Security(require_scope(...))` as route dependency | +| Startup logic | `@asynccontextmanager lifespan` | +| Structured errors | `@app.exception_handler` with typed `HTTPException` | +| Pooled MongoDB | Single `motor.AsyncIOMotorClient` via lifespan | +| Correlation propagation | Middleware injecting into `httpx` client headers | + +--- + +## 14. 
Concrete Code Improvements + +### 14.1 Fix the Logger (1 line) + +```python +# managers/log_manager.py — CURRENT +logger = logging.getLogger("Edge Cloud Management API") +logger.setLevel(logging.INFO) +formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") +handler = logging.StreamHandler(sys.stdout) +handler.setFormatter(formatter) +# addHandler MISSING ↑ + +# FIXED +logger = logging.getLogger("Edge Cloud Management API") +logger.setLevel(logging.INFO) +formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") +handler = logging.StreamHandler(sys.stdout) +handler.setFormatter(formatter) +logger.addHandler(handler) # ← add this line +logger.propagate = False # ← prevent double-logging via root logger +``` + +### 14.2 Remove Print Statements + +```python +# app_controllers.py — REMOVE THESE LINES: +print("\n === Preparing Deployment Request ===") +print(f" Endpoint: {pi_edge_client.base_url}/deployedServiceFunction") +print(f" Headers: {pi_edge_client._get_headers()}") +print(f"Payload: {body}") +print("=== End of Deployment Request ===\n") + +# REPLACE WITH: +logger.debug( + "Sending deployment request", + extra={"endpoint": f"{pi_edge_client.base_url}/deployedServiceFunction", "app_id": app_id} +) +``` + +### 14.3 Fix the Dead Filter + +```python +# edge_cloud_controller.py — CURRENT (filter never applied) +def get_edge_cloud_zones(x_correlator=None, region=None, status=None): + query_params = EdgeCloudQueryParams(x_correlator=x_correlator, region=region, status=status) + def query_region_matches(zone): ... # defined, never called + def query_status_matches(zone): ... 
# defined, never called + response = [EdgeCloudZone(**zone).model_dump() for zone in get_all_cloud_zones()] + return jsonify(response), 200 + +# FIXED +def get_edge_cloud_zones(x_correlator=None, region=None, status=None): + try: + query_params = EdgeCloudQueryParams(x_correlator=x_correlator, region=region, status=status) + zones = [EdgeCloudZone(**z) for z in get_all_cloud_zones()] + if query_params.region is not None: + zones = [z for z in zones if z.edgeCloudRegion == query_params.region] + if query_params.status is not None: + zones = [z for z in zones if z.edgeCloudZoneStatus == query_params.status] + return jsonify([z.model_dump() for z in zones]), 200 + except ValidationError as e: + return jsonify({"status": 400, "code": "VALIDATION_ERROR", "message": e.errors()}), 400 + except Exception as e: + logger.exception("Failed to retrieve edge cloud zones") + return jsonify({"status": 500, "code": "INTERNAL_ERROR", "message": str(e)}), 500 +``` + +### 14.4 Fix Storage Service — Use Existing MongoManager + +```python +# storage_service.py — CURRENT (new connection per call) +def get_zone(zone_id: str): + myclient = pymongo.MongoClient(storage_url) + mydbmongo = myclient[mydb_mongo] + col = mydbmongo["zones"] + return col.find_one({'_id': zone_id}) + +# FIXED — use the MongoManager that already exists +from edge_cloud_management_api.managers.db_manager import MongoManager + +def get_zone(zone_id: str) -> dict | None: + with MongoManager() as db: + return db.find_document("zones", {"_id": zone_id}) + +def insert_zones(zone_list: list) -> None: + with MongoManager() as db: + for zone in zone_list: + db.upsert_document( + "zones", + {"_id": zone["_id"]}, + zone, + ) + # Note: MongoManager.update_document uses $set without upsert, so it would silently skip zones that are not stored yet — this relies on the upsert_document method added below: +``` + +Add upsert support to `MongoManager`: +```python +# managers/db_manager.py — add this method +def upsert_document(self, collection_name: str, query: dict, data: dict) -> int: + collection = self.db[collection_name] + result =
collection.update_one(query, {"$set": data}, upsert=True) + return result.modified_count + (1 if result.upserted_id is not None else 0) +``` + +### 14.5 Fix Token as Environment Variables + +```python +# federation_manager_controller.py — CURRENT +token_headers = { + 'Authorization': 'Basic <redacted: base64(client_id:client_secret) hard-coded in source — rotate, see P2-5>', + 'Content-Type': 'application/x-www-form-urlencoded' +} +data = {'grant_type': 'client_credentials', 'scope': 'fed-mgmt'} + +# FIXED +import base64 +_client_id = config.KEYCLOAK_CLIENT_ID +_client_secret = config.KEYCLOAK_CLIENT_SECRET +_credentials = base64.b64encode(f"{_client_id}:{_client_secret}".encode()).decode() + +token_headers = { + 'Authorization': f'Basic {_credentials}', + 'Content-Type': 'application/x-www-form-urlencoded' +} +data = {'grant_type': 'client_credentials', 'scope': config.KEYCLOAK_SCOPE} +``` + +And in `env_config.py`: +```python +class Configuration(BaseSettings): + ... + KEYCLOAK_CLIENT_ID: str = os.getenv("KEYCLOAK_CLIENT_ID", "") + KEYCLOAK_CLIENT_SECRET: str = os.getenv("KEYCLOAK_CLIENT_SECRET", "") + KEYCLOAK_SCOPE: str = os.getenv("KEYCLOAK_SCOPE", "fed-mgmt") +``` + +### 14.6 Fix `isLocal` Type + +```python +# CURRENT — stored and compared as string +zone['isLocal'] = 'true' # in edge_cloud_controller.py startup +zone['isLocal'] = 'false' # in federation_manager_controller.py +if zone.get('isLocal') == 'false': # in app_controllers.py + +# FIXED — use Python bool throughout +zone['isLocal'] = True # startup seeding +zone['isLocal'] = False # federation seeding +if not zone.get('isLocal', True): # cleaner boolean check +``` + +### 14.7 Move Startup Side Effects to a Startup Function + +```python +# edge_cloud_controller.py — CURRENT (runs at import time) +try: + pi_edge_factory = PiEdgeAPIClientFactory() + api_client = pi_edge_factory.create_pi_edge_api_client() + zones = api_client.edge_cloud_zones() + for zone in zones: + zone['_id'] = zone.get('edgeCloudZoneId') + zone['isLocal'] =
True + insert_zones(zones) +except Exception as e: + logger.error(e.args) + +# FIXED — call this from app startup, not module import +def seed_local_zones() -> None: + """ + Seed the OEG zone registry with zones from SRM. + Idempotent — uses upsert. Safe to call multiple times. + """ + max_retries = 3 + for attempt in range(max_retries): + try: + pi_edge_factory = PiEdgeAPIClientFactory() + api_client = pi_edge_factory.create_pi_edge_api_client() + zones = api_client.edge_cloud_zones() + for zone in zones: + zone['_id'] = zone['edgeCloudZoneId'] + zone['isLocal'] = True + insert_zones(zones) # upsert-based + logger.info("Seeded %d local zones from SRM", len(zones)) + return + except Exception as e: + logger.warning("Zone seeding attempt %d/%d failed: %s", attempt + 1, max_retries, e) + if attempt < max_retries - 1: + time.sleep(2 ** attempt) # exponential backoff + logger.error("Zone seeding failed after %d attempts — local zone routing unavailable", max_retries) +``` + +### 14.8 Add a Health Endpoint + +```python +# app.py — add a health route +from flask import Blueprint, jsonify +import pymongo + +health_bp = Blueprint('health', __name__) + +@health_bp.route('/health') +def health(): + checks = {} + + # MongoDB check + try: + client = pymongo.MongoClient(config.MONGO_URI, serverSelectionTimeoutMS=2000) + client.server_info() + checks['mongodb'] = 'ok' + except Exception as e: + checks['mongodb'] = f'error: {e}' + + # SRM reachability check + try: + import requests as req + r = req.get(f"{config.SRM_HOST}/node", timeout=3, verify=False) + checks['srm'] = 'ok' if r.status_code < 500 else f'error: {r.status_code}' + except Exception as e: + checks['srm'] = f'error: {e}' + + status = 'ok' if all(v == 'ok' for v in checks.values()) else 'degraded' + code = 200 if status == 'ok' else 503 + return jsonify({'status': status, 'checks': checks}), code + + +# In get_app_instance(): +app.app.register_blueprint(health_bp) +``` + +### 14.9 Fix the Stale Tests + +```python +# 
test_app_controllers.py — CURRENT (tests deleted implementation) +def test_create_app_instance(mock_factory_class, test_app): + ... + assert "Application mock-app-id instantiation accepted" in data["message"] # ← tests old code + +# FIXED — test what the function actually does now +@patch("edge_cloud_management_api.controllers.app_controllers.get_zone") +@patch("edge_cloud_management_api.controllers.app_controllers.PiEdgeAPIClientFactory") +def test_create_app_instance_local_zone(mock_factory, mock_get_zone, test_app): + """Local zone instantiation should call deploy_service_function and return 202.""" + mock_get_zone.return_value = {"_id": "zone-1", "isLocal": True} + mock_client = MagicMock() + mock_client.deploy_service_function.return_value = {"appInstanceId": "inst-123", "status": "instantiating"} + mock_factory.return_value.create_pi_edge_api_client.return_value = mock_client + + body = {"appId": "app-uuid-1", "appZones": [{"EdgeCloudZone": {"edgeCloudZoneId": "zone-1"}}]} + with test_app.test_request_context(json=body, content_type='application/json'): + response, status_code = app_controllers.create_app_instance() + + assert status_code == 202 + mock_client.deploy_service_function.assert_called_once() +``` + +### 14.10 Add `timeout` to `edge_cloud_zones()` + +```python +# edge_cloud_services.py — CURRENT +def edge_cloud_zones(self): + url = f"{self.base_url}/node" + request_headers = self._get_headers() + response = requests.get(url, headers=request_headers, verify=False) + response.raise_for_status() + nodes = response.json() + if not nodes: + raise ValueError("No edge nodes found") + return nodes + +# FIXED +def edge_cloud_zones(self) -> list[dict]: + """Fetch all edge zones from SRM /node endpoint.""" + url = f"{self.base_url}/node" + try: + response = requests.get( + url, + headers=self._get_headers(), + verify=False, + timeout=10.0, # was missing + ) + response.raise_for_status() + nodes = response.json() + if not isinstance(nodes, list): + raise 
ValueError(f"Expected list, got {type(nodes).__name__}") + return nodes + except Timeout: + logger.error("Timeout fetching edge zones from SRM") + raise + except ConnectionError: + logger.error("Cannot connect to SRM to fetch edge zones") + raise + except requests.exceptions.HTTPError as e: + logger.error("SRM returned HTTP error fetching edge zones: %s", e) + raise +``` + +--- + +*This document should be reviewed alongside `OEG_ANALYSIS.md` (full implementation analysis) +and `COMPONENT_INTEGRATIONS.md` (component integration deep-dive). Together they form the +complete technical assessment of the Open Exposure Gateway.* diff --git a/docs/diagrams/api_map.png b/docs/diagrams/api_map.png new file mode 100644 index 0000000000000000000000000000000000000000..ba3d8f2106ca80dc428894a6f77edbe89f191de6 Binary files /dev/null and b/docs/diagrams/api_map.png differ diff --git a/docs/diagrams/architecture_diagram.png b/docs/diagrams/architecture_diagram.png new file mode 100644 index 0000000000000000000000000000000000000000..7c8e08af654062bf46f0e6b438d4ec8e6793d2c5 Binary files /dev/null and b/docs/diagrams/architecture_diagram.png differ diff --git a/docs/diagrams/sequence_app_federated.png b/docs/diagrams/sequence_app_federated.png new file mode 100644 index 0000000000000000000000000000000000000000..637645c3935f8d72248f24faf271f4b210814f54 Binary files /dev/null and b/docs/diagrams/sequence_app_federated.png differ diff --git a/docs/diagrams/sequence_app_local.png b/docs/diagrams/sequence_app_local.png new file mode 100644 index 0000000000000000000000000000000000000000..8b2386465a5b1f4ae4bd84c8ece2928fa41264b7 Binary files /dev/null and b/docs/diagrams/sequence_app_local.png differ diff --git a/docs/diagrams/sequence_federation.png b/docs/diagrams/sequence_federation.png new file mode 100644 index 0000000000000000000000000000000000000000..8ea5b78d643b0eddb4ad77ed2a2339749e8cead6 Binary files /dev/null and b/docs/diagrams/sequence_federation.png differ diff --git 
def retrieve_location(body: dict):
    """
    Handle a CAMARA Location Retrieval request.

    Builds a PiEdge API client through ``PiEdgeAPIClientFactory`` and hands
    ``body`` to its ``retrieve_location`` method; whatever the client returns
    is passed back to the framework unchanged.

    Returns:
        The client's result on success; a ``(json, 400)`` pair when a
        ``ValidationError`` is raised; a ``(json, 500)`` pair for any other
        exception.
    """
    try:
        srm_client = PiEdgeAPIClientFactory().create_pi_edge_api_client()
        return srm_client.retrieve_location(body)
    except ValidationError as validation_error:
        payload = {"error": "Invalid input", "details": validation_error.errors()}
        return jsonify(payload), 400
    except Exception as unexpected:
        # Catch-all boundary: report the failure to the caller as a 500.
        payload = {"error": "An unexpected error occurred", "details": str(unexpected)}
        return jsonify(payload), 500
+ Forwards the request to the SRM's /location/retrieve endpoint. + """ + url = f"{self.base_url}/location/retrieve" + request_headers = self._get_headers() + response = requests.post(url, json=body, headers=request_headers, verify=False) + if response.status_code == 200: + return response.json() + else: + # Relay the SRM error response with its status code + try: + error_body = response.json() + except Exception: + error_body = {"error": response.text} + return error_body, response.status_code + class PiEdgeAPIClientFactory: """ Factory class to create instances of PiEdgeAPIClient. diff --git a/edge_cloud_management_api/specification/openapi.yaml b/edge_cloud_management_api/specification/openapi.yaml index f3656d6d2bf1e711c1bb6d31499fb7758b64d839..613263f5717df376db6b91cfa36995a75c1221b5 100644 --- a/edge_cloud_management_api/specification/openapi.yaml +++ b/edge_cloud_management_api/specification/openapi.yaml @@ -1417,6 +1417,93 @@ components: type: string accessTokenType: type: string + RetrievalLocationRequest: + type: object + properties: + device: + $ref: '#/components/schemas/Device' + maxAge: + type: integer + description: Maximum age of the location information accepted (in seconds). + example: 60 + maxSurface: + type: integer + description: Maximum surface in square meters accepted for the location retrieval. + minimum: 1 + example: 10000 + Device: + type: object + properties: + phoneNumber: + type: string + description: Phone number of the device in E.164 format. + example: "+123456789" + networkAccessIdentifier: + type: string + description: External identifier of the device (e.g. GPSI). + example: "device@testnet.net" + ipv4Address: + $ref: '#/components/schemas/DeviceIpv4Addr' + ipv6Address: + type: string + description: IPv6 address of the device. 
+ example: "2001:db8::1" + DeviceIpv4Addr: + type: object + properties: + publicAddress: + type: string + example: "198.51.100.1" + privateAddress: + type: string + example: "10.0.0.1" + publicPort: + type: integer + example: 59765 + LocationResponse: + type: object + properties: + lastLocationTime: + type: string + format: date-time + description: Timestamp of the last known location. + example: "2024-06-01T12:00:00Z" + area: + $ref: '#/components/schemas/LocationArea' + LocationArea: + type: object + description: Geographic area of the location (Circle or Polygon). + properties: + areaType: + type: string + enum: + - CIRCLE + - POLYGON + example: CIRCLE + center: + $ref: '#/components/schemas/GeoPoint' + radius: + type: number + description: Radius in meters (when areaType is CIRCLE). + example: 800 + boundary: + type: array + description: List of points forming the polygon boundary (when areaType is POLYGON). + items: + $ref: '#/components/schemas/GeoPoint' + GeoPoint: + type: object + properties: + latitude: + type: number + minimum: -90 + maximum: 90 + example: 37.9553 + longitude: + type: number + minimum: -180 + maximum: 180 + example: 23.8522 AccessEndpoint: type: object description: | diff --git a/generate_diagrams.py b/generate_diagrams.py new file mode 100644 index 0000000000000000000000000000000000000000..57ec84a45f855c1abcc529dd154a2276c90dbd8a --- /dev/null +++ b/generate_diagrams.py @@ -0,0 +1,716 @@ +""" +Diagram generator for the Open Exposure Gateway (OEG) analysis. +Produces: + 1. architecture_diagram.png – system architecture + 2. sequence_app_local.png – app submit + local instantiation flow + 3. sequence_app_federated.png – federated instantiation flow + 4. sequence_federation.png – federation establishment flow + 5. sequence_qod.png – QoD session flow + 6. 
def styled_box(ax, x, y, w, h, color, text, fontsize=9, text_color="#E8EAF0",
               radius=0.015, alpha=0.92, bold=False, subtitle=None):
    """
    Draw a rounded, filled box centred on (x, y) with a centred text label.

    When ``subtitle`` is not None the main label is shifted up by 12% of the
    box height; a non-empty subtitle is then drawn in italic grey below the
    centre, 1.5 pt smaller than the main label.
    """
    lower_left = (x - w/2, y - h/2)
    ax.add_patch(FancyBboxPatch(
        lower_left, w, h,
        boxstyle=f"round,pad=0.005,rounding_size={radius}",
        facecolor=color,
        edgecolor=C["white"],
        linewidth=1.2,
        alpha=alpha,
        zorder=3,
    ))

    if subtitle is None:
        label_y = y
    else:
        label_y = y + h*0.12
    ax.text(x, label_y, text, ha="center", va="center",
            fontsize=fontsize, color=text_color,
            weight="bold" if bold else "normal", zorder=4)

    if not subtitle:
        return
    ax.text(x, y - h*0.18, subtitle, ha="center", va="center",
            fontsize=fontsize - 1.5, color=C["grey"], zorder=4, style="italic")


def arrow(ax, x1, y1, x2, y2, color=C["cyan"], lw=1.5, label="",
          label_color=None, style="->", offset=(0, 0.012), zorder=2):
    """
    Draw a straight arrow from (x1, y1) to (x2, y2), optionally labelled.

    A non-empty ``label`` is placed at the segment midpoint displaced by
    ``offset`` and coloured ``label_color``, falling back to the arrow colour.
    """
    arrow_props = {
        "arrowstyle": style,
        "color": color,
        "lw": lw,
        "connectionstyle": "arc3,rad=0.0",
    }
    ax.annotate("", xy=(x2, y2), xytext=(x1, y1),
                arrowprops=arrow_props, zorder=zorder)
    if not label:
        return

    dx, dy = offset[0], offset[1]
    mid_x = (x1 + x2) / 2 + dx
    mid_y = (y1 + y2) / 2 + dy
    label_box = {"boxstyle": "round,pad=0.15", "fc": C["bg"], "ec": "none", "alpha": 0.7}
    ax.text(mid_x, mid_y, label, ha="center", va="bottom",
            fontsize=7, color=label_color or color, zorder=5,
            bbox=label_box)
def draw_architecture():
    """
    Render the system architecture diagram and save it as
    ``architecture_diagram.png`` inside ``OUT``.

    All coordinates are in the axes' 0–1 unit square. Background zone
    rectangles are drawn at zorder 1, arrows default to zorder 2 and boxes use
    zorder 3, so arrow segments that pass under a box are hidden by it.
    """
    fig, ax = plt.subplots(figsize=(18, 12))
    fig.patch.set_facecolor(C["bg"])
    ax.set_facecolor(C["bg"])
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.axis("off")

    fig.text(0.5, 0.97, "Open Exposure Gateway — System Architecture",
             ha="center", va="top", fontsize=16, color=C["white"], weight="bold")
    fig.text(0.5, 0.94, "Current Implementation (Connexion/Flask) · CAMARA-compliant Northbound Gateway",
             ha="center", va="top", fontsize=10, color=C["grey"])

    # ── Zones ──────────────────────────────────────────────────────
    def zone_rect(x, y, w, h, label, color):
        # Faint tinted rectangle anchored at its lower-left corner (x, y),
        # with the zone label in its top-left corner.
        rect = FancyBboxPatch((x, y), w, h,
                              boxstyle="round,pad=0.008",
                              facecolor=color, edgecolor=color,
                              linewidth=2, alpha=0.10, zorder=1)
        ax.add_patch(rect)
        ax.text(x + 0.01, y + h - 0.025, label,
                fontsize=8, color=color, weight="bold", va="top", zorder=2)

    zone_rect(0.01, 0.67, 0.20, 0.24, "Application Providers / Clients", C["blue"])
    zone_rect(0.24, 0.05, 0.52, 0.86, "Operator Platform", C["green"])
    zone_rect(0.79, 0.30, 0.20, 0.40, "Partner OP", C["orange"])
    # Shares its lower-left corner with "Operator Platform" and overlays its bottom band.
    zone_rect(0.24, 0.05, 0.52, 0.25, "Infrastructure Layer", C["teal"])

    # ── External clients ──────────────────────────────────────────
    clients = [
        (0.11, 0.84, "App Provider\n(Developer)", C["blue"]),
        (0.11, 0.74, "API Client\n(CAMARA)", C["blue"]),
    ]
    for x, y, t, c in clients:
        styled_box(ax, x, y, 0.14, 0.055, c, t, fontsize=8, bold=False)

    # ── OEG ───────────────────────────────────────────────────────
    oeg_x, oeg_y = 0.50, 0.77
    styled_box(ax, oeg_x, oeg_y, 0.30, 0.13, C["darkblue"],
               "Open Exposure Gateway (OEG)", fontsize=11,
               bold=True, subtitle="Connexion/Flask · Port 8080")

    # OEG sub-modules (drawn on top of the OEG box, same centre height)
    sub = [
        (0.38, 0.77, "App\nController", C["blue"]),
        (0.46, 0.77, "EdgeCloud\nController", C["blue"]),
        (0.54, 0.77, "Federation\nController", C["blue"]),
        (0.62, 0.77, "Network Fn\nController", C["blue"]),
    ]
    for x, y, t, c in sub:
        styled_box(ax, x, y, 0.075, 0.07, c, t, fontsize=7)

    # Ingress / Swagger
    styled_box(ax, 0.50, 0.91, 0.28, 0.045, C["grey"],
               "Traefik Ingress /oeg Swagger UI /docs", fontsize=8)

    # ── MongoDB (OEG) ─────────────────────────────────────────────
    styled_box(ax, 0.50, 0.61, 0.18, 0.055, C["green"],
               "MongoDB (OEG)", fontsize=9, bold=False,
               subtitle="zones · federations")

    # ── SRM ───────────────────────────────────────────────────────
    srm_x, srm_y = 0.40, 0.32
    styled_box(ax, srm_x, srm_y, 0.22, 0.14, C["teal"],
               "Service Resource Manager\n(SRM)", fontsize=9, bold=True)

    srm_subs = [
        (0.32, 0.32, "/serviceFunction\n(App Catalogue)", C["teal"]),
        (0.40, 0.32, "/deployedService\nFunction", C["teal"]),
        (0.48, 0.32, "/node\n(Edge Zones)", C["teal"]),
    ]
    for x, y, t, c in srm_subs:
        styled_box(ax, x, y, 0.075, 0.06, c, t, fontsize=6.5)

    styled_box(ax, srm_x, 0.17, 0.18, 0.05, C["green"],
               "MongoDB (SRM)", fontsize=9, subtitle="apps · nodes · instances")

    # ── Artefact Manager ──────────────────────────────────────────
    styled_box(ax, 0.63, 0.32, 0.17, 0.08, C["teal"],
               "Artefact Manager", fontsize=9,
               subtitle="Helm / Container images")

    # ── Federation Manager ────────────────────────────────────────
    fm_x, fm_y = 0.50, 0.50
    styled_box(ax, fm_x, fm_y, 0.26, 0.07, C["purple"],
               "Federation Manager (FM)", fontsize=9, bold=True,
               subtitle="Keycloak OAuth2 · GSMA OPG APIs")

    styled_box(ax, 0.63, 0.17, 0.17, 0.05, C["green"],
               "MongoDB (FM)", fontsize=9, subtitle="federations · zones")

    # ── Partner OP ────────────────────────────────────────────────
    styled_box(ax, 0.89, 0.60, 0.17, 0.07, C["orange"],
               "Partner FM", fontsize=9, subtitle="/partner /artefact /lcm")
    styled_box(ax, 0.89, 0.48, 0.17, 0.06, C["orange"],
               "Partner SRM", fontsize=9)
    styled_box(ax, 0.89, 0.36, 0.17, 0.06, C["orange"],
               "Partner Infra", fontsize=9)

    # ── Infrastructure ────────────────────────────────────────────
    styled_box(ax, 0.35, 0.11, 0.14, 0.06, C["teal"],
               "Kubernetes Adapter", fontsize=8)
    styled_box(ax, 0.52, 0.11, 0.14, 0.06, C["teal"],
               "VM / OpenStack\nAdapter", fontsize=8)
    styled_box(ax, 0.69, 0.11, 0.12, 0.06, C["teal"],
               "Edge Nodes", fontsize=8)

    # ── Arrows ────────────────────────────────────────────────────
    # Clients → OEG
    arrow(ax, 0.18, 0.84, 0.355, 0.80, C["cyan"], 1.8, "CAMARA REST")
    arrow(ax, 0.18, 0.74, 0.355, 0.76, C["cyan"], 1.8)

    # OEG → MongoDB
    arrow(ax, 0.50, 0.71, 0.50, 0.638, C["green"], 1.5, "R/W zones &\nfederations", offset=(0.055, 0))

    # OEG → SRM
    arrow(ax, 0.435, 0.71, 0.40, 0.39, C["cyan"], 1.8, "HTTP REST\n/serviceFunction\n/deployedServiceFunction\n/node", offset=(0.07, 0))

    # OEG → FM
    arrow(ax, 0.50, 0.71, 0.50, 0.535, C["purple"], 1.8, "HTTP REST\n/partner /artefact", offset=(-0.08, 0))

    # FM → Partner FM
    arrow(ax, 0.63, 0.50, 0.80, 0.60, C["orange"], 1.8, "GSMA OPG\nFederation APIs", offset=(-0.01, 0.015))

    # FM → MongoDB
    # NOTE(review): this arrow starts at (0.63, 0.17) — the centre passed to the
    # "MongoDB (FM)" box — and ends 0.03 above it, pointing upward. It does not
    # start at the Federation Manager box (centred at 0.50, 0.50), unlike the
    # "SRM → MongoDB" arrow below, which runs from the SRM box down to its DB.
    # Confirm the intended endpoints.
    arrow(ax, 0.63, 0.17, 0.63, 0.20, C["green"], 1.5)

    # SRM → MongoDB
    arrow(ax, 0.40, 0.25, 0.40, 0.195, C["green"], 1.5)

    # SRM → Infrastructure
    arrow(ax, 0.38, 0.25, 0.35, 0.14, C["teal"], 1.5, "kubectl\nAPI")
    arrow(ax, 0.42, 0.25, 0.52, 0.14, C["teal"], 1.5)
    arrow(ax, 0.50, 0.25, 0.69, 0.14, C["teal"], 1.5)

    # SRM → Artefact Mgr
    arrow(ax, 0.51, 0.32, 0.545, 0.32, C["teal"], 1.5, "HTTP")

    # Partner chain
    arrow(ax, 0.89, 0.57, 0.89, 0.51, C["orange"], 1.5)
    arrow(ax, 0.89, 0.45, 0.89, 0.39, C["orange"], 1.5)

    # ── Legend ────────────────────────────────────────────────────
    legend_items = [
        (C["blue"], "API Layer (Controllers)"),
        (C["teal"], "Infrastructure Layer (SRM)"),
        (C["purple"], "Federation Layer (FM)"),
        (C["orange"], "Partner OP"),
        (C["green"], "Persistence (MongoDB)"),
        (C["cyan"], "REST / HTTP call"),
    ]
    lx, ly = 0.02, 0.40
    # One colour swatch + caption per row, stacked downward from (lx, ly).
    for i, (col, label) in enumerate(legend_items):
        ax.add_patch(plt.Rectangle((lx, ly - i*0.045), 0.018, 0.025,
                                   facecolor=col, edgecolor="none", zorder=5))
        ax.text(lx + 0.025, ly - i*0.045 + 0.012, label,
                fontsize=7.5, color=C["white"], va="center", zorder=5)

    # Leave the top 7% of the figure for the title and subtitle.
    fig.tight_layout(rect=[0, 0, 1, 0.93])
    fig.savefig(f"{OUT}/architecture_diagram.png", dpi=150, bbox_inches="tight",
                facecolor=C["bg"])
    plt.close(fig)
    print("✓ architecture_diagram.png")
def sequence_diagram(title, subtitle, participants, messages, filename,
                     figsize=(16, 10)):
    """
    Render a UML-style sequence diagram and save it as ``filename`` in ``OUT``.

    participants: list of (label, color); one evenly spaced lifeline each.
    messages: list of tuples indexed as (from_idx, to_idx, label, style, note).
        Only indices 0-3 are read here; the fifth field is never used.
    style: 'sync' | 'async' | 'return' | 'error' | 'note'
        - 'return' is drawn grey and dashed, 'async' orange, 'error' red;
          any other value is drawn as a solid cyan arrow.
        - 'note' draws a highlighted label on the ``from_idx`` lifeline
          (``to_idx`` is ignored) and gets no step number, although it still
          occupies a row.
    A message whose from_idx equals to_idx is drawn as a self-call loop,
    regardless of its style.
    """
    n = len(participants)
    fig, ax = plt.subplots(figsize=figsize)
    fig.patch.set_facecolor(C["bg"])
    ax.set_facecolor(C["bg"])
    ax.axis("off")

    fig.text(0.5, 0.98, title, ha="center", va="top",
             fontsize=14, color=C["white"], weight="bold")
    fig.text(0.5, 0.955, subtitle, ha="center", va="top",
             fontsize=9, color=C["grey"])

    MARGIN_TOP = 0.92
    MARGIN_BOT = 0.03
    LABEL_H = 0.055
    total_h = MARGIN_TOP - MARGIN_BOT - LABEL_H
    # +1 keeps the last message above the bottom margin (and avoids a
    # division by zero when ``messages`` is empty).
    step = total_h / (len(messages) + 1)

    col_x = np.linspace(0.07, 0.97, n)
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)

    # participant boxes
    for i, (label, color) in enumerate(participants):
        styled_box(ax, col_x[i], MARGIN_TOP - LABEL_H/2, 0.12, LABEL_H,
                   color, label, fontsize=8, bold=True)

    # lifelines
    for i in range(n):
        ax.plot([col_x[i], col_x[i]],
                [MARGIN_TOP - LABEL_H, MARGIN_BOT],
                color=C["border"], lw=1.2, ls="--", zorder=1)

    # messages
    for idx, msg in enumerate(messages):
        y = MARGIN_TOP - LABEL_H - (idx + 1) * step

        if msg[3] == "note":
            ni = msg[0]
            ax.add_patch(FancyBboxPatch((col_x[ni] - 0.08, y - 0.018),
                                        0.16, 0.032,
                                        boxstyle="round,pad=0.004",
                                        facecolor=C["yellow"], alpha=0.18,
                                        edgecolor=C["yellow"], lw=1, zorder=2))
            ax.text(col_x[ni], y - 0.002, msg[2],
                    ha="center", va="center", fontsize=7,
                    color=C["yellow"], zorder=3)
            # Notes have no arrow and no step number.
            continue

        fi, ti = msg[0], msg[1]
        x1, x2 = col_x[fi], col_x[ti]

        color = C["cyan"]
        if msg[3] == "return":
            color = C["grey"]
        elif msg[3] == "async":
            color = C["orange"]
        elif msg[3] == "error":
            color = C["red"]

        ls = "--" if msg[3] in ("return",) else "-"

        if fi == ti:  # self-call
            ax.annotate("", xy=(x1 + 0.04, y - 0.022),
                        xytext=(x1, y),
                        arrowprops=dict(arrowstyle="->", color=color,
                                        lw=1.4,
                                        connectionstyle="arc3,rad=-0.5"),
                        zorder=3)
        else:
            ax.annotate("", xy=(x2, y), xytext=(x1, y),
                        arrowprops=dict(arrowstyle="->", color=color,
                                        lw=1.4, ls=ls,
                                        connectionstyle="arc3,rad=0.0"),
                        zorder=3)

        lx = (x1 + x2) / 2
        lbl = msg[2]
        ax.text(lx, y + 0.008, lbl, ha="center", va="bottom",
                fontsize=7, color=color, zorder=4,
                bbox=dict(boxstyle="round,pad=0.12", fc=C["bg"],
                          ec="none", alpha=0.8))

        # step number
        ax.text(0.005, y, str(idx + 1), ha="left", va="center",
                fontsize=6.5, color=C["grey"], zorder=4)

    fig.savefig(f"{OUT}/{filename}", dpi=150, bbox_inches="tight",
                facecolor=C["bg"])
    plt.close(fig)
    print(f"✓ {filename}")
def draw_seq_local():
    """
    Render ``sequence_app_local.png``: app submission, zone listing and
    instantiation on a local edge zone.

    Each step is (from_lane, to_lane, label, style, note), with lane indices
    referring to positions in ``lanes``.
    """
    lanes = [
        ("App Provider\n(Client)", C["blue"]),
        ("OEG\nController", C["darkblue"]),
        ("PiEdge\nAPI Client", C["cyan"]),
        ("SRM\n/serviceFunction\n/node", C["teal"]),
        ("MongoDB\n(OEG)", C["green"]),
    ]

    submit_app = [
        (0, 1, "POST /apps {AppManifest}", "sync", ""),
        (1, 2, "create_pi_edge_api_client()", "sync", ""),
        (2, 3, "POST /serviceFunction {body}", "sync", ""),
        (3, 2, "200 OK {appId}", "return", ""),
        (2, 1, "response.json()", "return", ""),
        (1, 0, "201 {appId: uuid}", "return", ""),
    ]
    get_zones = [
        (0, 1, "GET /edge-cloud-zones", "sync", ""),
        (1, 2, "create_pi_edge_api_client()", "sync", ""),
        (2, 3, "GET /node", "sync", ""),
        (3, 2, "200 [{edgeCloudZoneId, ...}]", "return", ""),
        (2, 4, "insert_zones() [startup only]", "sync", ""),
        (1, 0, "200 [{EdgeCloudZone}]", "return", ""),
    ]
    create_local_instance = [
        (0, 1, "POST /appinstances {appId, appZones}", "sync", ""),
        (1, 4, "get_zone(edgeCloudZoneId)", "sync", ""),
        (4, 1, "zone {isLocal: 'true'}", "return", ""),
        (1, 2, "create_pi_edge_api_client()", "sync", ""),
        (2, 3, "POST /deployedServiceFunction {body}", "sync", ""),
        (3, 2, "202 {appInstanceId, status}", "return", ""),
        (1, 0, "202 {appInstances: [...]}", "return", ""),
    ]

    sequence_diagram(
        title="Sequence: Application Submit + Local Zone Instantiation",
        subtitle="App Provider submits app metadata to OEG → SRM, then instantiates on a local edge zone",
        participants=lanes,
        messages=submit_app + get_zones + create_local_instance,
        filename="sequence_app_local.png",
        figsize=(18, 12),
    )
def draw_seq_federated():
    """
    Render ``sequence_app_federated.png``: instantiation of an application on
    a partner operator's zone (artefact creation, onboarding, deployment).

    Each step is (from_lane, to_lane, label, style, note), with lane indices
    referring to positions in ``lanes``.
    """
    lanes = [
        ("App Provider\n(Client)", C["blue"]),
        ("OEG\nController", C["darkblue"]),
        ("MongoDB\n(OEG)", C["green"]),
        ("FM Client", C["purple"]),
        ("Federation\nManager", C["purple"]),
        ("Partner\nOP FM", C["orange"]),
    ]

    resolve_zone_and_app = [
        (0, 1, "POST /appinstances {appId, appZones (partner zone)}", "sync", ""),
        (1, 2, "get_zone(edgeCloudZoneId)", "sync", ""),
        (2, 1, "zone {isLocal:'false', fedContextId}", "return", ""),
        (1, 3, "get_app(appId) → SRM", "sync", ""),
        (3, 1, "appManifest {name, version, repo...}", "return", ""),
    ]
    create_artefact = [
        (1, 3, "Build GSMA artefact payload", "note", ""),
        (1, 3, "create_artefact(artefact, fedContextId, token)", "sync", ""),
        (3, 4, "POST /{fedContextId}/artefact", "sync", ""),
        (4, 5, "Forward artefact to Partner OP", "async", ""),
        (5, 4, "200 OK", "return", ""),
        (4, 3, "200 OK {artefactId}", "return", ""),
    ]
    onboard_app = [
        (1, 3, "Build GSMA onboard-app payload", "note", ""),
        (1, 3, "onboard_application(fedContextId, onboard_app, token)", "sync", ""),
        (3, 4, "POST /{fedContextId}/application/onboarding", "sync", ""),
        (4, 5, "Forward to Partner OP", "async", ""),
        (5, 4, "200 OK", "return", ""),
        (4, 3, "200 OK", "return", ""),
    ]
    deploy_app = [
        (1, 3, "Build GSMA deploy payload", "note", ""),
        (1, 3, "deploy_app_partner(fedContextId, deploy_app, token)", "sync", ""),
        (3, 4, "POST /{fedContextId}/application/lcm", "sync", ""),
        (4, 5, "Deploy app at Partner OP", "async", ""),
        (5, 4, "202 Accepted", "return", ""),
        (4, 3, "raw response", "return", ""),
        (3, 1, "raw response (status forwarded)", "return", ""),
        (1, 0, "202 {result}", "return", ""),
    ]

    sequence_diagram(
        title="Sequence: Federated Application Instantiation",
        subtitle="App Provider requests deployment to a partner operator's edge zone via GSMA OPG federation APIs",
        participants=lanes,
        messages=resolve_zone_and_app + create_artefact + onboard_app + deploy_app,
        filename="sequence_app_federated.png",
        figsize=(18, 14),
    )
+# ══════════════════════════════════════════════════════════════════ +# 4. FEDERATION ESTABLISHMENT +# ══════════════════════════════════════════════════════════════════ +def draw_seq_federation(): + participants = [ + ("Operator\nAdmin", C["blue"]), + ("OEG\nFed Controller", C["darkblue"]), + ("Keycloak\n(Token EP)", C["red"]), + ("FM Client", C["purple"]), + ("Federation\nManager", C["purple"]), + ("Partner\nOP FM", C["orange"]), + ("MongoDB\n(OEG)", C["green"]), + ] + messages = [ + (0, 1, "POST /partner {origOPFederationId, partnerStatusLink, ...}", "sync", ""), + (1, 2, "POST /token {client_credentials, scope:fed-mgmt}", "sync", ""), + (2, 1, "200 {access_token}", "return", ""), + (1, 3, "post_partner(body, token)", "sync", ""), + (3, 4, "POST /partner (with Bearer + X-Partner-API-Root)", "sync", ""), + (4, 5, "Establish federation context", "async", ""), + (5, 4, "{federationContextId, offeredZones, endpoints}", "return", ""), + (4, 3, "200 {FederationResponseData}", "return", ""), + (3, 1, "response, 200", "return", ""), + (1, 6, "insert_zones(offeredAvailabilityZones)", "sync", ""), + (1, 6, "insert_federation({_id: fedContextId, token})", "sync", ""), + (1, 0, "200 {FederationResponseData}", "return", ""), + + (0, 1, "GET /{fedContextId}/partner", "sync", ""), + (1, 6, "get_fed(fedContextId)", "sync", ""), + (6, 1, "{token}", "return", ""), + (1, 3, "get_partner(fedContextId, token)", "sync", ""), + (3, 4, "GET /{fedContextId}/partner", "sync", ""), + (4, 3, "200 {zone details, network codes...}", "return", ""), + (1, 0, "200 {partner info}", "return", ""), + + (0, 1, "DELETE /{fedContextId}/partner", "sync", ""), + (1, 6, "get_fed(fedContextId)", "sync", ""), + (1, 3, "delete_partner(fedContextId, token)", "sync", ""), + (3, 4, "DELETE /{fedContextId}/partner", "sync", ""), + (4, 5, "Tear down federation", "async", ""), + (4, 3, "200", "return", ""), + (3, 1, "delete_fed + delete_partner_zones", "return", ""), + (1, 6, "delete_fed() + 
delete_partner_zones()", "sync", ""), + (1, 0, "200", "return", ""), + ] + sequence_diagram( + "Sequence: Federation Lifecycle (Establish · Query · Terminate)", + "Operator establishes uni-directional federation with partner OP, queries it, then tears it down", + participants, messages, "sequence_federation.png", figsize=(20, 15) + ) + + +# ══════════════════════════════════════════════════════════════════ +# 5. QoD SESSION FLOW +# ══════════════════════════════════════════════════════════════════ +def draw_seq_qod(): + participants = [ + ("App Client", C["blue"]), + ("OEG\nNetwork Fn\nController", C["darkblue"]), + ("PiEdge\nAPI Client", C["cyan"]), + ("SRM\n/sessions\n/traffic-influences", C["teal"]), + ] + messages = [ + (0, 1, "POST /sessions {device, qosProfile, duration, sink}", "sync", ""), + (1, 2, "create_pi_edge_api_client()", "sync", ""), + (2, 3, "POST /sessions {body}", "sync", ""), + (3, 2, "200 {sessionId, status}", "return", ""), + (2, 1, "response.json()", "return", ""), + (1, 0, "200 {sessionId}", "return", ""), + + (0, 1, "GET /sessions/{sessionId}", "sync", ""), + (1, 2, "get_qod_session(sessionId)", "sync", ""), + (2, 3, "GET /sessions/{sessionId}", "sync", ""), + (3, 2, "200 {session details}", "return", ""), + (1, 0, "200 {session details}", "return", ""), + + (0, 1, "POST /traffic-influences {appId, edgeCloudZoneId, filters}", "sync", ""), + (1, 2, "create_traffic_influence_resource(body)", "sync", ""), + (2, 3, "POST /traffic-influences {body}", "sync", ""), + (3, 2, "200 {id, status}", "return", ""), + (1, 0, "200 {resourceId}", "return", ""), + + (0, 1, "DELETE /sessions/{sessionId}", "sync", ""), + (1, 2, "delete_qod_session(sessionId)", "sync", ""), + (2, 3, "DELETE /sessions/{sessionId}", "sync", ""), + (3, 2, "200", "return", ""), + (1, 0, "200", "return", ""), + ] + sequence_diagram( + "Sequence: Quality on Demand (QoD) & Traffic Influence", + "App Client creates a QoD session, queries it, creates a Traffic Influence resource, then 
closes the session", + participants, messages, "sequence_qod.png", figsize=(16, 12) + ) + + +# ══════════════════════════════════════════════════════════════════ +# 6. API MAP +# ══════════════════════════════════════════════════════════════════ +def draw_api_map(): + fig, ax = plt.subplots(figsize=(18, 12)) + fig.patch.set_facecolor(C["bg"]) + ax.set_facecolor(C["bg"]) + ax.set_xlim(0, 1) + ax.set_ylim(0, 1) + ax.axis("off") + + fig.text(0.5, 0.975, "Open Exposure Gateway — API Endpoint Map", + ha="center", va="top", fontsize=15, color=C["white"], weight="bold") + fig.text(0.5, 0.950, "OpenAPI 3.0.3 · Base: /oeg/1.0.0 · Connexion/Flask backend", + ha="center", va="top", fontsize=9, color=C["grey"]) + + groups = [ + { + "title": "Application\nManagement", + "color": C["blue"], + "x": 0.13, "y": 0.82, + "endpoints": [ + ("POST", "/apps", "submitApp\n→ SRM /serviceFunction", C["green"]), + ("GET", "/apps", "getApps\n→ SRM /serviceFunction", C["green"]), + ("GET", "/apps/{appId}", "getApp\n→ SRM /serviceFunction/{id}", C["green"]), + ("DELETE", "/apps/{appId}", "deleteApp\n→ SRM /serviceFunction/{id}", C["red"]), + ("POST", "/appinstances", "createAppInstance\n→ SRM /deployedServiceFn\n or → FM (federated)", C["orange"]), + ("GET", "/appinstances", "getAppInstance\n→ SRM /deployedServiceFn", C["green"]), + ("DELETE", "/appinstances/{instanceId}", "deleteAppInstance\n→ SRM /deployedServiceFn/{id}", C["red"]), + ] + }, + { + "title": "Edge Cloud\nDiscovery", + "color": C["teal"], + "x": 0.38, "y": 0.82, + "endpoints": [ + ("GET", "/edge-cloud-zones", "getEdgeCloudZones\n→ SRM /node + FM partner zones", C["green"]), + ("GET", "/edge-cloud-zones/{zoneId}", "edgeCloudZoneDetails\n→ SRM /node/{id}", C["green"]), + ] + }, + { + "title": "Federation\nManagement", + "color": C["purple"], + "x": 0.62, "y": 0.82, + "endpoints": [ + ("POST", "/partner", "createFederation\n→ FM /partner\n[gets Keycloak token]", C["green"]), + ("GET", "/{fedContextId}/partner", "getFederation\n→ FM 
/{id}/partner", C["green"]), + ("DELETE", "/{fedContextId}/partner", "deleteFederation\n→ FM /{id}/partner\n[cleans MongoDB]", C["red"]), + ("GET", "/fed-context-id", "getFederationContextIds\n→ FM /fed-context-id", C["green"]), + ] + }, + { + "title": "Network\nFunctions", + "color": C["orange"], + "x": 0.875, "y": 0.82, + "endpoints": [ + ("POST", "/sessions", "createQodSession\n→ SRM /sessions", C["green"]), + ("GET", "/sessions/{sessionId}", "getQodSession\n→ SRM /sessions/{id}", C["green"]), + ("DELETE", "/sessions/{sessionId}", "deleteQodSession\n→ SRM /sessions/{id}", C["red"]), + ("POST", "/traffic-influences", "createTrafficInfluence\n→ SRM /traffic-influences", C["green"]), + ("GET", "/traffic-influences", "getAllTrafficInfluences\n→ SRM /traffic-influences", C["green"]), + ("GET", "/traffic-influences/{id}", "getTrafficInfluence\n→ SRM /traffic-influences/{id}", C["green"]), + ("DELETE", "/traffic-influences/{id}", "deleteTrafficInfluence\n→ SRM /traffic-influences/{id}", C["red"]), + ] + }, + ] + + METHOD_COLORS = { + "GET": C["green"], + "POST": C["blue"], + "DELETE": C["red"], + "PUT": C["orange"], + "PATCH": C["yellow"], + } + + for grp in groups: + gx, gy = grp["x"], grp["y"] + n_ep = len(grp["endpoints"]) + gh = 0.065 + n_ep * 0.082 + + # group header + styled_box(ax, gx, gy, 0.21, 0.055, grp["color"], + grp["title"], fontsize=10, bold=True) + + for i, (method, path, desc, _) in enumerate(grp["endpoints"]): + ey = gy - 0.055 - i * 0.085 + mc = METHOD_COLORS.get(method, C["grey"]) + + # method badge + ax.add_patch(FancyBboxPatch((gx - 0.105, ey - 0.025), 0.055, 0.040, + boxstyle="round,pad=0.004", + facecolor=mc, edgecolor="none", alpha=0.85, zorder=3)) + ax.text(gx - 0.0775, ey - 0.005, method, + ha="center", va="center", fontsize=7, color=C["white"], + weight="bold", zorder=4) + + # path + ax.text(gx - 0.040, ey + 0.008, path, + ha="left", va="center", fontsize=7.5, color=C["white"], zorder=4) + + # description / mapping + ax.text(gx - 0.040, ey - 
0.014, desc, + ha="left", va="center", fontsize=6.5, color=C["grey"], + style="italic", zorder=4) + + # connector line + ax.plot([gx, gx], [gy - 0.028, ey], + color=grp["color"], lw=0.7, alpha=0.35, zorder=1) + + # ── Issues panel ────────────────────────────────────────────── + issues_title = "⚠ Known Issues in Current Implementation" + issues = [ + "• Security (OAuth/JWT) fully commented out — no auth enforcement", + "• Hardcoded Basic-auth credentials in federation_manager_controller.py", + "• No MongoDB connection pooling — new client per request in storage_service.py", + "• MongoManager class exists but is unused (duplicated logic)", + "• get_edge_cloud_zones() filter functions defined but never applied", + "• Module-level side effects: DB insert on startup in edge_cloud_controller.py", + "• Mixed response patterns: raw dicts, jsonify(), response objects", + "• SSL verify=False on all outbound HTTP calls", + "• print() debug statements in production code (create_app_instance)", + "• Federation controller functions not exposed in OpenAPI spec", + "• Inconsistent async handling — uv uvicorn running sync Flask", + "• No request ID propagation / distributed tracing", + "• get_app_instance() ignores appId / appInstanceId / region filters", + ] + px, py = 0.01, 0.35 + ax.add_patch(FancyBboxPatch((px - 0.005, py - len(issues)*0.028 - 0.01), + 0.49, len(issues)*0.028 + 0.045, + boxstyle="round,pad=0.01", + facecolor=C["red"], edgecolor=C["red"], + alpha=0.08, zorder=1)) + ax.text(px + 0.235, py + 0.025, issues_title, + ha="center", va="center", fontsize=9, color=C["red"], + weight="bold", zorder=2) + for i, line in enumerate(issues): + ax.text(px, py - i*0.028, line, + ha="left", va="top", fontsize=7.5, color=C["white"], zorder=2) + + # ── FastAPI migration hints ──────────────────────────────────── + hints_title = "✅ FastAPI Migration Improvements" + hints = [ + "• Replace Connexion + Flask with FastAPI + Pydantic v2 models", + "• Enable async routes — use 
httpx.AsyncClient for SRM/FM calls", + "• Use motor (async MongoDB driver) for all DB access", + "• Enforce OAuth2 / JWT via FastAPI Security dependencies", + "• Centralize MongoDB sessions with lifespan dependency injection", + "• Apply response_model= and status_code= on every route", + "• Add structured logging with correlation-ID middleware", + "• Separate router files per domain (apps, zones, federation, network)", + "• Add proper filtering in get_edge_cloud_zones()", + "• Replace print() with proper logger calls everywhere", + "• Add background tasks for async federation flows", + "• Enable SSL cert verification via CA bundle config", + ] + hx, hy = 0.52, 0.35 + ax.add_patch(FancyBboxPatch((hx - 0.005, hy - len(hints)*0.028 - 0.01), + 0.49, len(hints)*0.028 + 0.045, + boxstyle="round,pad=0.01", + facecolor=C["green"], edgecolor=C["green"], + alpha=0.08, zorder=1)) + ax.text(hx + 0.235, hy + 0.025, hints_title, + ha="center", va="center", fontsize=9, color=C["green"], + weight="bold", zorder=2) + for i, line in enumerate(hints): + ax.text(hx, hy - i*0.028, line, + ha="left", va="top", fontsize=7.5, color=C["white"], zorder=2) + + fig.savefig(f"{OUT}/api_map.png", dpi=150, bbox_inches="tight", + facecolor=C["bg"]) + plt.close(fig) + print("✓ api_map.png") + + +# ── Run all ─────────────────────────────────────────────────────── +if __name__ == "__main__": + draw_architecture() + draw_seq_local() + draw_seq_federated() + draw_seq_federation() + draw_seq_qod() + draw_api_map() + print("\nAll diagrams saved to:", OUT) diff --git a/helm/CHANGES.md b/helm/CHANGES.md new file mode 100644 index 0000000000000000000000000000000000000000..a88d6e9a701e06bf374276d599494ae4a9d07405 --- /dev/null +++ b/helm/CHANGES.md @@ -0,0 +1,85 @@ +# Helm Folder Restructuring — Change Log + +## Motivation + +The original helm folder had accumulated files over time that made it hard for a new person to understand what to run, where to configure things, and what was safe to ignore. 
The goals of this cleanup were: + +1. **One entry point** — a new developer should be able to deploy OOP by running exactly one script with no ambiguity. +2. **No secrets in git** — a Kubernetes service account token was hardcoded and committed in `values.yaml`, getting overwritten on every deploy via `sed -i`. +3. **Pure Helm chart** — a Helm chart directory should contain only Helm artifacts (templates, values, Chart.yaml). Scripts, makefiles, and documentation fragments inside a chart are non-standard and confusing. +4. **Environment-agnostic base config** — `values.yaml` was mixing kind-specific settings (NodePorts, hostPath mounts) with base configuration, making it hard to deploy anywhere other than kind without manual editing. +5. **Single source of truth for documentation** — there were five separate documentation files in parallel, all covering the same deployment steps in slightly different ways. + +--- + +## Changes + +### Deleted files + +| File | Reason | +|---|---| +| `oop-platform-chart/output.txt` | Debug artifact, not a Helm file | +| `oop-platform-chart/values.yaml.backup` | Backup artifact, should never be committed | +| `oop-platform-chart/QUICK_DEPLOY.md` | Duplicate of README content | +| `oop-platform-chart/QUICK_REFERENCE.txt` | Duplicate of README content | +| `oop-platform-chart/VERIFICATION.txt` | Duplicate of README content | +| `oop-platform-chart/README.md` | 537-line duplicate; useful parts consolidated into `helm/README.md` | +| `oop-platform-chart/charts/oeg/START_HERE.txt` | Non-Helm artifact inside a subchart | +| `oop-platform-chart/charts/oeg/Makefile` | Non-Helm artifact inside a subchart | +| `oop-platform-chart/deploy.sh` | Inner deploy script absorbed into the top-level `deploy-on-kind.sh` | +| `oop-platform-chart/values.kind.yaml` | Moved to `helm/` level — environment overrides belong with deployment tooling, not inside the chart | + +### Created files + +| File | Reason | +|---|---| +| `values.kind.yaml` | Kind-specific 
overrides for the OOP umbrella chart (SRM + OEG): NodePorts, hostPath mounts, storageClass. Lives at `helm/` level, next to the deploy script. | +| `values.fm.kind.yaml` | Kind-specific overrides for the Federation Manager subchart. Separate file required because the FM is installed as a standalone `helm install` directly from its subchart, where keys are at root level — not under a `federationManager:` wrapper as in the umbrella chart. | + +### Modified files + +#### `oop-platform-chart/values.yaml` +- Removed the hardcoded Kubernetes service account JWT token. It is now an empty placeholder (`kubernetesMasterToken: ""`). The token is generated fresh at deploy time and injected via `--set`, so it never touches git. +- Removed kind-specific settings (NodePorts, hostPath paths, `storageClass: manual`) — these moved to `values.kind.yaml`. +- Service types changed from `NodePort` to `ClusterIP` as the neutral default. + +#### `kind-oop-config.yaml` +- Added the missing `mongodb_fm` hostPath mount. The original only mounted `mongodb_srm` and `mongodb_oeg`, which meant Federation Manager's MongoDB had no persistent storage bound in the kind node. Without this, the FM MongoDB PersistentVolume would fail to bind on kind. + +#### `deploy-on-kind.sh` +- Absorbed the logic from the deleted `oop-platform-chart/deploy.sh` (namespace creation, service account setup, token generation, `helm install`). +- Now a self-contained single entry point: run it from the `helm/` directory and it handles everything end to end. +- Token is generated and passed as `--set srm.srmcontroller.env.kubernetesMasterToken="$TOKEN"` — never written to disk or to any tracked file. +- First `helm install` deploys SRM + OEG via the umbrella chart using `-f values.kind.yaml`, with `federationManager.enabled=false` to prevent the umbrella from also trying to deploy FM. 
+- Second `helm install` deploys Federation Manager as a standalone release from its subchart using `-f values.fm.kind.yaml` at the correct root-level key structure. +- Uses `SCRIPT_DIR` so the script works correctly regardless of which directory it is called from. +- Added `sudo ... || true` guards on storage directory creation so a sudo failure does not abort the script silently under `set -e`. +- Added Federation Manager uninstall command to the cleanup hints printed at the end. + +#### `README.md` +- Reduced from 253 lines to 116 lines. +- Removed references to the deleted files and the two-script workflow. +- Added a clear configuration table explaining the role of `values.yaml`, `values.kind.yaml`, and `values.fm.kind.yaml`. +- Added an upgrade command and a non-kind deployment example. + +--- + +## Resulting structure + +``` +helm/ +├── README.md # Single authoritative doc +├── CHANGES.md # This file +├── deploy-on-kind.sh # Single entry point for kind deployment +├── kind-oop-config.yaml # kind cluster definition +├── values.kind.yaml # kind overrides for SRM + OEG (umbrella chart) +├── values.fm.kind.yaml # kind overrides for Federation Manager (subchart) +└── oop-platform-chart/ # Pure Helm umbrella chart — no scripts, no env config + ├── Chart.yaml + ├── values.yaml # Base defaults — environment-agnostic + ├── .helmignore + └── charts/ + ├── srm/ + ├── oeg/ + └── federation-manager/ +``` diff --git a/helm/get_helm.sh b/helm/get_helm.sh new file mode 100755 index 0000000000000000000000000000000000000000..1c90bbad5bc978e187ee5ae59640442fa27c0dc4 --- /dev/null +++ b/helm/get_helm.sh @@ -0,0 +1,347 @@ +#!/usr/bin/env bash + +# Copyright The Helm Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# The install script is based off of the MIT-licensed script from glide, +# the package manager for Go: https://github.com/Masterminds/glide.sh/blob/master/get + +: ${BINARY_NAME:="helm"} +: ${USE_SUDO:="true"} +: ${DEBUG:="false"} +: ${VERIFY_CHECKSUM:="true"} +: ${VERIFY_SIGNATURES:="false"} +: ${HELM_INSTALL_DIR:="/usr/local/bin"} +: ${GPG_PUBRING:="pubring.kbx"} + +HAS_CURL="$(type "curl" &> /dev/null && echo true || echo false)" +HAS_WGET="$(type "wget" &> /dev/null && echo true || echo false)" +HAS_OPENSSL="$(type "openssl" &> /dev/null && echo true || echo false)" +HAS_GPG="$(type "gpg" &> /dev/null && echo true || echo false)" +HAS_GIT="$(type "git" &> /dev/null && echo true || echo false)" +HAS_TAR="$(type "tar" &> /dev/null && echo true || echo false)" + +# initArch discovers the architecture for this system. +initArch() { + ARCH=$(uname -m) + case $ARCH in + armv5*) ARCH="armv5";; + armv6*) ARCH="armv6";; + armv7*) ARCH="arm";; + aarch64) ARCH="arm64";; + x86) ARCH="386";; + x86_64) ARCH="amd64";; + i686) ARCH="386";; + i386) ARCH="386";; + esac +} + +# initOS discovers the operating system for this system. 
+initOS() { + OS=$(echo `uname`|tr '[:upper:]' '[:lower:]') + + case "$OS" in + # Minimalist GNU for Windows + mingw*|cygwin*) OS='windows';; + esac +} + +# runs the given command as root (detects if we are root already) +runAsRoot() { + if [ $EUID -ne 0 -a "$USE_SUDO" = "true" ]; then + sudo "${@}" + else + "${@}" + fi +} + +# verifySupported checks that the os/arch combination is supported for +# binary builds, as well whether or not necessary tools are present. +verifySupported() { + local supported="darwin-amd64\ndarwin-arm64\nlinux-386\nlinux-amd64\nlinux-arm\nlinux-arm64\nlinux-loong64\nlinux-ppc64le\nlinux-s390x\nlinux-riscv64\nwindows-amd64\nwindows-arm64" + if ! echo "${supported}" | grep -q "${OS}-${ARCH}"; then + echo "No prebuilt binary for ${OS}-${ARCH}." + echo "To build from source, go to https://github.com/helm/helm" + exit 1 + fi + + if [ "${HAS_CURL}" != "true" ] && [ "${HAS_WGET}" != "true" ]; then + echo "Either curl or wget is required" + exit 1 + fi + + if [ "${VERIFY_CHECKSUM}" == "true" ] && [ "${HAS_OPENSSL}" != "true" ]; then + echo "In order to verify checksum, openssl must first be installed." + echo "Please install openssl or set VERIFY_CHECKSUM=false in your environment." + exit 1 + fi + + if [ "${VERIFY_SIGNATURES}" == "true" ]; then + if [ "${HAS_GPG}" != "true" ]; then + echo "In order to verify signatures, gpg must first be installed." + echo "Please install gpg or set VERIFY_SIGNATURES=false in your environment." + exit 1 + fi + if [ "${OS}" != "linux" ]; then + echo "Signature verification is currently only supported on Linux." + echo "Please set VERIFY_SIGNATURES=false or verify the signatures manually." + exit 1 + fi + fi + + if [ "${HAS_GIT}" != "true" ]; then + echo "[WARNING] Could not find git. It is required for plugin installation." + fi + + if [ "${HAS_TAR}" != "true" ]; then + echo "[ERROR] Could not find tar. It is required to extract the helm binary archive." 
+ exit 1 + fi +} + +# checkDesiredVersion checks if the desired version is available. +checkDesiredVersion() { + if [ "x$DESIRED_VERSION" == "x" ]; then + # Get tag from release URL + local latest_release_url="https://get.helm.sh/helm4-latest-version" + local latest_release_response="" + if [ "${HAS_CURL}" == "true" ]; then + latest_release_response=$( curl -L --silent --show-error --fail "$latest_release_url" 2>&1 || true ) + elif [ "${HAS_WGET}" == "true" ]; then + latest_release_response=$( wget "$latest_release_url" -q -O - 2>&1 || true ) + fi + TAG=$( echo "$latest_release_response" | grep '^v[0-9]' ) + if [ "x$TAG" == "x" ]; then + printf "Could not retrieve the latest release tag information from %s: %s\n" "${latest_release_url}" "${latest_release_response}" + exit 1 + fi + else + TAG=$DESIRED_VERSION + fi +} + +# checkHelmInstalledVersion checks which version of helm is installed and +# if it needs to be changed. +checkHelmInstalledVersion() { + if [[ -f "${HELM_INSTALL_DIR}/${BINARY_NAME}" ]]; then + local version=$("${HELM_INSTALL_DIR}/${BINARY_NAME}" version --template="{{ .Version }}") + if [[ "$version" == "$TAG" ]]; then + echo "Helm ${version} is already ${DESIRED_VERSION:-latest}" + return 0 + else + echo "Helm ${TAG} is available. Changing from version ${version}." + return 1 + fi + else + return 1 + fi +} + +# downloadFile downloads the latest binary package and also the checksum +# for that binary. 
+downloadFile() { + HELM_DIST="helm-$TAG-$OS-$ARCH.tar.gz" + DOWNLOAD_URL="https://get.helm.sh/$HELM_DIST" + CHECKSUM_URL="$DOWNLOAD_URL.sha256" + HELM_TMP_ROOT="$(mktemp -dt helm-installer-XXXXXX)" + HELM_TMP_FILE="$HELM_TMP_ROOT/$HELM_DIST" + HELM_SUM_FILE="$HELM_TMP_ROOT/$HELM_DIST.sha256" + echo "Downloading $DOWNLOAD_URL" + if [ "${HAS_CURL}" == "true" ]; then + curl -SsL "$CHECKSUM_URL" -o "$HELM_SUM_FILE" + curl -SsL "$DOWNLOAD_URL" -o "$HELM_TMP_FILE" + elif [ "${HAS_WGET}" == "true" ]; then + wget -q -O "$HELM_SUM_FILE" "$CHECKSUM_URL" + wget -q -O "$HELM_TMP_FILE" "$DOWNLOAD_URL" + fi +} + +# verifyFile verifies the SHA256 checksum of the binary package +# and the GPG signatures for both the package and checksum file +# (depending on settings in environment). +verifyFile() { + if [ "${VERIFY_CHECKSUM}" == "true" ]; then + verifyChecksum + fi + if [ "${VERIFY_SIGNATURES}" == "true" ]; then + verifySignatures + fi +} + +# installFile installs the Helm binary. +installFile() { + HELM_TMP="$HELM_TMP_ROOT/$BINARY_NAME" + mkdir -p "$HELM_TMP" + tar xf "$HELM_TMP_FILE" -C "$HELM_TMP" + HELM_TMP_BIN="$HELM_TMP/$OS-$ARCH/helm" + echo "Preparing to install $BINARY_NAME into ${HELM_INSTALL_DIR}" + runAsRoot cp "$HELM_TMP_BIN" "$HELM_INSTALL_DIR/$BINARY_NAME" + echo "$BINARY_NAME installed into $HELM_INSTALL_DIR/$BINARY_NAME" +} + +# verifyChecksum verifies the SHA256 checksum of the binary package. +verifyChecksum() { + printf "Verifying checksum... " + local sum=$(openssl sha1 -sha256 ${HELM_TMP_FILE} | awk '{print $2}') + local expected_sum=$(cat ${HELM_SUM_FILE}) + if [ "$sum" != "$expected_sum" ]; then + echo "SHA sum of ${HELM_TMP_FILE} does not match. Aborting." + exit 1 + fi + echo "Done." +} + +# verifySignatures obtains the latest KEYS file from GitHub main branch +# as well as the signature .asc files from the specific GitHub release, +# then verifies that the release artifacts were signed by a maintainer's key. 
+verifySignatures() { + printf "Verifying signatures... " + local keys_filename="KEYS" + local github_keys_url="https://raw.githubusercontent.com/helm/helm/main/${keys_filename}" + if [ "${HAS_CURL}" == "true" ]; then + curl -SsL "${github_keys_url}" -o "${HELM_TMP_ROOT}/${keys_filename}" + elif [ "${HAS_WGET}" == "true" ]; then + wget -q -O "${HELM_TMP_ROOT}/${keys_filename}" "${github_keys_url}" + fi + local gpg_keyring="${HELM_TMP_ROOT}/keyring.gpg" + local gpg_homedir="${HELM_TMP_ROOT}/gnupg" + mkdir -p -m 0700 "${gpg_homedir}" + local gpg_stderr_device="/dev/null" + if [ "${DEBUG}" == "true" ]; then + gpg_stderr_device="/dev/stderr" + fi + gpg --batch --quiet --homedir="${gpg_homedir}" --import "${HELM_TMP_ROOT}/${keys_filename}" 2> "${gpg_stderr_device}" + gpg --batch --no-default-keyring --keyring "${gpg_homedir}/${GPG_PUBRING}" --export > "${gpg_keyring}" + local github_release_url="https://github.com/helm/helm/releases/download/${TAG}" + if [ "${HAS_CURL}" == "true" ]; then + curl -SsL "${github_release_url}/helm-${TAG}-${OS}-${ARCH}.tar.gz.sha256.asc" -o "${HELM_TMP_ROOT}/helm-${TAG}-${OS}-${ARCH}.tar.gz.sha256.asc" + curl -SsL "${github_release_url}/helm-${TAG}-${OS}-${ARCH}.tar.gz.asc" -o "${HELM_TMP_ROOT}/helm-${TAG}-${OS}-${ARCH}.tar.gz.asc" + elif [ "${HAS_WGET}" == "true" ]; then + wget -q -O "${HELM_TMP_ROOT}/helm-${TAG}-${OS}-${ARCH}.tar.gz.sha256.asc" "${github_release_url}/helm-${TAG}-${OS}-${ARCH}.tar.gz.sha256.asc" + wget -q -O "${HELM_TMP_ROOT}/helm-${TAG}-${OS}-${ARCH}.tar.gz.asc" "${github_release_url}/helm-${TAG}-${OS}-${ARCH}.tar.gz.asc" + fi + local error_text="If you think this might be a potential security issue," + error_text="${error_text}\nplease see here: https://github.com/helm/community/blob/master/SECURITY.md" + local num_goodlines_sha=$(gpg --verify --keyring="${gpg_keyring}" --status-fd=1 "${HELM_TMP_ROOT}/helm-${TAG}-${OS}-${ARCH}.tar.gz.sha256.asc" 2> "${gpg_stderr_device}" | grep -c -E '^\[GNUPG:\] (GOODSIG|VALIDSIG)') + if 
[[ ${num_goodlines_sha} -lt 2 ]]; then + echo "Unable to verify the signature of helm-${TAG}-${OS}-${ARCH}.tar.gz.sha256!" + echo -e "${error_text}" + exit 1 + fi + local num_goodlines_tar=$(gpg --verify --keyring="${gpg_keyring}" --status-fd=1 "${HELM_TMP_ROOT}/helm-${TAG}-${OS}-${ARCH}.tar.gz.asc" 2> "${gpg_stderr_device}" | grep -c -E '^\[GNUPG:\] (GOODSIG|VALIDSIG)') + if [[ ${num_goodlines_tar} -lt 2 ]]; then + echo "Unable to verify the signature of helm-${TAG}-${OS}-${ARCH}.tar.gz!" + echo -e "${error_text}" + exit 1 + fi + echo "Done." +} + +# fail_trap is executed if an error occurs. +fail_trap() { + result=$? + if [ "$result" != "0" ]; then + if [[ -n "$INPUT_ARGUMENTS" ]]; then + echo "Failed to install $BINARY_NAME with the arguments provided: $INPUT_ARGUMENTS" + help + else + echo "Failed to install $BINARY_NAME" + fi + echo -e "\tFor support, go to https://github.com/helm/helm." + fi + cleanup + exit $result +} + +# testVersion tests the installed client to make sure it is working. +testVersion() { + set +e + HELM="$(command -v $BINARY_NAME)" + if [ "$?" = "1" ]; then + echo "$BINARY_NAME not found. Is $HELM_INSTALL_DIR on your "'$PATH?' + exit 1 + fi + set -e +} + +# help provides possible cli installation arguments +help () { + echo "Accepted cli arguments are:" + echo -e "\t[--help|-h ] ->> prints this help" + echo -e "\t[--version|-v ] . When not defined it fetches the latest release tag from the Helm CDN" + echo -e "\te.g. 
--version v4.0.0 or -v canary" + echo -e "\t[--no-sudo] ->> install without sudo" +} + +# cleanup temporary files to avoid https://github.com/helm/helm/issues/2977 +cleanup() { + if [[ -d "${HELM_TMP_ROOT:-}" ]]; then + rm -rf "$HELM_TMP_ROOT" + fi +} + +# Execution + +#Stop execution on any error +trap "fail_trap" EXIT +set -e + +# Set debug if desired +if [ "${DEBUG}" == "true" ]; then + set -x +fi + +# Parsing input arguments (if any) +export INPUT_ARGUMENTS="${@}" +set -u +while [[ $# -gt 0 ]]; do + case $1 in + '--version'|-v) + shift + if [[ $# -ne 0 ]]; then + export DESIRED_VERSION="${1}" + if [[ "$1" != "v"* ]]; then + echo "Expected version arg ('${DESIRED_VERSION}') to begin with 'v', fixing..." + export DESIRED_VERSION="v${1}" + fi + else + echo -e "Please provide the desired version. e.g. --version v4.0.0 or -v canary" + exit 0 + fi + ;; + '--no-sudo') + USE_SUDO="false" + ;; + '--help'|-h) + help + exit 0 + ;; + *) exit 1 + ;; + esac + shift +done +set +u + +initArch +initOS +verifySupported +checkDesiredVersion +if ! 
checkHelmInstalledVersion; then + downloadFile + verifyFile + installFile +fi +testVersion +cleanup diff --git a/helm/kubectl b/helm/kubectl new file mode 100644 index 0000000000000000000000000000000000000000..ff031a1dc3c62453a59e37a3d3791d5a023692e7 Binary files /dev/null and b/helm/kubectl differ diff --git a/helm/kubectl.sha256 b/helm/kubectl.sha256 new file mode 100644 index 0000000000000000000000000000000000000000..958c55d90a6ecaf459d0f7c201394fe64b4767c2 --- /dev/null +++ b/helm/kubectl.sha256 @@ -0,0 +1 @@ +a2e984a18a0c063279d692533031c1eff93a262afcc0afdc517375432d060989 \ No newline at end of file diff --git a/helm/values.fm.kind.yaml b/helm/values.fm.kind.yaml new file mode 100644 index 0000000000000000000000000000000000000000..189cde663651d24376a26f281c49c817f6c6c80d --- /dev/null +++ b/helm/values.fm.kind.yaml @@ -0,0 +1,25 @@ +# ==================================================================== +# Kind-specific overrides for the Federation Manager subchart +# ==================================================================== +# Used by deploy-on-kind.sh: +# helm install federation-manager ./oop-platform-chart/charts/federation-manager \ +# -f values.fm.kind.yaml +# +# Note: keys are at root level because this targets the subchart directly, +# not through the umbrella chart (where they would be under federationManager:). 
+# ====================================================================
+
+mongodb:
+  persistence:
+    storageClass: manual
+    hostPath: /mnt/data/mongodb_fm
+
+keycloak:
+  service:
+    type: NodePort
+    nodePort: 30081
+
+federationManager:
+  service:
+    type: NodePort
+    nodePort: 30989
diff --git a/helm/values.kind.yaml b/helm/values.kind.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..20920ea14e6f16a1021e8c87e78917c36d338a10
--- /dev/null
+++ b/helm/values.kind.yaml
@@ -0,0 +1,38 @@
+# ====================================================================
+# Kind-specific overrides for the OOP umbrella chart (SRM + OEG)
+# ====================================================================
+# Used by deploy-on-kind.sh:
+#   helm install oop-platform ./oop-platform-chart -f values.kind.yaml
+#
+# For other environments supply your own values file.
+# ====================================================================
+
+srm:
+  mongodb:
+    persistence:
+      storageClass: manual
+      hostPath:
+        enabled: true
+        path: /mnt/data/mongodb_srm
+      createPV: true
+  srmcontroller:
+    service:
+      type: NodePort
+      nodePort: 32415
+  artifactManager:
+    service:
+      type: NodePort
+      nodePort: 30080
+
+oeg:
+  mongodb:
+    persistence:
+      storageClass: manual
+      hostPath:
+        enabled: true
+        path: /mnt/data/mongodb_oeg
+      createPV: true
+  oegcontroller:
+    service:
+      type: NodePort
+      nodePort: 32263
diff --git a/tests/unit/controllers/test_network_functions_controller.py b/tests/unit/controllers/test_network_functions_controller.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ad0683795fc9ff89a16a9776fe2d186a84484c5
--- /dev/null
+++ b/tests/unit/controllers/test_network_functions_controller.py
@@ -0,0 +1,99 @@
+import pytest
+from unittest.mock import patch, MagicMock
+from flask import Flask
+from edge_cloud_management_api.controllers import network_functions_controller
+
+
+@pytest.fixture
+def test_app():
+    app = Flask(__name__)
+    app.config["TESTING"] = True
+    with app.app_context():
+        yield app
+
+
+SAMPLE_LOCATION_REQUEST = {
+    "device": {"phoneNumber": "+123456789"},
+    "maxAge": 60,
+}
+
+SAMPLE_LOCATION_RESPONSE = {
+    "lastLocationTime": "2024-06-01T12:00:00Z",
+    "area": {
+        "areaType": "CIRCLE",
+        "center": {"latitude": 37.9553, "longitude": 23.8522},
+        "radius": 800,
+    },
+}
+
+
+@pytest.mark.unit
+@patch("edge_cloud_management_api.controllers.network_functions_controller.PiEdgeAPIClientFactory")
+def test_retrieve_location_success(mock_factory_class, test_app: Flask):
+    """Successful location retrieval returns the SRM response as-is."""
+    mock_client = MagicMock()
+    mock_client.retrieve_location.return_value = SAMPLE_LOCATION_RESPONSE
+    mock_factory_class.return_value.create_pi_edge_api_client.return_value = mock_client
+
+    with test_app.test_request_context():
+        result = network_functions_controller.retrieve_location(SAMPLE_LOCATION_REQUEST)
+
+    assert result == SAMPLE_LOCATION_RESPONSE
+    mock_client.retrieve_location.assert_called_once_with(SAMPLE_LOCATION_REQUEST)
+
+
+@pytest.mark.unit
+@patch("edge_cloud_management_api.controllers.network_functions_controller.PiEdgeAPIClientFactory")
+def test_retrieve_location_srm_error_is_relayed(mock_factory_class, test_app: Flask):
+    """When the SRM returns an error tuple (body, status), the controller relays it."""
+    error_body = {"error": "Device not found"}
+    mock_client = MagicMock()
+    mock_client.retrieve_location.return_value = (error_body, 404)
+    mock_factory_class.return_value.create_pi_edge_api_client.return_value = mock_client
+
+    with test_app.test_request_context():
+        result = network_functions_controller.retrieve_location(SAMPLE_LOCATION_REQUEST)
+
+    assert result == (error_body, 404)
+
+
+@pytest.mark.unit
+@patch("edge_cloud_management_api.controllers.network_functions_controller.PiEdgeAPIClientFactory")
+def test_retrieve_location_connection_error(mock_factory_class, test_app: Flask):
+    """When the SRM is unreachable, the controller returns a 500 error."""
+    mock_client = MagicMock()
+    mock_client.retrieve_location.side_effect = ConnectionError("Connection refused")
+    mock_factory_class.return_value.create_pi_edge_api_client.return_value = mock_client
+
+    with test_app.test_request_context():
+        response, status = network_functions_controller.retrieve_location(SAMPLE_LOCATION_REQUEST)
+
+    assert status == 500
+    data = response.get_json()
+    assert data["error"] == "An unexpected error occurred"
+    assert "Connection refused" in data["details"]
+
+
+@pytest.mark.unit
+@patch("edge_cloud_management_api.controllers.network_functions_controller.PiEdgeAPIClientFactory")
+def test_retrieve_location_with_ipv4(mock_factory_class, test_app: Flask):
+    """Location retrieval works with IPv4 device identifier."""
+    ipv4_request = {
+        "device": {
+            "ipv4Address": {
+                "publicAddress": "198.51.100.1",
+                "publicPort": 59765,
+            }
+        },
+        "maxAge": 120,
+        "maxSurface": 10000,
+    }
+    mock_client = MagicMock()
+    mock_client.retrieve_location.return_value = SAMPLE_LOCATION_RESPONSE
+    mock_factory_class.return_value.create_pi_edge_api_client.return_value = mock_client
+
+    with test_app.test_request_context():
+        result = network_functions_controller.retrieve_location(ipv4_request)
+
+    assert result == SAMPLE_LOCATION_RESPONSE
+    mock_client.retrieve_location.assert_called_once_with(ipv4_request)