From 4e6da4e81a8a4ab61db85f25c63db98ca64f85ff Mon Sep 17 00:00:00 2001 From: CodeMaster4711 Date: Tue, 24 Mar 2026 20:43:03 +0100 Subject: [PATCH 01/16] fix: nix container version --- nixos-node/modules/server-configuration.nix | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/nixos-node/modules/server-configuration.nix b/nixos-node/modules/server-configuration.nix index e50c760..187728d 100644 --- a/nixos-node/modules/server-configuration.nix +++ b/nixos-node/modules/server-configuration.nix @@ -236,7 +236,7 @@ services: restart: unless-stopped api-gateway: - image: ghcr.io/csfx-cloud/csf-ce-api-gateway:0.2.2-alpha.42 + image: ghcr.io/csfx-cloud/csf-ce-api-gateway:0.2.2-alpha.44 container_name: csf-api-gateway env_file: - /etc/csf-core/gateway.env @@ -266,7 +266,7 @@ services: start_period: 30s registry: - image: ghcr.io/csfx-cloud/csf-ce-registry:0.2.2-alpha.42 + image: ghcr.io/csfx-cloud/csf-ce-registry:0.2.2-alpha.44 container_name: csf-registry environment: DATABASE_URL: postgres://csf:csfpassword@patroni:5432/csf_core @@ -282,7 +282,7 @@ services: restart: unless-stopped scheduler: - image: ghcr.io/csfx-cloud/csf-ce-scheduler:0.2.2-alpha.42 + image: ghcr.io/csfx-cloud/csf-ce-scheduler:0.2.2-alpha.44 container_name: csf-scheduler environment: DATABASE_URL: postgres://csf:csfpassword@patroni:5432/csf_core @@ -297,7 +297,7 @@ services: restart: unless-stopped volume-manager: - image: ghcr.io/csfx-cloud/csf-ce-volume-manager:0.2.2-alpha.42 + image: ghcr.io/csfx-cloud/csf-ce-volume-manager:0.2.2-alpha.44 container_name: csf-volume-manager environment: DATABASE_URL: postgres://csf:csfpassword@patroni:5432/csf_core @@ -314,7 +314,7 @@ services: restart: unless-stopped failover-controller: - image: ghcr.io/csfx-cloud/csf-ce-failover-controller:0.2.2-alpha.42 + image: ghcr.io/csfx-cloud/csf-ce-failover-controller:0.2.2-alpha.44 container_name: csf-failover-controller environment: DATABASE_URL: 
postgres://csf:csfpassword@patroni:5432/csf_core @@ -330,7 +330,7 @@ services: restart: unless-stopped sdn-controller: - image: ghcr.io/csfx-cloud/csf-ce-sdn-controller:0.2.2-alpha.42 + image: ghcr.io/csfx-cloud/csf-ce-sdn-controller:0.2.2-alpha.44 container_name: csf-sdn-controller environment: DATABASE_URL: postgres://csf:csfpassword@patroni:5432/csf_core From a6b0bbe86e6de90e9bcab2a1df9a4bed74b33e55 Mon Sep 17 00:00:00 2001 From: CodeMaster4711 Date: Tue, 24 Mar 2026 20:51:55 +0100 Subject: [PATCH 02/16] fix: agent error --- nixos-node/modules/csf-daemon.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/nixos-node/modules/csf-daemon.nix b/nixos-node/modules/csf-daemon.nix index 22f0626..2e7605b 100644 --- a/nixos-node/modules/csf-daemon.nix +++ b/nixos-node/modules/csf-daemon.nix @@ -98,6 +98,7 @@ in RestrictRealtime = true; SystemCallFilter = "@system-service"; ReadWritePaths = [ "/var/lib/csf-daemon" ]; + ReadOnlyPaths = [ "/var/lib/csf-updater/bin" ]; }; }; }; From bbb8694fc2c7a3a065c5a08af073d2009b8a9fd4 Mon Sep 17 00:00:00 2001 From: CodeMaster4711 Date: Tue, 24 Mar 2026 21:14:05 +0100 Subject: [PATCH 03/16] fix: updater error with images and pull --- control-plane/csf-updater/src/verify.rs | 12 ++++++++++++ nixos-node/modules/server-configuration.nix | 6 +++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/control-plane/csf-updater/src/verify.rs b/control-plane/csf-updater/src/verify.rs index c074b47..5fae4e2 100644 --- a/control-plane/csf-updater/src/verify.rs +++ b/control-plane/csf-updater/src/verify.rs @@ -81,6 +81,18 @@ async fn remote_digest(client: &reqwest::Client, image: &str, tag: &str, ghcr_au } fn local_digest(image: &str) -> Result { + let pull = std::process::Command::new("docker") + .args(["pull", "--quiet", image]) + .output()?; + + if !pull.status.success() { + bail!( + "docker pull failed for {}: {}", + image, + String::from_utf8_lossy(&pull.stderr).trim() + ); + } + let output = std::process::Command::new("docker") 
.args(["image", "inspect", "--format", "{{json .RepoDigests}}", image]) .output()?; diff --git a/nixos-node/modules/server-configuration.nix b/nixos-node/modules/server-configuration.nix index 187728d..56a3153 100644 --- a/nixos-node/modules/server-configuration.nix +++ b/nixos-node/modules/server-configuration.nix @@ -102,6 +102,10 @@ in }; users.groups.csf-updater = {}; + systemd.tmpfiles.rules = [ + "d /var/lib/csf-updater 0710 csf-updater csf-daemon -" + ]; + systemd.services.csf-updater = { description = "CSF Control Plane Updater"; after = [ "docker.service" "network-online.target" "csf-control-plane.service" ]; @@ -158,7 +162,7 @@ in system.activationScripts.csf-binaries = { text = '' mkdir -p ${binDir} - chown csf-updater:csf-updater ${binDir} + chown csf-updater:csf-daemon ${binDir} chmod 750 ${binDir} if [ ! -f ${binDir}/csf-updater ]; then cp ${csfUpdaterBin}/bin/csf-updater ${binDir}/csf-updater From cd9b47ce0cbfb59292ba28fdfc173c5c0f2e9914 Mon Sep 17 00:00:00 2001 From: CodeMaster4711 Date: Tue, 24 Mar 2026 21:32:05 +0100 Subject: [PATCH 04/16] fix: image version --- nixos-node/modules/server-configuration.nix | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/nixos-node/modules/server-configuration.nix b/nixos-node/modules/server-configuration.nix index 56a3153..904afc6 100644 --- a/nixos-node/modules/server-configuration.nix +++ b/nixos-node/modules/server-configuration.nix @@ -240,7 +240,7 @@ services: restart: unless-stopped api-gateway: - image: ghcr.io/csfx-cloud/csf-ce-api-gateway:0.2.2-alpha.44 + image: ghcr.io/csfx-cloud/csf-ce-api-gateway:0.2.2-alpha.47 container_name: csf-api-gateway env_file: - /etc/csf-core/gateway.env @@ -270,7 +270,7 @@ services: start_period: 30s registry: - image: ghcr.io/csfx-cloud/csf-ce-registry:0.2.2-alpha.44 + image: ghcr.io/csfx-cloud/csf-ce-registry:0.2.2-alpha.47 container_name: csf-registry environment: DATABASE_URL: postgres://csf:csfpassword@patroni:5432/csf_core @@ -286,7 +286,7 
@@ services: restart: unless-stopped scheduler: - image: ghcr.io/csfx-cloud/csf-ce-scheduler:0.2.2-alpha.44 + image: ghcr.io/csfx-cloud/csf-ce-scheduler:0.2.2-alpha.47 container_name: csf-scheduler environment: DATABASE_URL: postgres://csf:csfpassword@patroni:5432/csf_core @@ -301,7 +301,7 @@ services: restart: unless-stopped volume-manager: - image: ghcr.io/csfx-cloud/csf-ce-volume-manager:0.2.2-alpha.44 + image: ghcr.io/csfx-cloud/csf-ce-volume-manager:0.2.2-alpha.47 container_name: csf-volume-manager environment: DATABASE_URL: postgres://csf:csfpassword@patroni:5432/csf_core @@ -318,7 +318,7 @@ services: restart: unless-stopped failover-controller: - image: ghcr.io/csfx-cloud/csf-ce-failover-controller:0.2.2-alpha.44 + image: ghcr.io/csfx-cloud/csf-ce-failover-controller:0.2.2-alpha.47 container_name: csf-failover-controller environment: DATABASE_URL: postgres://csf:csfpassword@patroni:5432/csf_core @@ -334,7 +334,7 @@ services: restart: unless-stopped sdn-controller: - image: ghcr.io/csfx-cloud/csf-ce-sdn-controller:0.2.2-alpha.44 + image: ghcr.io/csfx-cloud/csf-ce-sdn-controller:0.2.2-alpha.47 container_name: csf-sdn-controller environment: DATABASE_URL: postgres://csf:csfpassword@patroni:5432/csf_core From 13d209faf3ee8d6aeb859afa007cd648d6c6cc69 Mon Sep 17 00:00:00 2001 From: CodeMaster4711 Date: Mon, 6 Apr 2026 19:46:20 +0200 Subject: [PATCH 05/16] refactor: migrate csf-updater and api-gateway update routes to gitops flake-rev model --- Cargo.lock | 3 + .../api-gateway/src/routes/update.rs | 167 ++-------- control-plane/csf-updater/src/config.rs | 17 +- control-plane/csf-updater/src/etcd.rs | 6 +- control-plane/csf-updater/src/main.rs | 28 +- control-plane/csf-updater/src/secret.rs | 26 -- control-plane/csf-updater/src/updater.rs | 190 ++--------- control-plane/csf-updater/src/verify.rs | 112 ------- docs/UPDATER_PLAN.md | 305 ++++++++++++++++++ 9 files changed, 368 insertions(+), 486 deletions(-) delete mode 100644 control-plane/csf-updater/src/secret.rs 
delete mode 100644 control-plane/csf-updater/src/verify.rs create mode 100644 docs/UPDATER_PLAN.md diff --git a/Cargo.lock b/Cargo.lock index 7f3eebb..dec81df 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1136,11 +1136,14 @@ dependencies = [ "aes-gcm", "anyhow", "base64 0.22.1", + "bytes", "dotenvy", "etcd-client", + "hex", "reqwest 0.11.27", "serde", "serde_json", + "sha2", "tempfile", "tokio", "tracing", diff --git a/control-plane/api-gateway/src/routes/update.rs b/control-plane/api-gateway/src/routes/update.rs index 44db094..274d357 100644 --- a/control-plane/api-gateway/src/routes/update.rs +++ b/control-plane/api-gateway/src/routes/update.rs @@ -3,40 +3,32 @@ use etcd_client::Client; use serde::{Deserialize, Serialize}; use std::env; -use crate::auth::crypto::{decrypt_secret, encrypt_secret}; use crate::auth::rbac::CanManageSystem; use crate::AppState; -const ETCD_DESIRED_VERSION_KEY: &str = "/csf/config/desired_cp_version"; -const ETCD_UPDATE_RESULT_KEY: &str = "/csf/config/last_update_result"; -const ETCD_GHCR_TOKEN_KEY: &str = "/csf/config/ghcr_token"; +const ETCD_DESIRED_FLAKE_REV_KEY: &str = "/csf/config/desired_flake_rev"; +const ETCD_BUILD_STATUS_KEY: &str = "/csf/config/cp_build_status"; +const ETCD_RESULT_KEY: &str = "/csf/config/last_build_result"; const ETCD_PAUSED_KEY: &str = "/csf/config/update_paused"; #[derive(Debug, Deserialize)] pub struct UpdateRequest { - pub version: String, + pub flake_rev: String, } #[derive(Debug, Serialize)] pub struct UpdateResponse { pub status: String, - pub version: String, + pub flake_rev: String, } #[derive(Debug, Serialize)] pub struct UpdateStatusResponse { pub current_version: String, - pub desired_version: Option, + pub desired_flake_rev: Option, + pub build_status: Option, pub last_result: Option, pub paused: bool, - pub agent_version: Option, - pub updater_version: Option, -} - -#[derive(Debug, Deserialize)] -pub struct GhcrTokenRequest { - pub token: String, - pub username: String, } pub fn routes() -> 
Router { @@ -45,7 +37,6 @@ pub fn routes() -> Router { .route("/system/update/status", get(update_status)) .route("/system/update/pause", post(pause_updates)) .route("/system/update/resume", post(resume_updates)) - .route("/system/ghcr-token", post(set_ghcr_token)) } async fn etcd_client() -> Result { @@ -64,35 +55,25 @@ async fn trigger_update( State(_state): State, Json(req): Json, ) -> Result, StatusCode> { - if !is_valid_semver(&req.version) { + if !is_valid_sha(&req.flake_rev) { return Err(StatusCode::UNPROCESSABLE_ENTITY); } let mut client = etcd_client().await?; client - .put(ETCD_DESIRED_VERSION_KEY, req.version.as_bytes(), None) - .await - .map_err(|e| { - tracing::error!(error = %e, "failed to write desired version to etcd"); - StatusCode::INTERNAL_SERVER_ERROR - })?; - - client - .put(ETCD_UPDATE_RESULT_KEY, b"in_progress", None) + .put(ETCD_DESIRED_FLAKE_REV_KEY, req.flake_rev.as_bytes(), None) .await .map_err(|e| { - tracing::error!(error = %e, "failed to write update result to etcd"); + tracing::error!(error = %e, "failed to write desired flake rev to etcd"); StatusCode::INTERNAL_SERVER_ERROR })?; - tracing::info!(version = %req.version, "update requested"); - - spawn_update(req.version.clone()); + tracing::info!(flake_rev = %req.flake_rev, "update requested"); Ok(Json(UpdateResponse { status: "update_scheduled".to_string(), - version: req.version, + flake_rev: req.flake_rev, })) } @@ -102,34 +83,20 @@ async fn update_status( ) -> Result, StatusCode> { let mut client = etcd_client().await?; - let desired = etcd_get(&mut client, ETCD_DESIRED_VERSION_KEY).await?; - let last_result = etcd_get(&mut client, ETCD_UPDATE_RESULT_KEY).await?; + let desired_flake_rev = etcd_get(&mut client, ETCD_DESIRED_FLAKE_REV_KEY).await?; + let build_status = etcd_get(&mut client, ETCD_BUILD_STATUS_KEY).await?; + let last_result = etcd_get(&mut client, ETCD_RESULT_KEY).await?; let paused = etcd_get(&mut client, ETCD_PAUSED_KEY).await?.as_deref() == Some("true"); - let 
binary_dir = env::var("BINARY_DIR").unwrap_or_else(|_| "/usr/local/bin".to_string()); - let agent_version = binary_version(&format!("{}/csf-agent", binary_dir)).await; - let updater_version = binary_version(&format!("{}/csf-updater", binary_dir)).await; - Ok(Json(UpdateStatusResponse { current_version: env!("CARGO_PKG_VERSION").to_string(), - desired_version: desired, + desired_flake_rev, + build_status, last_result, paused, - agent_version, - updater_version, })) } -async fn binary_version(path: &str) -> Option { - let output = tokio::process::Command::new(path) - .arg("--version") - .output() - .await - .ok()?; - let raw = String::from_utf8(output.stdout).ok()?; - raw.split_whitespace().last().map(|s| s.trim().to_string()) -} - async fn etcd_get(client: &mut Client, key: &str) -> Result, StatusCode> { let resp = client.get(key, None).await.map_err(|e| { tracing::error!(error = %e, key = key, "failed to read from etcd"); @@ -143,71 +110,8 @@ async fn etcd_get(client: &mut Client, key: &str) -> Result, Stat .map(|s| s.to_string())) } -fn is_valid_semver(version: &str) -> bool { - let v = version.strip_prefix('v').unwrap_or(version); - let (base, _pre) = match v.split_once('-') { - Some((b, p)) => (b, Some(p)), - None => (v, None), - }; - let parts: Vec<&str> = base.split('.').collect(); - parts.len() == 3 && parts.iter().all(|p| p.parse::().is_ok()) -} - -fn spawn_update(version: String) { - tokio::spawn(async move { - if let Err(e) = run_update(&version).await { - tracing::error!(error = %e, version = %version, "update failed"); - write_result("failed").await; - } else { - tracing::info!(version = %version, "update completed"); - write_result("success").await; - } - }); -} - -async fn run_update(version: &str) -> Result<(), String> { - let compose_file = env::var("COMPOSE_FILE") - .unwrap_or_else(|_| "docker-compose.prod.yml".to_string()); - let ghcr_org = env::var("GHCR_ORG").map_err(|_| "GHCR_ORG not set".to_string())?; - - pull_images(&compose_file, &ghcr_org, 
version).await?; - restart_services(&compose_file, &ghcr_org, version).await -} - -async fn pull_images(compose_file: &str, ghcr_org: &str, version: &str) -> Result<(), String> { - let status = tokio::process::Command::new("docker") - .args(["compose", "-f", compose_file, "pull"]) - .env("GHCR_ORG", ghcr_org) - .env("CSF_VERSION", version) - .status() - .await - .map_err(|e| format!("docker compose pull failed: {}", e))?; - - if !status.success() { - return Err(format!("docker compose pull exited with {}", status)); - } - Ok(()) -} - -async fn restart_services(compose_file: &str, ghcr_org: &str, version: &str) -> Result<(), String> { - let status = tokio::process::Command::new("docker") - .args(["compose", "-f", compose_file, "up", "-d"]) - .env("GHCR_ORG", ghcr_org) - .env("CSF_VERSION", version) - .status() - .await - .map_err(|e| format!("docker compose up failed: {}", e))?; - - if !status.success() { - return Err(format!("docker compose up exited with {}", status)); - } - Ok(()) -} - -async fn write_result(result: &str) { - if let Ok(mut client) = etcd_client().await { - let _ = client.put(ETCD_UPDATE_RESULT_KEY, result.as_bytes(), None).await; - } +fn is_valid_sha(rev: &str) -> bool { + rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) } async fn pause_updates( @@ -241,36 +145,3 @@ async fn resume_updates( tracing::info!("updates resumed"); Ok(StatusCode::NO_CONTENT) } - -async fn set_ghcr_token( - _auth: CanManageSystem, - State(_state): State, - Json(req): Json, -) -> Result { - if req.token.is_empty() || req.username.is_empty() { - return Err(StatusCode::UNPROCESSABLE_ENTITY); - } - - let encryption_key = env::var("SECRET_ENCRYPTION_KEY").map_err(|_| { - tracing::error!("SECRET_ENCRYPTION_KEY not set"); - StatusCode::INTERNAL_SERVER_ERROR - })?; - - let payload = format!("{}:{}", req.username, req.token); - let encrypted = encrypt_secret(&payload, &encryption_key).map_err(|e| { - tracing::error!(error = %e, "failed to encrypt ghcr token"); - 
StatusCode::INTERNAL_SERVER_ERROR - })?; - - let mut client = etcd_client().await?; - client - .put(ETCD_GHCR_TOKEN_KEY, encrypted.as_bytes(), None) - .await - .map_err(|e| { - tracing::error!(error = %e, "failed to write ghcr token to etcd"); - StatusCode::INTERNAL_SERVER_ERROR - })?; - - tracing::info!(username = %req.username, "ghcr token updated"); - Ok(StatusCode::NO_CONTENT) -} diff --git a/control-plane/csf-updater/src/config.rs b/control-plane/csf-updater/src/config.rs index 2b18015..97e25e6 100644 --- a/control-plane/csf-updater/src/config.rs +++ b/control-plane/csf-updater/src/config.rs @@ -3,12 +3,8 @@ use std::env; pub struct Config { pub etcd_endpoints: Vec, - pub ghcr_org: String, - pub compose_file: String, pub poll_interval_secs: u64, - pub secret_encryption_key: String, - pub binary_dir: String, - pub github_release_base_url: String, + pub infra_repo_mirror_url: String, } impl Config { @@ -19,19 +15,12 @@ impl Config { .split(',') .map(|s| s.trim().to_string()) .collect(), - ghcr_org: env::var("GHCR_ORG").context("GHCR_ORG must be set")?, - compose_file: env::var("COMPOSE_FILE") - .unwrap_or_else(|_| "/etc/csf-core/docker-compose.yml".to_string()), poll_interval_secs: env::var("POLL_INTERVAL_SECS") .ok() .and_then(|v| v.parse().ok()) .unwrap_or(30), - secret_encryption_key: env::var("SECRET_ENCRYPTION_KEY") - .context("SECRET_ENCRYPTION_KEY must be set")?, - binary_dir: env::var("BINARY_DIR") - .unwrap_or_else(|_| "/usr/local/bin".to_string()), - github_release_base_url: env::var("GITHUB_RELEASE_BASE_URL") - .unwrap_or_else(|_| "https://github.com/csfx-cloud/CSF-Core/releases/download".to_string()), + infra_repo_mirror_url: env::var("INFRA_REPO_MIRROR_URL") + .context("INFRA_REPO_MIRROR_URL must be set")?, }) } } diff --git a/control-plane/csf-updater/src/etcd.rs b/control-plane/csf-updater/src/etcd.rs index fff8468..6e0ac3f 100644 --- a/control-plane/csf-updater/src/etcd.rs +++ b/control-plane/csf-updater/src/etcd.rs @@ -2,9 +2,9 @@ use 
anyhow::Result; use crate::config::Config; -pub const DESIRED_VERSION_KEY: &str = "/csf/config/desired_cp_version"; -pub const RESULT_KEY: &str = "/csf/config/last_update_result"; -pub const GHCR_TOKEN_KEY: &str = "/csf/config/ghcr_token"; +pub const DESIRED_FLAKE_REV_KEY: &str = "/csf/config/desired_flake_rev"; +pub const BUILD_STATUS_KEY: &str = "/csf/config/cp_build_status"; +pub const RESULT_KEY: &str = "/csf/config/last_build_result"; pub const PAUSED_KEY: &str = "/csf/config/update_paused"; pub struct Client { diff --git a/control-plane/csf-updater/src/main.rs b/control-plane/csf-updater/src/main.rs index d474148..149d8cb 100644 --- a/control-plane/csf-updater/src/main.rs +++ b/control-plane/csf-updater/src/main.rs @@ -1,8 +1,6 @@ mod config; mod etcd; -mod secret; mod updater; -mod verify; use std::time::Duration; use tracing::info; @@ -44,7 +42,7 @@ async fn run_once(cfg: &config::Config, last_applied: &str) -> anyhow::Result v, None => return Ok(None), }; @@ -53,35 +51,31 @@ async fn run_once(cfg: &config::Config, last_applied: &str) -> anyhow::Result { + etcd.put(etcd::BUILD_STATUS_KEY, "ready").await?; etcd.put(etcd::RESULT_KEY, "success").await?; - info!(version = %desired, "update complete"); + info!(flake_rev = %desired, "update complete"); Ok(Some(desired)) } Err(e) => { - tracing::error!(error = %e, version = %desired, "update failed"); + tracing::error!(error = %e, flake_rev = %desired, "update failed"); + etcd.put(etcd::BUILD_STATUS_KEY, "failed").await?; etcd.put(etcd::RESULT_KEY, "failed").await?; Ok(Some(desired)) } } } -fn is_valid_version(v: &str) -> bool { - let v = v.trim_start_matches('v'); - let (base, _pre) = match v.split_once('-') { - Some((b, p)) => (b, Some(p)), - None => (v, None), - }; - let parts: Vec<&str> = base.split('.').collect(); - parts.len() == 3 && parts.iter().all(|p| p.parse::().is_ok()) +fn is_valid_sha(rev: &str) -> bool { + rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) } diff --git 
a/control-plane/csf-updater/src/secret.rs b/control-plane/csf-updater/src/secret.rs deleted file mode 100644 index 5d51b60..0000000 --- a/control-plane/csf-updater/src/secret.rs +++ /dev/null @@ -1,26 +0,0 @@ -use aes_gcm::{aead::{Aead, KeyInit}, Aes256Gcm, Nonce}; -use anyhow::{bail, Result}; -use base64::Engine; - -pub fn decrypt_secret(encoded: &str, key_b64: &str) -> Result { - let key_bytes = base64::engine::general_purpose::STANDARD.decode(key_b64)?; - if key_bytes.len() != 32 { - bail!("invalid encryption key length"); - } - - let combined = base64::engine::general_purpose::STANDARD.decode(encoded)?; - if combined.len() < 12 { - bail!("invalid ciphertext"); - } - - let (nonce_bytes, ciphertext) = combined.split_at(12); - let cipher = Aes256Gcm::new_from_slice(&key_bytes) - .map_err(|e| anyhow::anyhow!("cipher init failed: {}", e))?; - let nonce = Nonce::from_slice(nonce_bytes); - - let plaintext = cipher - .decrypt(nonce, ciphertext) - .map_err(|e| anyhow::anyhow!("decryption failed: {}", e))?; - - Ok(String::from_utf8(plaintext)?) 
-} diff --git a/control-plane/csf-updater/src/updater.rs b/control-plane/csf-updater/src/updater.rs index 71e451a..e22c524 100644 --- a/control-plane/csf-updater/src/updater.rs +++ b/control-plane/csf-updater/src/updater.rs @@ -1,194 +1,52 @@ use anyhow::{bail, Result}; -use sha2::{Digest, Sha256}; -use std::process::Stdio; -use tokio::process::Command; use tracing::info; use crate::config::Config; use crate::etcd; -use crate::secret::decrypt_secret; -use crate::verify; -pub async fn run(cfg: &Config, version: &str, etcd: &mut etcd::Client) -> Result<()> { - let (docker_config_dir, ghcr_auth) = setup_docker_auth(cfg, etcd).await?; - pull(cfg, version, docker_config_dir.as_deref()).await?; - verify::verify_images(cfg, version, ghcr_auth.as_deref()).await?; - up(cfg, version, docker_config_dir.as_deref()).await?; - health_check(cfg, version).await?; - update_agent_binary(cfg, version).await?; - update_self_binary(cfg, version).await +pub async fn run(cfg: &Config, flake_rev: &str, _etcd: &mut etcd::Client) -> Result<()> { + nix_build(cfg, flake_rev).await?; + nix_switch(cfg, flake_rev).await } -async fn setup_docker_auth(cfg: &Config, etcd: &mut etcd::Client) -> Result<(Option, Option)> { - let encrypted = match etcd.get(etcd::GHCR_TOKEN_KEY).await? 
{ - Some(v) => v, - None => return Ok((None, None)), - }; +async fn nix_build(cfg: &Config, flake_rev: &str) -> Result<()> { + info!(flake_rev = %flake_rev, "running nix build"); - let payload = decrypt_secret(&encrypted, &cfg.secret_encryption_key)?; - let (username, token) = payload - .split_once(':') - .ok_or_else(|| anyhow::anyhow!("invalid ghcr token payload"))?; - - let dir = tempfile::tempdir()?; - let config_path = dir.path().join("config.json"); - - let auth_raw = format!("{}:{}", username, token); - let auth_b64 = base64::Engine::encode( - &base64::engine::general_purpose::STANDARD, - auth_raw.as_bytes(), + let flake_url = format!( + "git+http://{}?rev={}", + cfg.infra_repo_mirror_url, flake_rev ); - let config = serde_json::json!({ - "auths": { - "ghcr.io": { - "auth": auth_b64 - } - } - }); - - tokio::fs::write(&config_path, serde_json::to_string(&config)?).await?; - let dir_path = dir.into_path().to_string_lossy().to_string(); - Ok((Some(dir_path), Some(auth_b64))) -} - -async fn pull(cfg: &Config, version: &str, docker_config_dir: Option<&str>) -> Result<()> { - info!(version = %version, "pulling images"); - compose(cfg, version, docker_config_dir, &["pull"]).await -} -async fn up(cfg: &Config, version: &str, docker_config_dir: Option<&str>) -> Result<()> { - info!(version = %version, "restarting services"); - compose(cfg, version, docker_config_dir, &["up", "-d", "--remove-orphans"]).await -} - -async fn health_check(cfg: &Config, version: &str) -> Result<()> { - info!("waiting for health checks"); - tokio::time::sleep(std::time::Duration::from_secs(15)).await; - - let output = Command::new("docker") - .args(["compose", "-f", &cfg.compose_file, "ps", "--format", "json"]) - .env("GHCR_ORG", &cfg.ghcr_org) - .env("CSF_VERSION", version) - .output() + let status = tokio::process::Command::new("nixos-rebuild") + .args(["build", "--flake", &flake_url]) + .status() .await?; - let stdout = String::from_utf8_lossy(&output.stdout); - for line in 
stdout.lines() { - if let Ok(svc) = serde_json::from_str::(line) { - if svc["Health"].as_str() == Some("unhealthy") { - bail!("service {} is unhealthy after update", svc["Name"].as_str().unwrap_or("unknown")); - } - } + if !status.success() { + bail!("nix build failed for rev {}", flake_rev); } - info!("all services healthy"); + info!(flake_rev = %flake_rev, "nix build complete"); Ok(()) } -async fn update_agent_binary(cfg: &Config, version: &str) -> Result<()> { - info!(version = %version, "updating csf-agent binary"); - let arch = detect_arch(); - let url = format!( - "{}/v{}/csf-agent-{}", - cfg.github_release_base_url, version, arch - ); - let dest = format!("{}/csf-agent", cfg.binary_dir); - download_and_swap(&url, &dest).await?; - restart_unit("csf-daemon").await -} +async fn nix_switch(cfg: &Config, flake_rev: &str) -> Result<()> { + info!(flake_rev = %flake_rev, "running nixos-rebuild switch"); -async fn update_self_binary(cfg: &Config, version: &str) -> Result<()> { - info!(version = %version, "updating csf-updater binary"); - let arch = detect_arch(); - let url = format!( - "{}/v{}/csf-updater-{}", - cfg.github_release_base_url, version, arch + let flake_url = format!( + "git+http://{}?rev={}", + cfg.infra_repo_mirror_url, flake_rev ); - let dest = format!("{}/csf-updater", cfg.binary_dir); - download_and_swap(&url, &dest).await?; - restart_unit("csf-updater").await -} - -async fn download_and_swap(url: &str, dest: &str) -> Result<()> { - let tmp = format!("{}.new", dest); - - let bytes = fetch(url).await?; - let expected = fetch_checksum(&format!("{}.sha256", url)).await?; - verify_checksum(&bytes, &expected)?; - - tokio::fs::write(&tmp, &bytes).await?; - - let mut perms = tokio::fs::metadata(&tmp).await?.permissions(); - std::os::unix::fs::PermissionsExt::set_mode(&mut perms, 0o750); - tokio::fs::set_permissions(&tmp, perms).await?; - - tokio::fs::rename(&tmp, dest).await?; - info!(dest = %dest, "binary swapped"); - Ok(()) -} -async fn fetch(url: &str) 
-> Result { - let resp = reqwest::get(url).await?; - if !resp.status().is_success() { - bail!("failed to download {}: {}", url, resp.status()); - } - Ok(resp.bytes().await?) -} - -async fn fetch_checksum(url: &str) -> Result { - let resp = reqwest::get(url).await?; - if !resp.status().is_success() { - bail!("failed to download checksum {}: {}", url, resp.status()); - } - let text = resp.text().await?; - text.split_whitespace() - .next() - .map(|s| s.to_string()) - .ok_or_else(|| anyhow::anyhow!("empty checksum file at {}", url)) -} - -fn verify_checksum(data: &[u8], expected: &str) -> Result<()> { - let digest = hex::encode(Sha256::digest(data)); - if digest != expected { - bail!("checksum mismatch: expected={} got={}", expected, digest); - } - info!("checksum verified"); - Ok(()) -} - -async fn restart_unit(unit: &str) -> Result<()> { - let status = Command::new("sudo") - .args(["systemctl", "restart", unit]) + let status = tokio::process::Command::new("nixos-rebuild") + .args(["switch", "--flake", &flake_url]) .status() .await?; - if !status.success() { - bail!("systemctl restart {} failed: {}", unit, status); - } - Ok(()) -} - -fn detect_arch() -> &'static str { - if cfg!(target_arch = "aarch64") { "arm64" } else { "amd64" } -} -async fn compose(cfg: &Config, version: &str, docker_config_dir: Option<&str>, args: &[&str]) -> Result<()> { - let mut cmd_args = vec!["compose", "-f", cfg.compose_file.as_str()]; - cmd_args.extend_from_slice(args); - - let mut cmd = Command::new("docker"); - cmd.args(&cmd_args) - .env("GHCR_ORG", &cfg.ghcr_org) - .env("CSF_VERSION", version) - .stdout(Stdio::inherit()) - .stderr(Stdio::inherit()); - - if let Some(dir) = docker_config_dir { - cmd.env("DOCKER_CONFIG", dir); - } - - let status = cmd.status().await?; if !status.success() { - bail!("docker compose {} failed: {}", args.join(" "), status); + bail!("nixos-rebuild switch failed for rev {}", flake_rev); } + + info!(flake_rev = %flake_rev, "nixos-rebuild switch complete"); Ok(()) 
} diff --git a/control-plane/csf-updater/src/verify.rs b/control-plane/csf-updater/src/verify.rs deleted file mode 100644 index 5fae4e2..0000000 --- a/control-plane/csf-updater/src/verify.rs +++ /dev/null @@ -1,112 +0,0 @@ -use anyhow::{bail, Result}; -use tracing::info; - -use crate::config::Config; - -const SERVICES: &[&str] = &[ - "api-gateway", - "registry", - "scheduler", - "volume-manager", - "failover-controller", - "sdn-controller", -]; - -pub async fn verify_images(cfg: &Config, version: &str, ghcr_auth: Option<&str>) -> Result<()> { - let client = reqwest::Client::new(); - - for svc in SERVICES { - let image = format!("{}/csf-ce-{}", cfg.ghcr_org, svc); - let remote = remote_digest(&client, &image, version, ghcr_auth).await?; - let local = local_digest(&format!("ghcr.io/{}/csf-ce-{}:{}", cfg.ghcr_org, svc, version))?; - - if remote != local { - bail!( - "digest mismatch for {}: remote={} local={}", - svc, remote, local - ); - } - - info!(service = svc, digest = %remote, "image verified"); - } - - Ok(()) -} - -async fn exchange_token(client: &reqwest::Client, image: &str, basic_auth: &str) -> Result { - let url = format!( - "https://ghcr.io/token?scope=repository:{}:pull", - image - ); - let resp = client - .get(&url) - .header("Authorization", format!("Basic {}", basic_auth)) - .send() - .await?; - - if !resp.status().is_success() { - bail!("GHCR token exchange failed for {}: {}", image, resp.status()); - } - - let body: serde_json::Value = resp.json().await?; - body["token"] - .as_str() - .map(|s| s.to_string()) - .ok_or_else(|| anyhow::anyhow!("no token in GHCR token response for {}", image)) -} - -async fn remote_digest(client: &reqwest::Client, image: &str, tag: &str, ghcr_auth: Option<&str>) -> Result { - let bearer = match ghcr_auth { - Some(auth) => exchange_token(client, image, auth).await?, - None => bail!("no GHCR auth configured"), - }; - - let url = format!("https://ghcr.io/v2/{}/manifests/{}", image, tag); - let resp = client - .head(&url) - 
.header("Authorization", format!("Bearer {}", bearer)) - .header("Accept", "application/vnd.docker.distribution.manifest.v2+json") - .send() - .await?; - - if !resp.status().is_success() { - bail!("GHCR manifest request failed for {}: {}", image, resp.status()); - } - - resp.headers() - .get("docker-content-digest") - .and_then(|v| v.to_str().ok()) - .map(|s| s.to_string()) - .ok_or_else(|| anyhow::anyhow!("no docker-content-digest header for {}", image)) -} - -fn local_digest(image: &str) -> Result { - let pull = std::process::Command::new("docker") - .args(["pull", "--quiet", image]) - .output()?; - - if !pull.status.success() { - bail!( - "docker pull failed for {}: {}", - image, - String::from_utf8_lossy(&pull.stderr).trim() - ); - } - - let output = std::process::Command::new("docker") - .args(["image", "inspect", "--format", "{{json .RepoDigests}}", image]) - .output()?; - - if !output.status.success() { - bail!("docker inspect failed for {}", image); - } - - let raw = String::from_utf8(output.stdout)?; - let digests: Vec = serde_json::from_str(raw.trim()) - .map_err(|e| anyhow::anyhow!("failed to parse RepoDigests for {}: {}", image, e))?; - - digests - .into_iter() - .find_map(|d| d.split('@').nth(1).map(|s| s.to_string())) - .ok_or_else(|| anyhow::anyhow!("no repo digest found for {}", image)) -} diff --git a/docs/UPDATER_PLAN.md b/docs/UPDATER_PLAN.md new file mode 100644 index 0000000..4da90fa --- /dev/null +++ b/docs/UPDATER_PLAN.md @@ -0,0 +1,305 @@ +# CSF Updater — Architekturplan + +## Aktueller Stand (vollständig analysiert) + +### CI/CD Pipeline + +**GitHub Actions Workflows:** +- `release-please.yml`: Läuft auf `main` — erstellt automatisch GitHub Releases via Conventional Commits, bumped `Cargo.toml` workspace version, aktuell bei `0.2.2` +- `docker-build.yml`: Trigggert nach erfolgreichem Release-Please-Run **oder** `workflow_dispatch` **oder** `push` auf `develop` + - Matrix-Build: 6 Services × 2 Architekturen (amd64 + arm64) via native GitHub 
Runners (`ubuntu-latest` + `ubuntu-24.04-arm`) + - Build-Strategie: `push-by-digest` → separater `manifest`-Job erstellt Multi-Arch-Manifest + - Images landen auf `ghcr.io/<owner>/csf-ce-<service>:<version>` + `:latest` + - Dockerfile: `control-plane/Dockerfile.prod.shared` mit `cargo-chef` für Layer-Caching + - `build-binaries`-Job: baut `csf-updater` und `csf-agent` als statische musl-Binaries (amd64 + arm64) + - `attach-binaries-release`-Job: uploaded Binaries + SHA256-Dateien zum GitHub Release +- `prerelease.yml`: Identischer Flow für `develop`-Branch → Pre-release mit `-alpha.<n>` Tag +- `lint.yml`: `cargo clippy -D warnings` + `cargo fmt --check` + `cargo audit` auf PRs und `main` +- `renovate.yml`: automatische Dependency-Updates (vermutlich) + +**Dockerfile-Struktur (`Dockerfile.prod.shared`):** +- Stage 1 (`planner`): `cargo chef prepare` — generiert `recipe.json` +- Stage 2 (`builder`): `cargo chef cook` (Dependency-Cache) + `cargo build --profile docker-release --bin <service> --bin csf-migrate` +- Stage 3 (`runtime`): `debian:bookworm-slim`, beide Binaries (`/app/service` + `/csf-migrate`) kopiert +- Build-Arg `CSF_BUILD_VERSION` wird an den Build übergeben (für `build.rs`) + +**`Dockerfile.csf-updater`:** +- Separates Dockerfile nur für `csf-updater`, exportiert Binary via `FROM scratch AS export` +- Wird nicht vom CI verwendet — CI baut `csf-updater` als musl-Binary direkt via `cargo build` +- Dieses Dockerfile ist totes Deployment-Artefakt, das nicht mehr zum CI-Flow passt + +### Runtime-Komponenten + +**`csf-updater` Binary** (`control-plane/csf-updater/`): +- Pollt etcd alle N Sekunden auf `/csf/config/desired_cp_version` +- Validiert Semver-Format, setzt `/csf/config/last_update_result` als Statusindikator +- Lädt GHCR-Token verschlüsselt aus etcd (AES-256-GCM via `secret.rs`) +- Führt `docker compose pull` → Digest-Verify → `docker compose up -d` aus +- Digest-Verify: GHCR Registry API (remote) vs. 
`docker image inspect` (lokal) — aber `local_digest()` macht intern nochmal `docker pull` +- Wartet 15s pauschal, prüft dann `docker compose ps` auf unhealthy Services +- Downloadet `csf-agent` und `csf-updater` Binaries von GitHub Releases, verifiziert SHA256, swappt atomar via `rename(2)` +- Startet Units via `sudo systemctl restart <unit>` + +**Shell-Fallback** (`deployments/systemd/csf-updater.sh`): +- Identische Logik in Bash: etcd-Poll via curl + jq, docker-compose-Flow, Digest-Verify +- Kein Binary-Download, kein Self-Update +- Kein Health-Check nach up (nur `sleep 15` + `jq`-Filter) + +**Systemd-Unit** (`deployments/systemd/csf-updater.service`): +- `ExecStart` zeigt auf `csf-updater.sh` (Shell-Script), nicht auf das Rust-Binary +- Fehlende Env-Var: `SECRET_ENCRYPTION_KEY` (vom Rust-Binary required, im Shell-Script nicht gebraucht) +- `ETCD_ENDPOINT` (Singular) statt `ETCD_ENDPOINTS` (Liste, wie Config erwartet) +- Kein Hardening: kein `ProtectSystem`, kein `NoNewPrivileges`, kein `CapabilityBoundingSet` +- User `csf-updater` ist in Gruppe `docker` — kann alle Container auf dem Host steuern + +--- + +## Probleme und Schwachstellen + +### P1 — systemd-Unit startet Shell-Script statt Rust-Binary +`ExecStart=/opt/csf/csf-updater.sh` — das Rust-Binary wird gebaut, deployed, aber nie gestartet. +Das Secret-Handling (AES-256-GCM), das persistente etcd-RESULT_KEY-Schreiben und die SHA256-Verify laufen damit in Prod nie. Die Shell-Version hat keine Verschlüsselung und kein Binary-Download. + +### P2 — sudo ohne sudoers-Regel bricht in Prod +`restart_unit()` ruft `sudo systemctl restart <unit>` auf. Der User `csf-updater` hat keine sudoers-Regel — jeder Update-Cycle schlägt beim systemctl-Call fehl, ohne Rollback. + +### P3 — Kein Rollback +Wenn `health_check()` einen unhealthy Service meldet, wird `RESULT_KEY` auf `failed` gesetzt und der Cycle endet. Die Services laufen weiterhin mit dem neuen (kaputten) Image. Kein `docker compose up -d` mit dem vorherigen Tag. 
+ +### P4 — Self-Update-Race +`update_self_binary()` downloaded das neue Binary und macht `systemctl restart csf-updater`. Der eigene Prozess wird gekillt bevor er `RESULT_KEY = success` schreiben kann — jeder Self-Update-Cycle hinterlässt `in_progress` in etcd. + +### P5 — `last_applied` nur im RAM +Nach Crash oder Restart versucht der Updater sofort wieder dieselbe Version zu applyen. Bei einem kaputten Setup → endloser Retry-Loop. + +### P6 — 15s Sleep ist nicht deterministisch +`health_check()` wartet pauschal 15 Sekunden. Bei großen Images oder langsamen Nodes reicht das nicht. Bei schnellen Nodes ist es Verschwendung. + +### P7 — Kein Distributed Lock +Wenn zwei Master-Nodes gleichzeitig denselben `desired_cp_version`-Key sehen, laufen beide gleichzeitig `docker compose up -d`. Kein Lock in etcd. + +### P8 — Reines Polling, keine etcd-Watches +Der Updater reconnected zu etcd jede Poll-Iteration und macht ein synchrones GET. Ein etcd-Watch wäre reaktiver und ressourcenschonender. + +### P9 — `local_digest()` macht internen zweiten `docker pull` +In `verify_images()` wird `docker pull --quiet` in `local_digest()` aufgerufen — obwohl `pull()` das Image bereits wenige Sekunden vorher gezogen hat. Verdoppelt die Download-Zeit. + +### P10 — Agent-Binary-Update inkompatibel mit NixOS +`update_agent_binary()` schreibt nach `/usr/local/bin/csf-agent` und startet `csf-daemon` neu. Auf NixOS überlebt das Binary keinen `nixos-rebuild switch` — die systemd-Unit zeigt auf einen Nix-Store-Pfad, nicht auf `/usr/local/bin`. Der Ansatz funktioniert nur auf nicht-NixOS-Systemen. + +### P11 — `Dockerfile.csf-updater` ist orphaned +Das separate Dockerfile baut `csf-updater` als statisches Binary, exportiert es via `FROM scratch`. Der CI-Flow (`docker-build.yml`) nutzt es nicht — er baut `csf-updater` direkt via `cargo build --target musl`. Das Dockerfile ist toter Code und führt zu Verwirrung bei der Frage welcher Build-Pfad der kanonische ist. 
+ +### P12 — `update-versions.sh` referenziert `backend/Cargo.toml` das nicht existiert +Das Script in `.github/scripts/update-versions.sh` patcht `backend/Cargo.toml`. Das Projekt heißt aber `CSF-Core` mit `Cargo.toml` im Root als Workspace. `backend/` existiert nicht. Das Script ist toter Code aus einem früheren Projekt-Layout. + +### P13 — `csf-updater` im selben `Dockerfile.prod.shared` wie Services +Der `build`-Job in `docker-build.yml` baut alle 6 Services mit `Dockerfile.prod.shared`. `csf-updater` hat ein eigenes `Dockerfile.csf-updater`. Der `build-binaries`-Job baut `csf-updater` als musl-Binary. Drei verschiedene Build-Pfade für dasselbe Binary — unklar welcher kanonisch ist. + +--- + +## Zielarchitektur + +### Schicht 1 — Control Plane Updates (Docker-basiert) + +``` +GitHub Release v1.2.3 + → CI baut Images + musl-Binaries + → Images auf ghcr.io/<owner>/csf-ce-<service>:1.2.3 + → Binaries als Release-Assets (csf-agent-amd64, csf-updater-amd64 etc.) + → Admin setzt etcd: /csf/config/desired_cp_version = "1.2.3" + +etcd-Watch (kein Poll) triggert csf-updater: + 1. acquire_lock (etcd Lease, 60s TTL) — verhindert parallele Updates + 2. pull images (alle 6 Services parallel via Tokio-Tasks) + 3. verify digests (remote GHCR API vs lokaler docker inspect, KEIN zweiter pull) + 4. docker compose up -d --remove-orphans + 5. wait_healthy (Retry-Loop, 5s Interval, konfigurierbarer Timeout) + → bei timeout: docker compose up -d mit PREV_VERSION (Rollback) + 6. release_lock + 7. put applied_cp_version = version, put last_update_result = success + +bei Fehler in Schritt 4/5: + 8. docker compose up -d mit applied_cp_version (Rollback) + 9. 
put last_update_result = rolled_back +``` + +**etcd-Keys:** +``` +/csf/config/desired_cp_version → Zielversion (Admin schreibt diesen Key) +/csf/config/applied_cp_version → zuletzt erfolgreich gerollte Version (persistentes last_applied) +/csf/config/last_update_result → in_progress | success | failed | rolled_back +/csf/config/update_paused → true/false (bereits implementiert) +/csf/config/update_lock → Distributed Lock (etcd Lease) +/csf/config/ghcr_token → AES-256-GCM verschlüsseltes Token (bereits implementiert) +/csf/config/desired_agent_version → Zielversion für csf-agent (Registry liest, Heartbeat trägt aus) +``` + +### Schicht 2 — Agent-Updates + +**NixOS-Nodes (Primärpfad):** +``` +Registry liest desired_agent_version aus etcd + → Heartbeat-Response: { desired_version: "1.2.3" } + → Agent vergleicht mit env!("CARGO_PKG_VERSION") aus build.rs + → wenn neuer: schreibe /var/lib/csf-daemon/desired_version + → triggere systemctl start csf-agent-update.service (PolicyKit-Regel) + → Oneshot-Unit führt nixos-rebuild switch aus + → systemd startet csf-daemon nach rebuild neu (neues Binary aus Nix-Store) +``` + +**Nicht-NixOS-Fallback:** +``` +Agent: + 1. Download Binary in tmpfile (/var/lib/csf-daemon/csf-agent.new) + 2. verifiziere SHA256 gegen Release-Asset + 3. chmod 0o750 + 4. rename(2) → atomarer swap nach /var/lib/csf-daemon/csf-agent + 5. exec() sich selbst (in-place restart, kein PID-Wechsel) + bei exec()-Fehler: systemctl restart csf-daemon via D-Bus (kein sudo) +``` + +Der `csf-updater` ist nicht zuständig für Agent-Updates. Er schreibt nur `/csf/config/desired_agent_version`. Die Verteilung läuft ausschließlich über den Heartbeat-Mechanismus. + +### Schicht 3 — Self-Update des Updaters + +Empfehlung: `csf-updater` Self-Update entfernen. + +Begründung: `csf-updater` ist kein Service der laufend upgedatet werden muss. Er wird beim Aufsetzen eines neuen Nodes deployed (via NixOS-Modul oder Ansible). 
Neue Versionen des Updaters kommen mit dem nächsten Node-Provisioning. Der Self-Update-Race (P4) entfällt komplett. + +Falls Self-Update doch gewünscht: `success` + `applied_cp_version` in etcd schreiben, **dann** Binary tauschen + Unit neustarten. Die neue Instanz liest `applied_cp_version` beim Start und überspringt die Version. + +--- + +## Konkrete Änderungen (priorisiert) + +### 1 — systemd-Unit auf Rust-Binary umstellen [blocking] +`ExecStart` von `csf-updater.sh` auf `/usr/local/bin/csf-updater` ändern. +`ETCD_ENDPOINT` → `ETCD_ENDPOINTS` (kommaseparierte Liste). +`SECRET_ENCRYPTION_KEY` als Env-Var ergänzen (aus `/opt/csf/.env`). + +### 2 — Persistentes `applied_version` in etcd [blocking] +Beim Start: `etcd.get(APPLIED_VERSION_KEY)` als initialen `last_applied`. +`APPLIED_VERSION_KEY` nach erfolgreichem Update schreiben. +Eliminiert idempotenten Retry-Loop nach Restart. + +### 3 — Rollback-Logik in `updater.rs` +Vor Update: `prev_version = etcd.get(APPLIED_VERSION_KEY)`. +Nach fehlgeschlagenem health_check: `compose(cfg, &prev, docker_config_dir, &["up", "-d"])`. +`RESULT_KEY = "rolled_back"`. + +### 4 — Health-Check: Retry-Loop statt pauschaler Sleep +```rust +let timeout = Duration::from_secs(cfg.health_check_timeout_secs); +let deadline = Instant::now() + timeout; +loop { + if all_healthy(cfg, version).await? { return Ok(()); } + if Instant::now() > deadline { bail!("health check timeout"); } + sleep(Duration::from_secs(5)).await; +} +``` +Neues Config-Feld: `health_check_timeout_secs` (Default: 120). + +### 5 — `local_digest()`: internen Pull entfernen +`local_digest()` soll nur `docker image inspect` aufrufen. Der Pull ist bereits in `pull()` passiert. +Wenn `inspect` fehlschlägt → bail, nicht erneut pullen. 
+ +### 6 — Distributed Lock in `etcd.rs` +```rust +pub async fn acquire_lock(&mut self, ttl_secs: i64) -> Result<i64> // returns lease_id +pub async fn release_lock(&mut self, lease_id: i64) -> Result<()> +``` +`acquire_lock` nutzt `etcd_client::Client::lease_grant` + `put` mit `LeaseId` auf `LOCK_KEY`. +Vor jedem Update-Cycle: lock acquiren. Bei Fehler (Lock bereits gehalten): `info!` + skip (kein Fehler). + +### 7 — etcd-Watch in `main.rs` +etcd-Client hält eine persistente Verbindung, `watch()` auf `DESIRED_VERSION_KEY`. +Fallback-Poll alle 5 Minuten (Watch kann bei Netzwerkproblemen abreißen). +Eliminiert das unnötige Reconnect bei jedem Poll-Cycle. + +### 8 — sudoers-Datei oder D-Bus-Restart +Einfachste Lösung: `/etc/sudoers.d/90-csf-updater`: +``` +csf-updater ALL=(root) NOPASSWD: /usr/bin/systemctl restart csf-daemon +``` +Dieses File muss Teil des NixOS-Moduls / Deployment-Skripts sein. +Mittelfristig: `zbus`-Crate für D-Bus-nativen systemd-Unit-Restart ohne sudo. + +### 9 — Self-Update aus `updater.rs` entfernen +`update_agent_binary()` und `update_self_binary()` aus `updater::run()` entfernen. +Agent-Updates laufen via Heartbeat-Response (Schicht 2). +Updater-Updates laufen via Node-Provisioning. + +### 10 — `Dockerfile.csf-updater` entfernen +Totes Artefakt — CI nutzt es nicht. Verursacht Verwirrung über den kanonischen Build-Pfad. +Kanonisch ist `build-binaries`-Job in `docker-build.yml` (musl, statisches Binary). + +### 11 — `update-versions.sh` fixen oder entfernen +Script referenziert `backend/Cargo.toml` (existiert nicht). Versioning läuft über `release-please` + `Cargo.toml` workspace. Script ist funktionslos, sollte entfernt werden. 
+ +### 12 — NixOS-Modul: `csf-agent-update.service` Oneshot-Unit +```nix +systemd.services.csf-agent-update = { + description = "CSF Agent NixOS Update"; + serviceConfig = { + Type = "oneshot"; + ExecStart = "${pkgs.nixos-rebuild}/bin/nixos-rebuild switch"; + User = "root"; + }; +}; +security.polkit.extraConfig = '' + polkit.addRule(function(action, subject) { + if (action.id === "org.freedesktop.systemd1.manage-units" && + action.lookup("unit") === "csf-agent-update.service" && + subject.user === "csf-daemon") { + return polkit.Result.YES; + } + }); +''; +``` + +--- + +## Was nicht geändert werden soll + +- AES-256-GCM Secret-Handling (`secret.rs`) ist korrekt. +- Semver-Validierung in `main.rs` ist ausreichend. +- GHCR-Token-Exchange-Logik in `verify.rs` ist korrekt. +- `docker compose up -d --remove-orphans` ist der richtige Rolling-Restart-Mechanismus. +- Multi-Arch-Matrix-Build-Strategie (digest-first + manifest) in CI ist korrekt. +- `cargo-chef`-Layer-Caching in `Dockerfile.prod.shared` ist korrekt. +- `release-please` + Conventional Commits als Release-Trigger ist korrekt. +- SHA256-Verify + atomares `rename(2)` beim Binary-Swap ist korrekt. 
+ +--- + +## Deployment-Checkliste + +``` +[ ] systemd-Unit auf Rust-Binary umgestellt (ExecStart, ETCD_ENDPOINTS, SECRET_ENCRYPTION_KEY) +[ ] applied_cp_version Key beim Start geladen (persistentes last_applied) +[ ] applied_cp_version nach erfolgreichem Update in etcd geschrieben +[ ] Rollback-Logik in updater.rs (compose up mit prev_version bei health-check-Fehler) +[ ] Health-Check: Retry-Loop mit konfigurierbarem Timeout statt pauschalen 15s +[ ] local_digest() ohne internen docker pull Aufruf +[ ] Distributed Lock (acquire/release) in etcd.rs +[ ] etcd-Watch in main.rs (mit Fallback-Poll) +[ ] sudoers-Datei im Deployment oder D-Bus-basierter Restart +[ ] Self-Update (update_agent_binary, update_self_binary) aus updater::run() entfernt +[ ] Dockerfile.csf-updater entfernt +[ ] update-versions.sh entfernt oder auf Workspace-Cargo.toml korrigiert +[ ] desired_agent_version in etcd schreiben (Admin-API oder Registry-Seite) +[ ] HeartbeatResponse: desired_version Feld ergänzen (Registry + Agent) +[ ] Agent: Version-Check + Update-Trigger (NixOS-Pfad + Fallback) +[ ] NixOS-Modul: csf-agent-update.service Oneshot-Unit + PolicyKit-Regel +[ ] systemd-Unit Hardening (NoNewPrivileges, ProtectSystem, CapabilityBoundingSet) +``` + +--- + +## Nicht in Scope (bewusst ausgeschlossen) + +- Watchtower: Dev-only, kein Digest-Verify, kein Rollback — nicht Prod-fähig +- Kubernetes-style Rolling Updates pro Replica: nicht relevant, Docker-Compose-Instanz pro Node +- Automatische Datenbankmigrationen im Updater: `csf-migrate` Init-Container ist korrekt und bleibt getrennt +- Separate Version-Tracks pro Service: alle Services laufen auf derselben Workspace-Version From d08736eac471364bdbd3893d6004fa9d96c1bf8f Mon Sep 17 00:00:00 2001 From: CodeMaster4711 Date: Mon, 6 Apr 2026 19:54:18 +0200 Subject: [PATCH 06/16] feat: implement gitops poller, git mirror, and nix build pipeline in csf-updater --- Cargo.lock | 6 - .../api-gateway/src/routes/update.rs | 4 + 
control-plane/csf-updater/Cargo.toml | 6 - control-plane/csf-updater/src/config.rs | 11 +- control-plane/csf-updater/src/etcd.rs | 1 + control-plane/csf-updater/src/git_mirror.rs | 54 +++++++++ control-plane/csf-updater/src/main.rs | 107 +++++++++++++++--- control-plane/csf-updater/src/nix_build.rs | 36 ++++++ control-plane/csf-updater/src/poller.rs | 55 +++++++++ control-plane/csf-updater/src/updater.rs | 36 +----- 10 files changed, 259 insertions(+), 57 deletions(-) create mode 100644 control-plane/csf-updater/src/git_mirror.rs create mode 100644 control-plane/csf-updater/src/nix_build.rs create mode 100644 control-plane/csf-updater/src/poller.rs diff --git a/Cargo.lock b/Cargo.lock index dec81df..7901781 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1133,18 +1133,12 @@ dependencies = [ name = "csf-updater" version = "0.2.2" dependencies = [ - "aes-gcm", "anyhow", - "base64 0.22.1", - "bytes", "dotenvy", "etcd-client", - "hex", "reqwest 0.11.27", "serde", "serde_json", - "sha2", - "tempfile", "tokio", "tracing", "tracing-subscriber", diff --git a/control-plane/api-gateway/src/routes/update.rs b/control-plane/api-gateway/src/routes/update.rs index 274d357..8a75570 100644 --- a/control-plane/api-gateway/src/routes/update.rs +++ b/control-plane/api-gateway/src/routes/update.rs @@ -6,6 +6,7 @@ use std::env; use crate::auth::rbac::CanManageSystem; use crate::AppState; +const ETCD_AVAILABLE_FLAKE_REV_KEY: &str = "/csf/config/available_flake_rev"; const ETCD_DESIRED_FLAKE_REV_KEY: &str = "/csf/config/desired_flake_rev"; const ETCD_BUILD_STATUS_KEY: &str = "/csf/config/cp_build_status"; const ETCD_RESULT_KEY: &str = "/csf/config/last_build_result"; @@ -25,6 +26,7 @@ pub struct UpdateResponse { #[derive(Debug, Serialize)] pub struct UpdateStatusResponse { pub current_version: String, + pub available_flake_rev: Option, pub desired_flake_rev: Option, pub build_status: Option, pub last_result: Option, @@ -83,6 +85,7 @@ async fn update_status( ) -> Result, StatusCode> { let 
mut client = etcd_client().await?; + let available_flake_rev = etcd_get(&mut client, ETCD_AVAILABLE_FLAKE_REV_KEY).await?; let desired_flake_rev = etcd_get(&mut client, ETCD_DESIRED_FLAKE_REV_KEY).await?; let build_status = etcd_get(&mut client, ETCD_BUILD_STATUS_KEY).await?; let last_result = etcd_get(&mut client, ETCD_RESULT_KEY).await?; @@ -90,6 +93,7 @@ async fn update_status( Ok(Json(UpdateStatusResponse { current_version: env!("CARGO_PKG_VERSION").to_string(), + available_flake_rev, desired_flake_rev, build_status, last_result, diff --git a/control-plane/csf-updater/Cargo.toml b/control-plane/csf-updater/Cargo.toml index 2f63777..39a3b9c 100644 --- a/control-plane/csf-updater/Cargo.toml +++ b/control-plane/csf-updater/Cargo.toml @@ -19,9 +19,3 @@ etcd-client = { workspace = true } reqwest = { version = "0.11", features = ["json", "rustls-tls-webpki-roots"], default-features = false } serde = { workspace = true } serde_json = { workspace = true } -aes-gcm = { workspace = true } -base64 = { workspace = true } -sha2 = { workspace = true } -hex = "0.4" -bytes = "1" -tempfile = "3" diff --git a/control-plane/csf-updater/src/config.rs b/control-plane/csf-updater/src/config.rs index 97e25e6..577a476 100644 --- a/control-plane/csf-updater/src/config.rs +++ b/control-plane/csf-updater/src/config.rs @@ -4,7 +4,10 @@ use std::env; pub struct Config { pub etcd_endpoints: Vec, pub poll_interval_secs: u64, + pub infra_repo_mirror_dir: String, pub infra_repo_mirror_url: String, + pub infra_repo_github: String, + pub infra_repo_branch: String, } impl Config { @@ -18,9 +21,15 @@ impl Config { poll_interval_secs: env::var("POLL_INTERVAL_SECS") .ok() .and_then(|v| v.parse().ok()) - .unwrap_or(30), + .unwrap_or(120), + infra_repo_mirror_dir: env::var("INFRA_REPO_MIRROR_DIR") + .unwrap_or_else(|_| "/var/lib/csf-updater/infra.git".to_string()), infra_repo_mirror_url: env::var("INFRA_REPO_MIRROR_URL") .context("INFRA_REPO_MIRROR_URL must be set")?, + infra_repo_github: 
env::var("INFRA_REPO_GITHUB") + .context("INFRA_REPO_GITHUB must be set (e.g. csfx-cloud/CSFX-Infra)")?, + infra_repo_branch: env::var("INFRA_REPO_BRANCH") + .unwrap_or_else(|_| "main".to_string()), }) } } diff --git a/control-plane/csf-updater/src/etcd.rs b/control-plane/csf-updater/src/etcd.rs index 6e0ac3f..4ee8246 100644 --- a/control-plane/csf-updater/src/etcd.rs +++ b/control-plane/csf-updater/src/etcd.rs @@ -2,6 +2,7 @@ use anyhow::Result; use crate::config::Config; +pub const AVAILABLE_FLAKE_REV_KEY: &str = "/csf/config/available_flake_rev"; pub const DESIRED_FLAKE_REV_KEY: &str = "/csf/config/desired_flake_rev"; pub const BUILD_STATUS_KEY: &str = "/csf/config/cp_build_status"; pub const RESULT_KEY: &str = "/csf/config/last_build_result"; diff --git a/control-plane/csf-updater/src/git_mirror.rs b/control-plane/csf-updater/src/git_mirror.rs new file mode 100644 index 0000000..9153205 --- /dev/null +++ b/control-plane/csf-updater/src/git_mirror.rs @@ -0,0 +1,54 @@ +use anyhow::{bail, Result}; +use std::path::Path; +use tokio::process::Command; +use tracing::info; + +pub async fn sync(mirror_dir: &str, remote_url: &str) -> Result<()> { + if Path::new(mirror_dir).join("HEAD").exists() { + fetch(mirror_dir).await + } else { + clone(mirror_dir, remote_url).await + } +} + +async fn clone(mirror_dir: &str, remote_url: &str) -> Result<()> { + info!(mirror_dir = %mirror_dir, remote_url = %remote_url, "cloning infra repo mirror"); + + let status = Command::new("git") + .args(["clone", "--mirror", remote_url, mirror_dir]) + .status() + .await?; + + if !status.success() { + bail!("git clone --mirror failed for {}", remote_url); + } + + info!(mirror_dir = %mirror_dir, "mirror clone complete"); + Ok(()) +} + +async fn fetch(mirror_dir: &str) -> Result<()> { + info!(mirror_dir = %mirror_dir, "fetching infra repo mirror"); + + let status = Command::new("git") + .args(["--git-dir", mirror_dir, "fetch", "--prune"]) + .status() + .await?; + + if !status.success() { + 
bail!("git fetch --prune failed in {}", mirror_dir); + } + + info!(mirror_dir = %mirror_dir, "mirror fetch complete"); + Ok(()) +} + +pub async fn rev_exists(mirror_dir: &str, rev: &str) -> Result { + let output = Command::new("git") + .args(["--git-dir", mirror_dir, "cat-file", "-t", rev]) + .output() + .await?; + + Ok(output.status.success() + && String::from_utf8_lossy(&output.stdout).trim() == "commit") +} diff --git a/control-plane/csf-updater/src/main.rs b/control-plane/csf-updater/src/main.rs index 149d8cb..1c00315 100644 --- a/control-plane/csf-updater/src/main.rs +++ b/control-plane/csf-updater/src/main.rs @@ -1,8 +1,12 @@ mod config; mod etcd; +mod git_mirror; +mod nix_build; +mod poller; mod updater; use std::time::Duration; +use tokio::sync::watch; use tracing::info; #[tokio::main] @@ -14,31 +18,85 @@ async fn main() -> anyhow::Result<()> { .init(); let cfg = config::Config::from_env()?; - let poll_interval = Duration::from_secs(cfg.poll_interval_secs); - info!(poll_interval_secs = cfg.poll_interval_secs, "csf-updater started"); + info!( + poll_interval_secs = cfg.poll_interval_secs, + infra_repo_github = %cfg.infra_repo_github, + "csf-updater started" + ); - let mut last_applied = String::new(); + let cfg = std::sync::Arc::new(cfg); + let cfg_poller = cfg.clone(); + let cfg_executor = cfg.clone(); + + let poller_task = tokio::spawn(async move { + run_poller_loop(&cfg_poller).await; + }); + + let executor_task = tokio::spawn(async move { + run_executor_loop(&cfg_executor).await; + }); + + tokio::select! 
{ + _ = poller_task => tracing::error!("poller task exited unexpectedly"), + _ = executor_task => tracing::error!("executor task exited unexpectedly"), + } + + Ok(()) +} + +async fn run_poller_loop(cfg: &config::Config) { + let mut last_etag: Option = None; + let interval = Duration::from_secs(cfg.poll_interval_secs); loop { - match run_once(&cfg, &last_applied).await { - Ok(Some(version)) => { - last_applied = version; + match git_mirror::sync(&cfg.infra_repo_mirror_dir, &cfg.infra_repo_mirror_url).await { + Ok(()) => {} + Err(e) => { + tracing::error!(error = %e, "git mirror sync failed"); + tokio::time::sleep(interval).await; + continue; } - Ok(None) => {} + } + + let mut etcd = match etcd::Client::connect(cfg).await { + Ok(c) => c, Err(e) => { - tracing::error!(error = %e, "update cycle error"); + tracing::error!(error = %e, "etcd connect failed in poller"); + tokio::time::sleep(interval).await; + continue; } + }; + + match poller::poll_and_update(cfg, &mut etcd, &mut last_etag).await { + Ok(Some(sha)) => info!(sha = %sha, "available_flake_rev updated"), + Ok(None) => {} + Err(e) => tracing::error!(error = %e, "poll failed"), + } + + tokio::time::sleep(interval).await; + } +} + +async fn run_executor_loop(cfg: &config::Config) { + let mut last_applied = String::new(); + let interval = Duration::from_secs(10); + + loop { + tokio::time::sleep(interval).await; + + match execute_once(cfg, &last_applied).await { + Ok(Some(rev)) => last_applied = rev, + Ok(None) => {} + Err(e) => tracing::error!(error = %e, "executor cycle failed"), } - tokio::time::sleep(poll_interval).await; } } -async fn run_once(cfg: &config::Config, last_applied: &str) -> anyhow::Result> { +async fn execute_once(cfg: &config::Config, last_applied: &str) -> anyhow::Result> { let mut etcd = etcd::Client::connect(cfg).await?; if etcd.get(etcd::PAUSED_KEY).await?.as_deref() == Some("true") { - tracing::info!("updates paused, skipping"); return Ok(None); } @@ -57,10 +115,33 @@ async fn run_once(cfg: 
&config::Config, last_applied: &str) -> anyhow::Result {} + Err(e) => { + tracing::error!(error = %e, flake_rev = %desired, "nix build failed"); + etcd.put(etcd::BUILD_STATUS_KEY, "failed").await?; + etcd.put(etcd::RESULT_KEY, "failed").await?; + return Ok(Some(desired)); + } + } + + match updater::switch(cfg, &desired).await { Ok(()) => { etcd.put(etcd::BUILD_STATUS_KEY, "ready").await?; etcd.put(etcd::RESULT_KEY, "success").await?; @@ -68,7 +149,7 @@ async fn run_once(cfg: &config::Config, last_applied: &str) -> anyhow::Result { - tracing::error!(error = %e, flake_rev = %desired, "update failed"); + tracing::error!(error = %e, flake_rev = %desired, "nixos-rebuild switch failed"); etcd.put(etcd::BUILD_STATUS_KEY, "failed").await?; etcd.put(etcd::RESULT_KEY, "failed").await?; Ok(Some(desired)) diff --git a/control-plane/csf-updater/src/nix_build.rs b/control-plane/csf-updater/src/nix_build.rs new file mode 100644 index 0000000..bed7e34 --- /dev/null +++ b/control-plane/csf-updater/src/nix_build.rs @@ -0,0 +1,36 @@ +use anyhow::{bail, Result}; +use tokio::process::Command; +use tokio::sync::watch; +use tracing::info; + +pub async fn build(mirror_dir: &str, rev: &str, mut cancel: watch::Receiver) -> Result<()> { + let flake_url = format!("git+file://{}?rev={}", mirror_dir, rev); + + info!(flake_rev = %rev, "starting nix build"); + + let mut child = Command::new("nixos-rebuild") + .args(["build", "--flake", &flake_url]) + .spawn()?; + + tokio::select! 
{ + result = child.wait() => { + let status = result?; + if !status.success() { + bail!("nix build failed for rev {}", rev); + } + info!(flake_rev = %rev, "nix build complete"); + Ok(()) + } + _ = cancel.changed() => { + if *cancel.borrow() { + let _ = child.kill().await; + bail!("nix build cancelled for rev {}", rev); + } + let status = child.wait().await?; + if !status.success() { + bail!("nix build failed for rev {}", rev); + } + Ok(()) + } + } +} diff --git a/control-plane/csf-updater/src/poller.rs b/control-plane/csf-updater/src/poller.rs new file mode 100644 index 0000000..68c80fd --- /dev/null +++ b/control-plane/csf-updater/src/poller.rs @@ -0,0 +1,55 @@ +use anyhow::Result; +use reqwest::header::{ETAG, IF_NONE_MATCH}; +use serde::Deserialize; +use tracing::info; + +use crate::config::Config; +use crate::etcd; + +#[derive(Debug, Deserialize)] +struct GitHubCommit { + sha: String, +} + +pub async fn poll_and_update(cfg: &Config, etcd: &mut etcd::Client, last_etag: &mut Option) -> Result> { + let url = format!( + "https://api.github.com/repos/{}/commits/{}", + cfg.infra_repo_github, cfg.infra_repo_branch + ); + + let mut req = reqwest::Client::new() + .get(&url) + .header("User-Agent", "csf-updater") + .header("Accept", "application/vnd.github.v3+json"); + + if let Some(etag) = last_etag.as_deref() { + req = req.header(IF_NONE_MATCH, etag); + } + + let resp = req.send().await?; + + if resp.status() == reqwest::StatusCode::NOT_MODIFIED { + return Ok(None); + } + + if !resp.status().is_success() { + anyhow::bail!("GitHub API returned {}", resp.status()); + } + + if let Some(etag) = resp.headers().get(ETAG) { + *last_etag = Some(etag.to_str()?.to_string()); + } + + let commit: GitHubCommit = resp.json().await?; + let sha = commit.sha; + + let current = etcd.get(etcd::AVAILABLE_FLAKE_REV_KEY).await?; + if current.as_deref() == Some(&sha) { + return Ok(None); + } + + etcd.put(etcd::AVAILABLE_FLAKE_REV_KEY, &sha).await?; + info!(sha = %sha, "new flake rev 
available"); + + Ok(Some(sha)) +} diff --git a/control-plane/csf-updater/src/updater.rs b/control-plane/csf-updater/src/updater.rs index e22c524..fa0d092 100644 --- a/control-plane/csf-updater/src/updater.rs +++ b/control-plane/csf-updater/src/updater.rs @@ -1,44 +1,18 @@ use anyhow::{bail, Result}; +use tokio::process::Command; use tracing::info; use crate::config::Config; -use crate::etcd; - -pub async fn run(cfg: &Config, flake_rev: &str, _etcd: &mut etcd::Client) -> Result<()> { - nix_build(cfg, flake_rev).await?; - nix_switch(cfg, flake_rev).await -} - -async fn nix_build(cfg: &Config, flake_rev: &str) -> Result<()> { - info!(flake_rev = %flake_rev, "running nix build"); +pub async fn switch(cfg: &Config, flake_rev: &str) -> Result<()> { let flake_url = format!( - "git+http://{}?rev={}", - cfg.infra_repo_mirror_url, flake_rev + "git+file://{}?rev={}", + cfg.infra_repo_mirror_dir, flake_rev ); - let status = tokio::process::Command::new("nixos-rebuild") - .args(["build", "--flake", &flake_url]) - .status() - .await?; - - if !status.success() { - bail!("nix build failed for rev {}", flake_rev); - } - - info!(flake_rev = %flake_rev, "nix build complete"); - Ok(()) -} - -async fn nix_switch(cfg: &Config, flake_rev: &str) -> Result<()> { info!(flake_rev = %flake_rev, "running nixos-rebuild switch"); - let flake_url = format!( - "git+http://{}?rev={}", - cfg.infra_repo_mirror_url, flake_rev - ); - - let status = tokio::process::Command::new("nixos-rebuild") + let status = Command::new("nixos-rebuild") .args(["switch", "--flake", &flake_url]) .status() .await?; From 97914a6e052340d11571efce801b00d82b45583b Mon Sep 17 00:00:00 2001 From: CodeMaster4711 Date: Mon, 6 Apr 2026 20:03:07 +0200 Subject: [PATCH 07/16] feat: propagate desired_flake_rev via heartbeat response to agent update trigger --- Cargo.lock | 1 + agent/src/client.rs | 11 ++++- agent/src/main.rs | 13 ++++- agent/src/update_watch.rs | 51 ++++++++++++++++++++ control-plane/registry/Cargo.toml | 1 + 
control-plane/registry/src/handlers/agent.rs | 19 ++++++++ control-plane/registry/src/main.rs | 4 ++ control-plane/registry/src/models/agent.rs | 1 + control-plane/registry/src/server.rs | 1 + 9 files changed, 99 insertions(+), 3 deletions(-) create mode 100644 agent/src/update_watch.rs diff --git a/Cargo.lock b/Cargo.lock index 7901781..ccc6d17 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3851,6 +3851,7 @@ dependencies = [ "chrono", "dotenvy", "entity", + "etcd-client", "migration", "opentelemetry", "opentelemetry-otlp", diff --git a/agent/src/client.rs b/agent/src/client.rs index ab6bcbf..b3d806c 100644 --- a/agent/src/client.rs +++ b/agent/src/client.rs @@ -56,6 +56,11 @@ pub struct ContainerStatus { pub status: String, } +#[derive(Debug, Deserialize)] +pub struct HeartbeatResponse { + pub desired_flake_rev: Option, +} + #[derive(Debug, Deserialize)] pub struct AssignedWorkload { pub id: String, @@ -143,7 +148,7 @@ impl ApiClient { api_key: &str, container_statuses: Option>, metrics: Option, - ) -> Result<()> { + ) -> Result { let url = format!( "{}/api/registry/agents/{}/heartbeat", self.gateway_url, agent_id @@ -189,7 +194,9 @@ impl ApiClient { anyhow::bail!("Heartbeat failed status={}", status); } - Ok(()) + resp.json::() + .await + .context("Failed to parse heartbeat response") } pub async fn fetch_assigned_workloads( diff --git a/agent/src/main.rs b/agent/src/main.rs index 320afa6..aaa76c2 100644 --- a/agent/src/main.rs +++ b/agent/src/main.rs @@ -4,6 +4,7 @@ mod docker; mod pki; mod rbd; mod system; +mod update_watch; use anyhow::{Context, Result}; use std::collections::HashMap; @@ -169,6 +170,7 @@ async fn run_heartbeat_loop( ) { let mut interval = tokio::time::interval(Duration::from_secs(interval_secs)); let mut failure_count: u32 = 0; + let mut current_flake_rev = String::new(); loop { tokio::select! 
{ @@ -183,11 +185,20 @@ async fn run_heartbeat_loop( let metrics = system::collect_metrics(); match client.heartbeat(agent_id, api_key, Some(statuses), Some(metrics)).await { - Ok(_) => { + Ok(resp) => { if failure_count > 0 { info!(agent_id = %agent_id, "Heartbeat recovered after {} failures", failure_count); failure_count = 0; } + + if let Some(rev) = resp.desired_flake_rev { + let rev_clone = rev.clone(); + let current = current_flake_rev.clone(); + tokio::spawn(async move { + update_watch::handle(agent_id, &rev_clone, ¤t).await; + }); + current_flake_rev = rev; + } } Err(e) => { failure_count += 1; diff --git a/agent/src/update_watch.rs b/agent/src/update_watch.rs new file mode 100644 index 0000000..90dcd2c --- /dev/null +++ b/agent/src/update_watch.rs @@ -0,0 +1,51 @@ +use std::time::Duration; +use tokio::fs; +use tracing::{info, warn}; +use uuid::Uuid; + +const TRIGGER_FILE: &str = "/var/lib/csf/update_trigger"; +const MAX_JITTER_SECS: u64 = 300; + +pub async fn handle(agent_id: Uuid, desired_flake_rev: &str, current_flake_rev: &str) { + if desired_flake_rev == current_flake_rev { + return; + } + + if !is_valid_sha(desired_flake_rev) { + warn!(flake_rev = %desired_flake_rev, "received invalid flake rev in heartbeat response"); + return; + } + + let jitter = jitter_delay(agent_id); + info!( + flake_rev = %desired_flake_rev, + jitter_secs = jitter, + "update signal received, waiting before writing trigger" + ); + + tokio::time::sleep(Duration::from_secs(jitter)).await; + + if let Err(e) = write_trigger(desired_flake_rev).await { + warn!(error = %e, flake_rev = %desired_flake_rev, "failed to write update trigger file"); + } else { + info!(flake_rev = %desired_flake_rev, "update trigger written"); + } +} + +async fn write_trigger(flake_rev: &str) -> anyhow::Result<()> { + if let Some(parent) = std::path::Path::new(TRIGGER_FILE).parent() { + fs::create_dir_all(parent).await?; + } + fs::write(TRIGGER_FILE, flake_rev).await?; + Ok(()) +} + +fn jitter_delay(agent_id: 
Uuid) -> u64 { + let bytes = agent_id.as_bytes(); + let val = u64::from_le_bytes(bytes[..8].try_into().unwrap_or([0u8; 8])); + val % MAX_JITTER_SECS +} + +fn is_valid_sha(rev: &str) -> bool { + rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) +} diff --git a/control-plane/registry/Cargo.toml b/control-plane/registry/Cargo.toml index c603976..744e278 100644 --- a/control-plane/registry/Cargo.toml +++ b/control-plane/registry/Cargo.toml @@ -38,6 +38,7 @@ chrono = { workspace = true, features = ["serde"] } sea-orm = { workspace = true } reqwest = { workspace = true } +etcd-client = { workspace = true } # Crypto sha2 = { workspace = true } diff --git a/control-plane/registry/src/handlers/agent.rs b/control-plane/registry/src/handlers/agent.rs index 7727626..3136756 100644 --- a/control-plane/registry/src/handlers/agent.rs +++ b/control-plane/registry/src/handlers/agent.rs @@ -202,9 +202,12 @@ pub async fn heartbeat( } } + let desired_flake_rev = read_desired_flake_rev(&state.etcd_endpoints).await; + Ok(Json(HeartbeatResponse { success: true, message: "Heartbeat recorded".to_string(), + desired_flake_rev, })) } Err(e) => Err(( @@ -216,6 +219,22 @@ pub async fn heartbeat( } } +async fn read_desired_flake_rev(etcd_endpoints: &str) -> Option { + let mut client = etcd_client::Client::connect([etcd_endpoints], None) + .await + .ok()?; + + let resp = client + .get("/csf/config/desired_flake_rev", None) + .await + .ok()?; + + resp.kvs() + .first() + .and_then(|kv| std::str::from_utf8(kv.value()).ok()) + .map(|s| s.to_string()) +} + async fn forward_container_statuses( state: &crate::server::AppState, statuses: Vec, diff --git a/control-plane/registry/src/main.rs b/control-plane/registry/src/main.rs index 79d0fc4..2a4a9a5 100644 --- a/control-plane/registry/src/main.rs +++ b/control-plane/registry/src/main.rs @@ -51,6 +51,9 @@ async fn main() -> anyhow::Result<()> { .build() .expect("Failed to build HTTP client"); + let etcd_endpoints = 
std::env::var("ETCD_ENDPOINTS") + .unwrap_or_else(|_| "http://localhost:2379".to_string()); + let state = server::AppState { token_manager: token_manager.clone(), bootstrap_token_manager: bootstrap_token_manager.clone(), @@ -61,6 +64,7 @@ async fn main() -> anyhow::Result<()> { scheduler_url, gateway_url, http_client, + etcd_endpoints, }; let token_cleanup_handle = { diff --git a/control-plane/registry/src/models/agent.rs b/control-plane/registry/src/models/agent.rs index 4804ec6..f6c7a4d 100644 --- a/control-plane/registry/src/models/agent.rs +++ b/control-plane/registry/src/models/agent.rs @@ -135,6 +135,7 @@ pub struct HeartbeatRequest { pub struct HeartbeatResponse { pub success: bool, pub message: String, + pub desired_flake_rev: Option, } #[derive(Debug, Serialize, Deserialize)] diff --git a/control-plane/registry/src/server.rs b/control-plane/registry/src/server.rs index fbdccc6..a84966b 100644 --- a/control-plane/registry/src/server.rs +++ b/control-plane/registry/src/server.rs @@ -31,6 +31,7 @@ pub struct AppState { pub scheduler_url: String, pub gateway_url: String, pub http_client: Client, + pub etcd_endpoints: String, } pub async fn health_check() -> impl IntoResponse { From 7f0d06ffe99cc54f40b4b95e5ddb8eab5f858125 Mon Sep 17 00:00:00 2001 From: CodeMaster4711 Date: Mon, 6 Apr 2026 20:04:51 +0200 Subject: [PATCH 08/16] ci: add update-infra job to push versions.nix to CSFX-Infra on release --- .github/workflows/docker-build.yml | 81 ++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 1909865..490ecb6 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -297,6 +297,87 @@ jobs: env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + update-infra: + name: Update CSFX-Infra versions.nix + runs-on: ubuntu-latest + needs: [prepare, manifest, build-binaries, attach-binaries-release] + if: needs.prepare.outputs.is_release == 'true' + 
steps: + - uses: actions/checkout@v4 + with: + repository: ${{ github.repository_owner }}/CSFX-Infra + token: ${{ secrets.INFRA_REPO_TOKEN }} + path: infra + + - uses: actions/download-artifact@v4 + with: + pattern: digest-* + path: /tmp/digests + merge-multiple: true + + - uses: actions/download-artifact@v4 + with: + pattern: csf-agent-* + path: /tmp/binaries + merge-multiple: true + + - name: Write versions.nix + run: | + VERSION="${{ needs.prepare.outputs.version }}" + ORG=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') + REPO="${{ github.repository }}" + RELEASE_BASE="https://github.com/${REPO}/releases/download/v${VERSION}" + + get_digest() { + local svc=$1 + local amd=$(tr -d '[:space:]' < /tmp/digests/${svc}-amd64.txt 2>/dev/null || echo "") + local arm=$(tr -d '[:space:]' < /tmp/digests/${svc}-arm64.txt 2>/dev/null || echo "") + echo "ghcr.io/${ORG}/csf-ce-${svc}@${amd}" + } + + get_sha256() { + local binary=$1 + local arch=$2 + cat /tmp/binaries/${binary}-${arch}.sha256 2>/dev/null | awk '{print $1}' + } + + cat > infra/versions.nix < Date: Mon, 6 Apr 2026 20:13:06 +0200 Subject: [PATCH 09/16] feat: implement watchdog heartbeat counter in registry and csf-updater --- control-plane/csf-updater/src/etcd.rs | 9 ++++++++ control-plane/csf-updater/src/main.rs | 1 + control-plane/registry/src/handlers/agent.rs | 22 ++++++++++++++++++++ 3 files changed, 32 insertions(+) diff --git a/control-plane/csf-updater/src/etcd.rs b/control-plane/csf-updater/src/etcd.rs index 4ee8246..3ea61c0 100644 --- a/control-plane/csf-updater/src/etcd.rs +++ b/control-plane/csf-updater/src/etcd.rs @@ -7,6 +7,7 @@ pub const DESIRED_FLAKE_REV_KEY: &str = "/csf/config/desired_flake_rev"; pub const BUILD_STATUS_KEY: &str = "/csf/config/cp_build_status"; pub const RESULT_KEY: &str = "/csf/config/last_build_result"; pub const PAUSED_KEY: &str = "/csf/config/update_paused"; +pub const NODE_HEARTBEAT_PREFIX: &str = "/csf/nodes/"; pub struct Client { inner: 
etcd_client::Client, @@ -32,4 +33,12 @@ impl Client { self.inner.put(key, value.as_bytes(), None).await?; Ok(()) } + + pub async fn delete_prefix(&mut self, prefix: &str) -> Result<()> { + use etcd_client::DeleteOptions; + self.inner + .delete(prefix, Some(DeleteOptions::new().with_prefix())) + .await?; + Ok(()) + } } diff --git a/control-plane/csf-updater/src/main.rs b/control-plane/csf-updater/src/main.rs index 1c00315..4d60a10 100644 --- a/control-plane/csf-updater/src/main.rs +++ b/control-plane/csf-updater/src/main.rs @@ -128,6 +128,7 @@ async fn execute_once(cfg: &config::Config, last_applied: &str) -> anyhow::Resul info!(flake_rev = %desired, last_applied = %last_applied, "starting update"); etcd.put(etcd::BUILD_STATUS_KEY, "building").await?; + etcd.delete_prefix(etcd::NODE_HEARTBEAT_PREFIX).await?; let (_cancel_tx, cancel_rx) = watch::channel(false); diff --git a/control-plane/registry/src/handlers/agent.rs b/control-plane/registry/src/handlers/agent.rs index 3136756..6499e03 100644 --- a/control-plane/registry/src/handlers/agent.rs +++ b/control-plane/registry/src/handlers/agent.rs @@ -203,6 +203,7 @@ pub async fn heartbeat( } let desired_flake_rev = read_desired_flake_rev(&state.etcd_endpoints).await; + increment_post_update_heartbeats(&state.etcd_endpoints, agent_id).await; Ok(Json(HeartbeatResponse { success: true, @@ -235,6 +236,27 @@ async fn read_desired_flake_rev(etcd_endpoints: &str) -> Option { .map(|s| s.to_string()) } +async fn increment_post_update_heartbeats(etcd_endpoints: &str, agent_id: Uuid) { + let key = format!("/csf/nodes/{}/post_update_heartbeats", agent_id); + + let mut client = match etcd_client::Client::connect([etcd_endpoints], None).await { + Ok(c) => c, + Err(_) => return, + }; + + let current: u32 = client + .get(key.as_str(), None) + .await + .ok() + .and_then(|r| r.kvs().first().map(|kv| kv.value().to_vec())) + .and_then(|v| std::str::from_utf8(&v).ok().and_then(|s| s.parse().ok())) + .unwrap_or(0); + + let _ = client + 
.put(key.as_str(), (current + 1).to_string().as_bytes(), None) + .await; +} + async fn forward_container_statuses( state: &crate::server::AppState, statuses: Vec, From 88c005146f9afc59051f707160ca3d2eb1aff8bc Mon Sep 17 00:00:00 2001 From: CodeMaster4711 Date: Mon, 6 Apr 2026 20:19:33 +0200 Subject: [PATCH 10/16] feat: propagate post_update_heartbeats counter to agent for watchdog health check --- agent/src/client.rs | 1 + agent/src/main.rs | 4 ++++ agent/src/update_watch.rs | 8 +++++++ control-plane/registry/src/handlers/agent.rs | 24 ++++++++++++-------- control-plane/registry/src/models/agent.rs | 1 + 5 files changed, 29 insertions(+), 9 deletions(-) diff --git a/agent/src/client.rs b/agent/src/client.rs index b3d806c..ec33ebc 100644 --- a/agent/src/client.rs +++ b/agent/src/client.rs @@ -59,6 +59,7 @@ pub struct ContainerStatus { #[derive(Debug, Deserialize)] pub struct HeartbeatResponse { pub desired_flake_rev: Option, + pub post_update_heartbeats: Option, } #[derive(Debug, Deserialize)] diff --git a/agent/src/main.rs b/agent/src/main.rs index aaa76c2..595a39e 100644 --- a/agent/src/main.rs +++ b/agent/src/main.rs @@ -191,6 +191,10 @@ async fn run_heartbeat_loop( failure_count = 0; } + if let Some(count) = resp.post_update_heartbeats { + update_watch::write_heartbeat_counter(count).await; + } + if let Some(rev) = resp.desired_flake_rev { let rev_clone = rev.clone(); let current = current_flake_rev.clone(); diff --git a/agent/src/update_watch.rs b/agent/src/update_watch.rs index 90dcd2c..1fd1004 100644 --- a/agent/src/update_watch.rs +++ b/agent/src/update_watch.rs @@ -4,6 +4,7 @@ use tracing::{info, warn}; use uuid::Uuid; const TRIGGER_FILE: &str = "/var/lib/csf/update_trigger"; +const HEARTBEAT_COUNTER_FILE: &str = "/var/lib/csf/post_update_heartbeats"; const MAX_JITTER_SECS: u64 = 300; pub async fn handle(agent_id: Uuid, desired_flake_rev: &str, current_flake_rev: &str) { @@ -32,6 +33,13 @@ pub async fn handle(agent_id: Uuid, desired_flake_rev: &str, 
current_flake_rev: } } +pub async fn write_heartbeat_counter(count: u32) { + if let Some(parent) = std::path::Path::new(HEARTBEAT_COUNTER_FILE).parent() { + let _ = fs::create_dir_all(parent).await; + } + let _ = fs::write(HEARTBEAT_COUNTER_FILE, count.to_string()).await; +} + async fn write_trigger(flake_rev: &str) -> anyhow::Result<()> { if let Some(parent) = std::path::Path::new(TRIGGER_FILE).parent() { fs::create_dir_all(parent).await?; diff --git a/control-plane/registry/src/handlers/agent.rs b/control-plane/registry/src/handlers/agent.rs index 6499e03..2431afb 100644 --- a/control-plane/registry/src/handlers/agent.rs +++ b/control-plane/registry/src/handlers/agent.rs @@ -203,12 +203,14 @@ pub async fn heartbeat( } let desired_flake_rev = read_desired_flake_rev(&state.etcd_endpoints).await; - increment_post_update_heartbeats(&state.etcd_endpoints, agent_id).await; + let post_update_heartbeats = + increment_post_update_heartbeats(&state.etcd_endpoints, agent_id).await; Ok(Json(HeartbeatResponse { success: true, message: "Heartbeat recorded".to_string(), desired_flake_rev, + post_update_heartbeats, })) } Err(e) => Err(( @@ -236,13 +238,12 @@ async fn read_desired_flake_rev(etcd_endpoints: &str) -> Option { .map(|s| s.to_string()) } -async fn increment_post_update_heartbeats(etcd_endpoints: &str, agent_id: Uuid) { +async fn increment_post_update_heartbeats(etcd_endpoints: &str, agent_id: Uuid) -> Option { let key = format!("/csf/nodes/{}/post_update_heartbeats", agent_id); - let mut client = match etcd_client::Client::connect([etcd_endpoints], None).await { - Ok(c) => c, - Err(_) => return, - }; + let mut client = etcd_client::Client::connect([etcd_endpoints], None) + .await + .ok()?; let current: u32 = client .get(key.as_str(), None) @@ -252,9 +253,14 @@ async fn increment_post_update_heartbeats(etcd_endpoints: &str, agent_id: Uuid) .and_then(|v| std::str::from_utf8(&v).ok().and_then(|s| s.parse().ok())) .unwrap_or(0); - let _ = client - .put(key.as_str(), 
(current + 1).to_string().as_bytes(), None) - .await; + let next = current + 1; + + client + .put(key.as_str(), next.to_string().as_bytes(), None) + .await + .ok()?; + + Some(next) } async fn forward_container_statuses( diff --git a/control-plane/registry/src/models/agent.rs b/control-plane/registry/src/models/agent.rs index f6c7a4d..7f5e857 100644 --- a/control-plane/registry/src/models/agent.rs +++ b/control-plane/registry/src/models/agent.rs @@ -136,6 +136,7 @@ pub struct HeartbeatResponse { pub success: bool, pub message: String, pub desired_flake_rev: Option, + pub post_update_heartbeats: Option, } #[derive(Debug, Serialize, Deserialize)] From abdc5105d60b0a397f2c2613cf7f62e2587d4281 Mon Sep 17 00:00:00 2001 From: CodeMaster4711 Date: Wed, 8 Apr 2026 12:29:11 +0200 Subject: [PATCH 11/16] ci: new nix build process --- .github/workflows/docker-build.yml | 32 +- .github/workflows/prerelease.yml | 84 +++++ nixos-node/flake.nix | 52 ++- nixos-node/modules/csf-daemon.nix | 105 ------ nixos-node/modules/iso-configuration.nix | 345 ++++++++------------ nixos-node/modules/node-configuration.nix | 54 --- nixos-node/modules/server-configuration.nix | 254 ++++++-------- 7 files changed, 362 insertions(+), 564 deletions(-) delete mode 100644 nixos-node/modules/csf-daemon.nix delete mode 100644 nixos-node/modules/node-configuration.nix diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 490ecb6..de390fb 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -321,6 +321,16 @@ jobs: path: /tmp/binaries merge-multiple: true + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Write versions.nix run: | VERSION="${{ needs.prepare.outputs.version }}" @@ -328,17 +338,17 @@ jobs: REPO="${{ github.repository }}" 
RELEASE_BASE="https://github.com/${REPO}/releases/download/v${VERSION}" - get_digest() { + get_manifest_digest() { local svc=$1 - local amd=$(tr -d '[:space:]' < /tmp/digests/${svc}-amd64.txt 2>/dev/null || echo "") - local arm=$(tr -d '[:space:]' < /tmp/digests/${svc}-arm64.txt 2>/dev/null || echo "") - echo "ghcr.io/${ORG}/csf-ce-${svc}@${amd}" + local image="ghcr.io/${ORG}/csf-ce-${svc}:${VERSION}" + docker buildx imagetools inspect "${image}" \ + --format '{{json .Manifest}}' | jq -r '.digest' } get_sha256() { local binary=$1 local arch=$2 - cat /tmp/binaries/${binary}-${arch}.sha256 2>/dev/null | awk '{print $1}' + awk '{print $1}' /tmp/binaries/${binary}-${arch}.sha256 2>/dev/null } cat > infra/versions.nix </dev/null + } + + cat > infra/versions.nix <&2 + exit 1 + fi + + echo "[csf-install] target disk: $DISK" + + if [[ "$DISK" == *nvme* ]]; then + PART_BOOT="${DISK}p1" + PART_ROOT="${DISK}p2" + else + PART_BOOT="${DISK}1" + PART_ROOT="${DISK}2" + fi + + parted "$DISK" -- mklabel gpt + parted "$DISK" -- mkpart ESP fat32 1MB 512MB + parted "$DISK" -- mkpart primary ext4 512MB 100% + parted "$DISK" -- set 1 esp on + + mkfs.fat -F 32 -n boot "$PART_BOOT" + mkfs.ext4 -L nixos "$PART_ROOT" + + mount "$PART_ROOT" /mnt + mkdir -p /mnt/boot + mount "$PART_BOOT" /mnt/boot + + echo "[csf-install] partitioning complete, running nixos-install" + + nixos-install \ + --no-root-passwd \ + --flake /iso/csf-flake#csf-server + + echo "[csf-install] installation complete — rebooting in 5s" + sleep 5 + reboot + ''; + + logoText = builtins.readFile ../logo.txt; + + motd = pkgs.writeText "csf-motd" '' + ${logoText} + + ╔══════════════════════════════════════════════════════════════════╗ + ║ CSF Node Installer ║ + ║ ║ + ║ Automatische Installation startet in 10 Sekunden. ║ + ║ CTRL+C zum Abbrechen und manuellem Eingriff. 
║ + ║ ║ + ║ Nach der Installation: ║ + ║ - csf-agent verbindet sich mit dem API Gateway ║ + ║ - Updates laufen automatisch via GitOps ║ + ║ ║ + ╚══════════════════════════════════════════════════════════════════╝ + ''; +in { imports = [ + updateUnitsModule ]; - # System configuration - system.stateVersion = "24.11"; + system.stateVersion = "25.05"; + + isoImage.volumeID = "CSF-NODE"; + isoImage.edition = lib.mkForce "csf"; + isoImage.prependToMenuLabel = "CSF Node Installer — "; + isoImage.makeEfiBootable = true; + isoImage.makeUsbBootable = true; + + isoImage.storeContents = [ + csf.agentPackage + csf.updaterPackage + ]; + + isoImage.contents = [ + { + source = ../../../CSFX-Infra; + target = "/csf-flake"; + } + ]; + + boot.kernelParams = [ + "console=ttyS0,115200n8" + "console=tty0" + "quiet" + ]; + + boot.loader.timeout = lib.mkForce 10; - # Networking networking = { - hostName = "csf-docker-test"; - firewall = { - enable = true; - allowedTCPPorts = [ - 8080 # Test nginx container - ]; - }; + hostName = "csf-installer"; + useDHCP = true; + firewall.enable = false; }; - # Enable Docker - virtualisation.docker.enable = true; + time.timeZone = "UTC"; - # System packages - environment.systemPackages = with pkgs; [ - # Docker tools - docker-compose - docker + services.getty.autologinUser = lib.mkForce "root"; - # Utilities - curl - wget - vim - htop - ]; + users.users.root = { + initialPassword = ""; + shell = pkgs.bash; + }; - # Auto-login as root on boot (for ISO convenience) - services.getty.autologinUser = "root"; + services.openssh = { + enable = true; + settings = { + PermitRootLogin = "yes"; + PasswordAuthentication = true; + }; + }; - # Docker Compose service for nginx test - systemd.services.docker-compose-test = { - description = "Docker Compose Test Service (nginx)"; - after = [ "docker.service" ]; - requires = [ "docker.service" ]; - wantedBy = [ "multi-user.target" ]; + environment.etc."motd".source = motd; + systemd.services.csf-autoinstall = { + 
description = "CSF automatic node installer"; + after = [ "network-online.target" "getty.target" ]; + wants = [ "network-online.target" ]; + wantedBy = [ "multi-user.target" ]; serviceConfig = { Type = "oneshot"; - RemainAfterExit = true; - WorkingDirectory = "/etc/docker-test"; - ExecStart = "${pkgs.docker-compose}/bin/docker-compose up -d"; - ExecStop = "${pkgs.docker-compose}/bin/docker-compose down"; + ExecStartPre = "${pkgs.coreutils}/bin/sleep 10"; + ExecStart = installScript; + StandardOutput = "journal+console"; + StandardError = "journal+console"; }; }; - # Activation script to setup Docker Compose - system.activationScripts.docker-setup = { - text = '' - # Create docker-compose directory - mkdir -p /etc/docker-test - - # Create docker-compose.yml - cat > /etc/docker-test/docker-compose.yml < /etc/docker-test/nginx.conf < /etc/docker-test/html/index.html < - - - CSF-Core Docker Test - - - -
-

CSF-Core Docker Test

-

Docker & Docker Compose funktionieren!

-

Diese Seite wird von nginx in einem Docker Container serviert.

-

Health Check

-
- - -EOF - - # Create test script - cat > /root/test-docker.sh <.*' || echo "Port 8080 not responding" -echo "" -echo "Health check:" -curl -s http://localhost:8080/health || echo "Health check failed" -echo "" -echo "=== Test Complete ===" -EOF - chmod +x /root/test-docker.sh - ''; - deps = []; + nix.settings = { + experimental-features = [ "nix-command" "flakes" ]; + trusted-users = [ "root" ]; }; - # Boot message with logo - environment.etc."issue".text = '' - - - - - ..,,,,,,,,,,,,,,,,,,,,,,,;,,,,,,,,,,,,,,'.. . - ..ckXXNNNNNNNNNNNNNNNNNNNNNNNNNNXXXXXXXXKx;. - ..cONWMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWOc.. - ..ckNWMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMW0l.. . .. - ..ckNWMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMW0l.. . - ..ckXWMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMW0l.. - . .;kXWMMMMMMMMWKOkkkkkkkkkkkkkkkkkkkkkkkkxc.. . - .oNWMMMMMMMMNx,.......................... - .oNMMMMMMMMMK:. - .oNMMMMMMMMMK; ......................... - .oNMMMMMMMMMK; .'lddddddddddddddddddddddc'. . - .oNMMMMMMMMMK; .'o0NWWWWWWWWWWWWWWWWWWWWKd,. - .oNMMMMMMMMMK; .'l0NWMMMMMMMMMMMMMMMMMMWXx,. - .oNMMMMMMMMMK; .'l0NWMMMMMMMMMMMMMMMMMMWXx;. - .oNWMMMMMMMMK:'l0NWMMMMMMMMMMMMMMMMMMWXx;. - .cKWMMMMMMMMXOOXWMMMMWWWWWWWWWWWWWWWXx;. - .,dKWMMMMMMXo;lKMMMNOl:;;;;;;;;;;;;,. - .,dKWMMMM0; 'kWMMMNOl'. - .,dKWMM0; 'kWMMMMMN0o,... - .,dKW0; 'kWMMMMMMMWKkc. - .,ox, 'kWMMMMMMMMNXo. - ... 'kWMMMMMMMMNXd. - 'kWMMMMMMMMNXd. - 'kWMMMMMMMMNXd. - 'kWMMMMMMMMNXo. - 'kWMMMMMMMMNXo. - 'kWMMMMMMMMNXo. - 'kWMMMMMMMNkl;. - 'kWMMMMMNk:... - 'kWMMMNk:. - 'kWWXx;. - . 'kKx;. - .;,. - .. 
- - - - - - - - - ╔═══════════════════════════════════════════════════════════╗ - ║ ║ - ║ CSF-Core Docker Test ISO ║ - ║ ║ - ║ Einfache Docker & Docker Compose Testumgebung ║ - ║ ║ - ║ Services: ║ - ║ - Docker: systemctl status docker ║ - ║ - Nginx Test: http://localhost:8080 ║ - ║ ║ - ║ Test commands: ║ - ║ ./test-docker.sh - Run comprehensive test ║ - ║ docker ps -a - List containers ║ - ║ docker-compose ps - Compose status ║ - ║ ║ - ╚═══════════════════════════════════════════════════════════╝ - - ''; -} \ No newline at end of file + environment.systemPackages = with pkgs; [ + git + curl + parted + dosfstools + e2fsprogs + jq + vim + ]; +} diff --git a/nixos-node/modules/node-configuration.nix b/nixos-node/modules/node-configuration.nix deleted file mode 100644 index 41b2f18..0000000 --- a/nixos-node/modules/node-configuration.nix +++ /dev/null @@ -1,54 +0,0 @@ -{ config, pkgs, lib, csf, ... }: - -{ - system.stateVersion = "25.05"; - - boot.loader.grub = { - enable = true; - device = "/dev/sda"; - }; - - networking = { - hostName = "csf-node"; - firewall = { - enable = true; - allowedTCPPorts = []; - }; - }; - - time.timeZone = "UTC"; - - users.users.root.hashedPassword = "!"; - - services.openssh = { - enable = true; - settings = { - PermitRootLogin = "no"; - PasswordAuthentication = false; - }; - }; - - services.csf-daemon = { - enable = true; - package = csf.agentPackage; - apiGateway = "http://gateway.csf.local:8000"; - heartbeatInterval = 60; - logLevel = "info"; - }; - - nix = { - settings = { - experimental-features = [ "nix-command" "flakes" ]; - auto-optimise-store = true; - }; - gc = { - automatic = true; - dates = "weekly"; - options = "--delete-older-than 30d"; - }; - }; - - environment.systemPackages = with pkgs; [ - curl - ]; -} diff --git a/nixos-node/modules/server-configuration.nix b/nixos-node/modules/server-configuration.nix index 904afc6..9e77852 100644 --- a/nixos-node/modules/server-configuration.nix +++ 
b/nixos-node/modules/server-configuration.nix @@ -1,38 +1,39 @@ -{ config, pkgs, lib, csf, ... }: +{ config, pkgs, lib, csf, versions, ... }: let + updateUnitsModule = import ../../../CSFX-Infra/modules/update-units.nix; composeDir = "/etc/csf-core"; - binDir = "/var/lib/csf-updater/bin"; - csfUpdaterBin = csf.updaterPackage; - csfAgentBin = csf.agentPackage; in { - system.stateVersion = "25.11"; + imports = [ updateUnitsModule ]; + + system.stateVersion = "25.05"; boot = { loader.grub = { enable = true; device = "/dev/sda"; - useOSProber = true; }; initrd.availableKernelModules = [ "ata_piix" "uhci_hcd" "virtio_pci" "virtio_scsi" "sd_mod" "sr_mod" ]; - initrd.kernelModules = []; - kernelModules = []; - extraModulePackages = []; }; fileSystems."/" = { - device = "/dev/disk/by-uuid/e4b27226-e75f-4cef-9dec-fc0c6f2185ac"; + device = "/dev/disk/by-label/nixos"; fsType = "ext4"; }; + fileSystems."/boot" = { + device = "/dev/disk/by-label/boot"; + fsType = "vfat"; + }; + swapDevices = []; nixpkgs.hostPlatform = lib.mkDefault "x86_64-linux"; networking = { hostName = "csf-node"; - networkmanager.enable = true; + useDHCP = true; firewall = { enable = true; allowedTCPPorts = [ 22 8000 ]; @@ -49,136 +50,93 @@ in }; }; - users.users.rootcsf = { + users.users.admin = { isNormalUser = true; - description = "rootcsf"; - extraGroups = [ "networkmanager" "wheel" "docker" ]; + extraGroups = [ "wheel" "docker" ]; + openssh.authorizedKeys.keys = []; }; security.sudo.wheelNeedsPassword = false; - security.sudo.extraRules = [ - { - users = [ "csf-updater" ]; - commands = [ - { command = "/run/current-system/sw/bin/systemctl restart csf-daemon"; options = [ "NOPASSWD" ]; } - { command = "/run/current-system/sw/bin/systemctl restart csf-updater"; options = [ "NOPASSWD" ]; } - ]; - } - ]; - virtualisation.docker = { enable = true; enableOnBoot = true; }; - services.csf-daemon = { - enable = true; - package = csf.agentPackage; - binaryPath = "${binDir}/csf-agent"; - apiGateway = 
"http://localhost:8000"; - heartbeatInterval = 60; - logLevel = "info"; - }; - - environment.systemPackages = with pkgs; [ - docker-compose - curl - wget - vim - htop - git - tmux - lsof - ]; - - users.users.csf-updater = { + users.users.csf-agent = { isSystemUser = true; - group = "csf-updater"; - extraGroups = [ "docker" ]; - home = "/var/lib/csf-updater"; + group = "csf-agent"; + home = "/var/lib/csf-daemon"; createHome = true; - shell = pkgs.shadow; }; + users.groups.csf-agent = {}; users.groups.csf-updater = {}; systemd.tmpfiles.rules = [ - "d /var/lib/csf-updater 0710 csf-updater csf-daemon -" + "d /var/lib/csf-daemon 0750 csf-agent csf-agent -" + "d /var/lib/csf 0750 csf-agent csf-updater -" + "f /var/lib/csf/update_trigger 0660 csf-agent csf-updater -" + "d /var/lib/csf-updater 0750 root root -" + "d /var/lib/csf-updater/infra.git 0750 root root -" ]; - systemd.services.csf-updater = { - description = "CSF Control Plane Updater"; - after = [ "docker.service" "network-online.target" "csf-control-plane.service" ]; - requires = [ "docker.service" ]; - wants = [ "network-online.target" ]; + systemd.services.csf-agent = { + description = "CSF Agent Daemon"; wantedBy = [ "multi-user.target" ]; - + after = [ "network-online.target" ]; + wants = [ "network-online.target" ]; serviceConfig = { - Type = "simple"; - User = "csf-updater"; - Group = "csf-updater"; - EnvironmentFile = "/etc/csf-core/updater.env"; - ExecStart = "${binDir}/csf-updater"; - Restart = "always"; - RestartSec = "10"; + ExecStart = "${csf.agentPackage}/bin/csf-agent"; + User = "csf-agent"; + Group = "csf-agent"; + Restart = "on-failure"; + RestartSec = "10s"; + PrivateTmp = true; ProtectSystem = "strict"; - ProtectHome = true; - ReadWritePaths = [ composeDir "/tmp" binDir ]; + ReadWritePaths = [ "/var/lib/csf-daemon" "/var/lib/csf" ]; + NoNewPrivileges = true; }; - environment = { - ETCD_ENDPOINTS = "http://localhost:2379"; - ETCD_USERNAME = "csf"; - COMPOSE_FILE = 
"${composeDir}/docker-compose.yml"; - GHCR_ORG = "csfx-cloud"; - POLL_INTERVAL_SECS = "30"; + CSF_GATEWAY_URL = "http://localhost:8000"; + CSF_HEARTBEAT_INTERVAL = "60"; RUST_LOG = "info"; - BINARY_DIR = binDir; - GITHUB_RELEASE_BASE_URL = "https://github.com/csfx-cloud/CSF-Core/releases/download"; - PATH = lib.mkForce "/run/wrappers/bin:/nix/var/nix/profiles/default/bin:/run/current-system/sw/bin"; }; }; - systemd.services.csf-control-plane = { - description = "CSF Control Plane (Docker Compose)"; - after = [ "docker.service" "network-online.target" ]; - requires = [ "docker.service" ]; - wants = [ "network-online.target" ]; - partOf = [ "docker.service" ]; + systemd.services.csf-updater = { + description = "CSF GitOps Updater"; wantedBy = [ "multi-user.target" ]; - + after = [ "network-online.target" ]; + wants = [ "network-online.target" ]; serviceConfig = { - Type = "oneshot"; - RemainAfterExit = true; - WorkingDirectory = composeDir; - ExecStartPre = "${pkgs.docker}/bin/docker compose pull --quiet"; - ExecStart = "${pkgs.docker}/bin/docker compose up -d --remove-orphans"; - ExecStop = "${pkgs.docker}/bin/docker compose down"; - TimeoutStartSec = "600"; - TimeoutStopSec = "120"; + ExecStart = "${csf.updaterPackage}/bin/csf-updater"; + Restart = "on-failure"; + RestartSec = "10s"; + StateDirectory = "csf-updater"; + }; + environment = { + ETCD_ENDPOINTS = "http://localhost:2379"; + INFRA_REPO_GITHUB = "csfx-cloud/CSFX-Infra"; + INFRA_REPO_BRANCH = "main"; + INFRA_REPO_MIRROR_URL = "https://github.com/csfx-cloud/CSFX-Infra.git"; + INFRA_REPO_MIRROR_DIR = "/var/lib/csf-updater/infra.git"; + POLL_INTERVAL_SECS = "120"; + RUST_LOG = "info"; }; }; - system.activationScripts.csf-binaries = { - text = '' - mkdir -p ${binDir} - chown csf-updater:csf-daemon ${binDir} - chmod 750 ${binDir} - if [ ! 
-f ${binDir}/csf-updater ]; then - cp ${csfUpdaterBin}/bin/csf-updater ${binDir}/csf-updater - chown csf-updater:csf-updater ${binDir}/csf-updater - chmod 750 ${binDir}/csf-updater - fi - if [ ! -f ${binDir}/csf-agent ]; then - cp ${csfAgentBin}/bin/csf-agent ${binDir}/csf-agent - chown csf-updater:csf-updater ${binDir}/csf-agent - chmod 750 ${binDir}/csf-agent - fi - ''; - deps = []; + services.csf-update-units = { + enable = true; + nixCacheUrl = "http://localhost:5000"; + nixCachePublicKey = ""; }; - system.activationScripts.csf-core-setup = { + nix.settings = { + experimental-features = [ "nix-command" "flakes" ]; + trusted-users = [ "root" ]; + }; + + system.activationScripts.csf-core-compose = { text = '' mkdir -p ${composeDir} @@ -189,15 +147,13 @@ services: container_name: csf-etcd command: - etcd - - --advertise-client-urls=http://etcd:2379 + - --advertise-client-urls=http://0.0.0.0:2379 - --listen-client-urls=http://0.0.0.0:2379 - --data-dir=/etcd-data volumes: - etcd_data:/etcd-data ports: - "2379:2379" - networks: - - csf-internal restart: unless-stopped patroni: @@ -227,8 +183,6 @@ services: volumes: - patroni_data:/home/postgres/pgdata - /etc/csf-core/patroni-bootstrap.sh:/etc/csf-bootstrap.sh:ro - networks: - - csf-internal depends_on: - etcd healthcheck: @@ -240,53 +194,40 @@ services: restart: unless-stopped api-gateway: - image: ghcr.io/csfx-cloud/csf-ce-api-gateway:0.2.2-alpha.47 + image: ghcr.io/csfx-cloud/csf-ce-api-gateway@${versions.csf.images.api-gateway.digest} container_name: csf-api-gateway - env_file: - - /etc/csf-core/gateway.env environment: DATABASE_URL: postgres://csf:csfpassword@patroni:5432/csf_core - RUST_LOG: info JWT_SECRET: change_me_in_production - RSA_KEY_SIZE: "4096" + ETCD_ENDPOINTS: http://etcd:2379 REGISTRY_SERVICE_URL: http://registry:8001 SCHEDULER_SERVICE_URL: http://scheduler:8002 VOLUME_MANAGER_URL: http://volume-manager:8003 FAILOVER_CONTROLLER_URL: http://failover-controller:8004 SDN_CONTROLLER_URL: 
http://sdn-controller:8005 + RUST_LOG: info ports: - "8000:8000" depends_on: patroni: condition: service_healthy - networks: - - csf-internal restart: unless-stopped - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8000/api/system/health"] - interval: 30s - timeout: 10s - retries: 3 - start_period: 30s registry: - image: ghcr.io/csfx-cloud/csf-ce-registry:0.2.2-alpha.47 + image: ghcr.io/csfx-cloud/csf-ce-registry@${versions.csf.images.registry.digest} container_name: csf-registry environment: DATABASE_URL: postgres://csf:csfpassword@patroni:5432/csf_core ETCD_ENDPOINTS: http://etcd:2379 REGISTRY_PORT: "8001" RUST_LOG: info - SCHEDULER_SERVICE_URL: http://scheduler:8002 depends_on: patroni: condition: service_healthy - networks: - - csf-internal restart: unless-stopped scheduler: - image: ghcr.io/csfx-cloud/csf-ce-scheduler:0.2.2-alpha.47 + image: ghcr.io/csfx-cloud/csf-ce-scheduler@${versions.csf.images.scheduler.digest} container_name: csf-scheduler environment: DATABASE_URL: postgres://csf:csfpassword@patroni:5432/csf_core @@ -296,32 +237,27 @@ services: depends_on: patroni: condition: service_healthy - networks: - - csf-internal restart: unless-stopped volume-manager: - image: ghcr.io/csfx-cloud/csf-ce-volume-manager:0.2.2-alpha.47 + image: ghcr.io/csfx-cloud/csf-ce-volume-manager@${versions.csf.images.volume-manager.digest} container_name: csf-volume-manager environment: DATABASE_URL: postgres://csf:csfpassword@patroni:5432/csf_core ETCD_ENDPOINTS: http://etcd:2379 VOLUME_MANAGER_PORT: "8003" RUST_LOG: info - volumes: - - /mnt/csf-volumes:/mnt/csf-volumes depends_on: patroni: condition: service_healthy - networks: - - csf-internal restart: unless-stopped failover-controller: - image: ghcr.io/csfx-cloud/csf-ce-failover-controller:0.2.2-alpha.47 + image: ghcr.io/csfx-cloud/csf-ce-failover-controller@${versions.csf.images.failover-controller.digest} container_name: csf-failover-controller environment: DATABASE_URL: 
postgres://csf:csfpassword@patroni:5432/csf_core + ETCD_ENDPOINTS: http://etcd:2379 FAILOVER_CONTROLLER_PORT: "8004" SCHEDULER_SERVICE_URL: http://scheduler:8002 VOLUME_MANAGER_URL: http://volume-manager:8003 @@ -329,32 +265,24 @@ services: depends_on: patroni: condition: service_healthy - networks: - - csf-internal restart: unless-stopped sdn-controller: - image: ghcr.io/csfx-cloud/csf-ce-sdn-controller:0.2.2-alpha.47 + image: ghcr.io/csfx-cloud/csf-ce-sdn-controller@${versions.csf.images.sdn-controller.digest} container_name: csf-sdn-controller environment: DATABASE_URL: postgres://csf:csfpassword@patroni:5432/csf_core - ETCD_URL: http://etcd:2379 + ETCD_ENDPOINTS: http://etcd:2379 SDN_CONTROLLER_PORT: "8005" RUST_LOG: info depends_on: patroni: condition: service_healthy - networks: - - csf-internal restart: unless-stopped volumes: etcd_data: patroni_data: - -networks: - csf-internal: - driver: bridge COMPOSE cat > ${composeDir}/patroni-bootstrap.sh <<'BOOTSTRAP' @@ -368,15 +296,27 @@ BOOTSTRAP deps = []; }; - nix = { - settings = { - experimental-features = [ "nix-command" "flakes" ]; - auto-optimise-store = true; - }; - gc = { - automatic = true; - dates = "weekly"; - options = "--delete-older-than 30d"; + systemd.services.csf-control-plane = { + description = "CSF Control Plane (Docker Compose)"; + after = [ "docker.service" "network-online.target" ]; + requires = [ "docker.service" ]; + wants = [ "network-online.target" ]; + wantedBy = [ "multi-user.target" ]; + serviceConfig = { + Type = "oneshot"; + RemainAfterExit = true; + WorkingDirectory = composeDir; + ExecStart = "${pkgs.docker}/bin/docker compose up -d --remove-orphans"; + ExecStop = "${pkgs.docker}/bin/docker compose down"; + TimeoutStartSec = "600"; }; }; + + environment.systemPackages = with pkgs; [ + docker-compose + curl + git + jq + etcd + ]; } From f9e45948215ef877674d32da42330632d80e7202 Mon Sep 17 00:00:00 2001 From: CodeMaster4711 Date: Fri, 10 Apr 2026 16:57:48 +0200 Subject: [PATCH 12/16] 
chore: push versions.nix updates to develop branch on pre-release --- .github/workflows/prerelease.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index 1f5dfa5..1a389ec 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -260,6 +260,7 @@ jobs: with: repository: ${{ github.repository_owner }}/CSFX-Infra token: ${{ secrets.INFRA_REPO_TOKEN }} + ref: develop path: infra - uses: actions/download-artifact@v4 @@ -330,7 +331,8 @@ jobs: cd infra git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" + git checkout -B develop git add versions.nix git diff --cached --quiet && echo "no changes" && exit 0 git commit -m "chore: update versions.nix for v${VERSION}" - git push + git push origin develop From 64dfe62b5724c642b6b6d26d30fc6cb2bc6c48dd Mon Sep 17 00:00:00 2001 From: CodeMaster4711 Date: Fri, 10 Apr 2026 17:13:50 +0200 Subject: [PATCH 13/16] feat: replace flake-rev API with version-based update scheduling --- .github/workflows/docker-build.yml | 4 +- .github/workflows/prerelease.yml | 2 + .../api-gateway/src/routes/update.rs | 23 +++-- control-plane/csf-updater/src/etcd.rs | 1 + control-plane/csf-updater/src/main.rs | 1 - control-plane/csf-updater/src/poller.rs | 96 +++++++++++++++---- 6 files changed, 100 insertions(+), 27 deletions(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index de390fb..574fa4e 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -386,7 +386,9 @@ jobs: git add versions.nix git diff --cached --quiet && echo "no changes" && exit 0 git commit -m "chore: update versions.nix for v${VERSION}" - git push + git push origin main + git tag "v${VERSION}" + git push origin "v${VERSION}" summary: name: Summary diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index 
1a389ec..1343eb0 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -336,3 +336,5 @@ jobs: git diff --cached --quiet && echo "no changes" && exit 0 git commit -m "chore: update versions.nix for v${VERSION}" git push origin develop + git tag "v${VERSION}" + git push origin "v${VERSION}" diff --git a/control-plane/api-gateway/src/routes/update.rs b/control-plane/api-gateway/src/routes/update.rs index 8a75570..c51e541 100644 --- a/control-plane/api-gateway/src/routes/update.rs +++ b/control-plane/api-gateway/src/routes/update.rs @@ -6,6 +6,7 @@ use std::env; use crate::auth::rbac::CanManageSystem; use crate::AppState; +const ETCD_DESIRED_VERSION_KEY: &str = "/csf/config/desired_version"; const ETCD_AVAILABLE_FLAKE_REV_KEY: &str = "/csf/config/available_flake_rev"; const ETCD_DESIRED_FLAKE_REV_KEY: &str = "/csf/config/desired_flake_rev"; const ETCD_BUILD_STATUS_KEY: &str = "/csf/config/cp_build_status"; @@ -14,18 +15,19 @@ const ETCD_PAUSED_KEY: &str = "/csf/config/update_paused"; #[derive(Debug, Deserialize)] pub struct UpdateRequest { - pub flake_rev: String, + pub version: String, } #[derive(Debug, Serialize)] pub struct UpdateResponse { pub status: String, - pub flake_rev: String, + pub version: String, } #[derive(Debug, Serialize)] pub struct UpdateStatusResponse { pub current_version: String, + pub desired_version: Option, pub available_flake_rev: Option, pub desired_flake_rev: Option, pub build_status: Option, @@ -57,25 +59,25 @@ async fn trigger_update( State(_state): State, Json(req): Json, ) -> Result, StatusCode> { - if !is_valid_sha(&req.flake_rev) { + if !is_valid_version(&req.version) { return Err(StatusCode::UNPROCESSABLE_ENTITY); } let mut client = etcd_client().await?; client - .put(ETCD_DESIRED_FLAKE_REV_KEY, req.flake_rev.as_bytes(), None) + .put(ETCD_DESIRED_VERSION_KEY, req.version.as_bytes(), None) .await .map_err(|e| { - tracing::error!(error = %e, "failed to write desired flake rev to etcd"); + 
tracing::error!(error = %e, "failed to write desired version to etcd"); StatusCode::INTERNAL_SERVER_ERROR })?; - tracing::info!(flake_rev = %req.flake_rev, "update requested"); + tracing::info!(version = %req.version, "update requested"); Ok(Json(UpdateResponse { status: "update_scheduled".to_string(), - flake_rev: req.flake_rev, + version: req.version, })) } @@ -85,6 +87,7 @@ async fn update_status( ) -> Result, StatusCode> { let mut client = etcd_client().await?; + let desired_version = etcd_get(&mut client, ETCD_DESIRED_VERSION_KEY).await?; let available_flake_rev = etcd_get(&mut client, ETCD_AVAILABLE_FLAKE_REV_KEY).await?; let desired_flake_rev = etcd_get(&mut client, ETCD_DESIRED_FLAKE_REV_KEY).await?; let build_status = etcd_get(&mut client, ETCD_BUILD_STATUS_KEY).await?; @@ -93,6 +96,7 @@ async fn update_status( Ok(Json(UpdateStatusResponse { current_version: env!("CARGO_PKG_VERSION").to_string(), + desired_version, available_flake_rev, desired_flake_rev, build_status, @@ -114,8 +118,9 @@ async fn etcd_get(client: &mut Client, key: &str) -> Result, Stat .map(|s| s.to_string())) } -fn is_valid_sha(rev: &str) -> bool { - rev.len() == 40 && rev.chars().all(|c| c.is_ascii_hexdigit()) +fn is_valid_version(version: &str) -> bool { + let v = version.trim_start_matches('v'); + !v.is_empty() && v.chars().all(|c| c.is_ascii_alphanumeric() || c == '.' 
|| c == '-') } async fn pause_updates( diff --git a/control-plane/csf-updater/src/etcd.rs b/control-plane/csf-updater/src/etcd.rs index 3ea61c0..8550b25 100644 --- a/control-plane/csf-updater/src/etcd.rs +++ b/control-plane/csf-updater/src/etcd.rs @@ -2,6 +2,7 @@ use anyhow::Result; use crate::config::Config; +pub const DESIRED_VERSION_KEY: &str = "/csf/config/desired_version"; pub const AVAILABLE_FLAKE_REV_KEY: &str = "/csf/config/available_flake_rev"; pub const DESIRED_FLAKE_REV_KEY: &str = "/csf/config/desired_flake_rev"; pub const BUILD_STATUS_KEY: &str = "/csf/config/cp_build_status"; diff --git a/control-plane/csf-updater/src/main.rs b/control-plane/csf-updater/src/main.rs index 4d60a10..1c00315 100644 --- a/control-plane/csf-updater/src/main.rs +++ b/control-plane/csf-updater/src/main.rs @@ -128,7 +128,6 @@ async fn execute_once(cfg: &config::Config, last_applied: &str) -> anyhow::Resul info!(flake_rev = %desired, last_applied = %last_applied, "starting update"); etcd.put(etcd::BUILD_STATUS_KEY, "building").await?; - etcd.delete_prefix(etcd::NODE_HEARTBEAT_PREFIX).await?; let (_cancel_tx, cancel_rx) = watch::channel(false); diff --git a/control-plane/csf-updater/src/poller.rs b/control-plane/csf-updater/src/poller.rs index 68c80fd..b02cff9 100644 --- a/control-plane/csf-updater/src/poller.rs +++ b/control-plane/csf-updater/src/poller.rs @@ -1,4 +1,4 @@ -use anyhow::Result; +use anyhow::{bail, Result}; use reqwest::header::{ETAG, IF_NONE_MATCH}; use serde::Deserialize; use tracing::info; @@ -6,18 +6,60 @@ use tracing::info; use crate::config::Config; use crate::etcd; +#[derive(Debug, Deserialize)] +struct GitHubTag { + object: GitHubObject, +} + +#[derive(Debug, Deserialize)] +struct GitHubObject { + sha: String, + #[serde(rename = "type")] + kind: String, +} + #[derive(Debug, Deserialize)] struct GitHubCommit { sha: String, } -pub async fn poll_and_update(cfg: &Config, etcd: &mut etcd::Client, last_etag: &mut Option) -> Result> { +pub async fn 
poll_and_update( + cfg: &Config, + etcd: &mut etcd::Client, + last_etag: &mut Option, +) -> Result> { + let desired_version = match etcd.get(etcd::DESIRED_VERSION_KEY).await? { + Some(v) if !v.is_empty() => v, + _ => return Ok(None), + }; + + let sha = resolve_version_to_sha(cfg, &desired_version, last_etag).await?; + + let current = etcd.get(etcd::AVAILABLE_FLAKE_REV_KEY).await?; + if current.as_deref() == Some(sha.as_str()) { + return Ok(None); + } + + etcd.put(etcd::AVAILABLE_FLAKE_REV_KEY, &sha).await?; + etcd.put(etcd::DESIRED_FLAKE_REV_KEY, &sha).await?; + info!(version = %desired_version, sha = %sha, "resolved version to flake rev"); + + Ok(Some(sha)) +} + +async fn resolve_version_to_sha( + cfg: &Config, + version: &str, + last_etag: &mut Option, +) -> Result { + let tag = format!("v{}", version.trim_start_matches('v')); let url = format!( - "https://api.github.com/repos/{}/commits/{}", - cfg.infra_repo_github, cfg.infra_repo_branch + "https://api.github.com/repos/{}/git/ref/tags/{}", + cfg.infra_repo_github, tag ); - let mut req = reqwest::Client::new() + let client = reqwest::Client::new(); + let mut req = client .get(&url) .header("User-Agent", "csf-updater") .header("Accept", "application/vnd.github.v3+json"); @@ -29,27 +71,49 @@ pub async fn poll_and_update(cfg: &Config, etcd: &mut etcd::Client, last_etag: & let resp = req.send().await?; if resp.status() == reqwest::StatusCode::NOT_MODIFIED { - return Ok(None); + bail!("tag not modified, no new sha available"); } if !resp.status().is_success() { - anyhow::bail!("GitHub API returned {}", resp.status()); + bail!( + "GitHub API returned {} for tag {}", + resp.status(), + tag + ); } if let Some(etag) = resp.headers().get(ETAG) { *last_etag = Some(etag.to_str()?.to_string()); } - let commit: GitHubCommit = resp.json().await?; - let sha = commit.sha; + let tag_ref: GitHubTag = resp.json().await?; - let current = etcd.get(etcd::AVAILABLE_FLAKE_REV_KEY).await?; - if current.as_deref() == Some(&sha) { - return 
Ok(None); - } + let sha = if tag_ref.object.kind == "tag" { + dereference_tag(cfg, &tag_ref.object.sha).await? + } else { + tag_ref.object.sha + }; - etcd.put(etcd::AVAILABLE_FLAKE_REV_KEY, &sha).await?; - info!(sha = %sha, "new flake rev available"); + Ok(sha) +} - Ok(Some(sha)) +async fn dereference_tag(cfg: &Config, tag_sha: &str) -> Result { + let url = format!( + "https://api.github.com/repos/{}/git/tags/{}", + cfg.infra_repo_github, tag_sha + ); + + let resp = reqwest::Client::new() + .get(&url) + .header("User-Agent", "csf-updater") + .header("Accept", "application/vnd.github.v3+json") + .send() + .await?; + + if !resp.status().is_success() { + bail!("GitHub API returned {} when dereferencing tag", resp.status()); + } + + let tag: GitHubTag = resp.json().await?; + Ok(tag.object.sha) } From 6888431b8a52a4827bc9493f7897534eb6b201f1 Mon Sep 17 00:00:00 2001 From: CodeMaster4711 Date: Sat, 11 Apr 2026 20:24:08 +0200 Subject: [PATCH 14/16] feat: renam csf to csfx --- .env.example | 14 +- .github/workflows/README.md | 106 ---- .github/workflows/docker-build.yml | 64 +-- .github/workflows/prerelease.yml | 42 +- .releaserc.json | 4 +- CHANGELOG.md | 462 +++++++++--------- Cargo.lock | 6 +- Cargo.toml | 6 +- README.md | 20 +- agent/Cargo.toml | 4 +- agent/build.rs | 4 +- agent/src/config.rs | 6 +- agent/src/docker.rs | 6 +- agent/src/main.rs | 12 +- agent/src/pki.rs | 8 +- agent/src/rbd.rs | 2 +- agent/src/update_watch.rs | 4 +- ...le.csf-updater => Dockerfile.csfx-updater} | 14 +- control-plane/Dockerfile.prod.shared | 26 +- control-plane/api-gateway/build.rs | 4 +- control-plane/api-gateway/src/auth_service.rs | 4 +- control-plane/api-gateway/src/main.rs | 4 +- control-plane/api-gateway/src/metrics.rs | 8 +- .../api-gateway/src/routes/system.rs | 2 +- .../api-gateway/src/routes/update.rs | 12 +- control-plane/api-gateway/src/self_monitor.rs | 2 +- control-plane/csf-updater/build.rs | 6 - .../{csf-migrate => csfx-migrate}/Cargo.toml | 4 +- .../{csf-migrate => 
csfx-migrate}/src/main.rs | 2 +- .../{csf-updater => csfx-updater}/Cargo.toml | 4 +- control-plane/csfx-updater/build.rs | 6 + .../src/config.rs | 2 +- .../{csf-updater => csfx-updater}/src/etcd.rs | 14 +- .../src/git_mirror.rs | 0 .../{csf-updater => csfx-updater}/src/main.rs | 2 +- .../src/nix_build.rs | 0 .../src/poller.rs | 4 +- .../src/updater.rs | 0 control-plane/failover-controller/src/main.rs | 7 +- .../failover-controller/src/metrics.rs | 8 +- control-plane/registry/README.md | 8 +- control-plane/registry/docker-compose.dev.yml | 6 +- control-plane/registry/src/handlers/agent.rs | 4 +- control-plane/registry/src/main.rs | 16 +- control-plane/registry/src/metrics.rs | 8 +- .../registry/src/services/api_keys.rs | 2 +- .../registry/src/services/bootstrap_tokens.rs | 2 +- control-plane/registry/src/services/pki.rs | 41 +- control-plane/registry/test-registry.sh | 6 +- control-plane/scheduler/src/main.rs | 16 +- control-plane/scheduler/src/metrics.rs | 8 +- control-plane/scheduler/src/services/etcd.rs | 4 +- control-plane/sdn-controller/src/main.rs | 5 +- control-plane/sdn-controller/src/metrics.rs | 8 +- .../sdn-controller/src/services/ipam.rs | 6 +- control-plane/shared/Cargo.toml | 2 +- control-plane/volume-manager/cleanup-etcd.sh | 10 +- .../volume-manager/docker-compose.dev.yml | 76 +-- .../volume-manager/src/ceph/core/config.rs | 4 +- .../volume-manager/src/ceph/ops/init.rs | 6 +- .../volume-manager/src/db/volumes.rs | 2 +- .../volume-manager/src/etcd/core/config.rs | 4 +- control-plane/volume-manager/src/main.rs | 16 +- control-plane/volume-manager/src/metrics.rs | 8 +- .../volume-manager/test-hybrid-system.sh | 30 +- .../volume-manager/test-patroni-ha.sh | 10 +- ...f-updater.service => csfx-updater.service} | 12 +- .../{csf-updater.sh => csfx-updater.sh} | 20 +- deployments/systemd/install.sh | 42 +- docker-compose.dev.yml | 44 +- docker-compose.failover-controller.yml | 40 +- docker-compose.prod.yml | 72 +-- docker-compose.registry.yml | 38 +- 
docker-compose.scheduler.yml | 28 +- docker-compose.sdn-controller.yml | 44 +- docker-compose.volume-manager.yml | 28 +- docs/UPDATER_PLAN.md | 118 ++--- .../components/auth/login-form-client.svelte | 4 +- .../components/auth/otp-form-client.svelte | 4 +- .../lib/components/navbar/app-sidebar.svelte | 2 +- .../src/lib/components/navbar/nav-user.svelte | 4 +- .../components/navbar/team-switcher.svelte | 4 +- .../components/settings/UpdateSettings.svelte | 4 +- frontend/src/routes/local-system/+page.svelte | 4 +- .../src/routes/physical-servers/+page.svelte | 2 +- .../routes/physical-servers/[id]/+page.svelte | 2 +- nixos-node/DEPLOYMENT.md | 30 +- nixos-node/README.md | 2 +- nixos-node/deploy.sh | 57 +-- nixos-node/flake.nix | 20 +- nixos-node/modules/iso-configuration.nix | 38 +- nixos-node/modules/server-configuration.nix | 119 ++--- package.json | 4 +- scripts/build-local.sh | 10 +- 94 files changed, 947 insertions(+), 1062 deletions(-) delete mode 100644 .github/workflows/README.md rename control-plane/{Dockerfile.csf-updater => Dockerfile.csfx-updater} (80%) delete mode 100644 control-plane/csf-updater/build.rs rename control-plane/{csf-migrate => csfx-migrate}/Cargo.toml (90%) rename control-plane/{csf-migrate => csfx-migrate}/src/main.rs (92%) rename control-plane/{csf-updater => csfx-updater}/Cargo.toml (92%) create mode 100644 control-plane/csfx-updater/build.rs rename control-plane/{csf-updater => csfx-updater}/src/config.rs (93%) rename control-plane/{csf-updater => csfx-updater}/src/etcd.rs (69%) rename control-plane/{csf-updater => csfx-updater}/src/git_mirror.rs (100%) rename control-plane/{csf-updater => csfx-updater}/src/main.rs (99%) rename control-plane/{csf-updater => csfx-updater}/src/nix_build.rs (100%) rename control-plane/{csf-updater => csfx-updater}/src/poller.rs (96%) rename control-plane/{csf-updater => csfx-updater}/src/updater.rs (100%) rename deployments/systemd/{csf-updater.service => csfx-updater.service} (62%) rename 
deployments/systemd/{csf-updater.sh => csfx-updater.sh} (88%) diff --git a/.env.example b/.env.example index 3d1caef..510f500 100644 --- a/.env.example +++ b/.env.example @@ -1,13 +1,13 @@ -# CSF-Core Environment Configuration +# CSFX-Core Environment Configuration # Copy to .env and fill in values before running # JWT secret — generate with: openssl rand -hex 64 JWT_SECRET= # PostgreSQL -POSTGRES_USER=csf_user -POSTGRES_PASSWORD=csf-test-password -POSTGRES_DB=csf_core +POSTGRES_USER=csfx_user +POSTGRES_PASSWORD=csfx-test-password +POSTGRES_DB=csfx_core # Rust log level (trace, debug, info, warn, error) RUST_LOG=info @@ -15,12 +15,12 @@ RUST_LOG=info # Docker image registry (GHCR org name, lowercase) GHCR_ORG=local # Image version tag — use "dev" for local builds, semver for prod (e.g. 0.2.2) -CSF_VERSION=dev +CSFX_VERSION=dev # etcd auth — generate with: openssl rand -hex 32 ETCD_ROOT_PASSWORD= -ETCD_CSF_PASSWORD= +ETCD_CSFX_PASSWORD= -# GHCR read token for image digest verification (csf-updater) +# GHCR read token for image digest verification (csfx-updater) # generate at: https://github.com/settings/tokens — scope: read:packages GHCR_TOKEN= diff --git a/.github/workflows/README.md b/.github/workflows/README.md deleted file mode 100644 index 525978e..0000000 --- a/.github/workflows/README.md +++ /dev/null @@ -1,106 +0,0 @@ -# GitHub Actions Workflows - -## Übersicht - -### Release & Deployment Workflows - -#### `main-release.yml` (Haupt-Release-Pipeline) - -Läuft automatisch bei jedem Push auf `main`: - -1. **Semantic Release** - Erstellt neue Releases basierend auf Conventional Commits -2. **Docker Build Backend** - Baut und pusht Backend-Image nach ghcr.io -3. **Docker Build Frontend** - Baut und pusht Frontend-Image nach ghcr.io -4. 
**Summary** - Zeigt Übersicht aller Artefakte - -**Outputs:** - -- GitHub Release mit Binaries -- Docker Images: `ghcr.io/cs-foundry/csf-core-backend:latest` & `:version` -- Docker Images: `ghcr.io/cs-foundry/csf-core-frontend:latest` & `:version` - -#### `release.yml` (Wiederverwendbarer Release-Workflow) - -Wird von `main-release.yml` aufgerufen: - -- Führt Semantic Release aus -- Baut Backend-Binaries (Linux/macOS) -- Baut Frontend-Package -- Lädt alle Artefakte zum Release hoch - -#### `docker-build-manual.yml` (Manuelles Docker-Build) - -Manueller Workflow für Docker-Builds: - -- Auswahl: Backend, Frontend oder beides -- Eigene Versionsnummer angeben -- Erstellt Tags: `` und `manual-latest` - -### Weitere Workflows - -#### `beta-release.yml` - -Release-Pipeline für Beta-Versionen auf dem `beta` Branch - -#### `docker-build-push.yml` - -Legacy-Workflow für das vereinigte Backend+Frontend Image - -#### `build-artifacts.yml` - -Standalone-Workflow für Binary-Builds - -#### `lint.yml` - -Code-Quality-Checks (Rust, TypeScript, etc.) - -## Verwendung - -### Automatischer Release (main) - -```bash -git commit -m "feat: neue Feature" -git push origin main -# → Automatischer Release + Docker Images -``` - -### Manueller Docker-Build - -1. GitHub Actions → **Manual Docker Build** -2. **Run workflow** klicken -3. Version eingeben (z.B. `1.2.3`) -4. Target auswählen (backend/frontend/both) -5. 
**Run workflow** ausführen - -## Image-URLs - -Nach erfolgreichem Build sind die Images verfügbar unter: - -```bash -# Backend -ghcr.io/cs-foundry/csf-core-backend:latest -ghcr.io/cs-foundry/csf-core-backend: - -# Frontend -ghcr.io/cs-foundry/csf-core-frontend:latest -ghcr.io/cs-foundry/csf-core-frontend: -``` - -## Permissions - -Die Workflows benötigen folgende Permissions: - -- `contents: write` - Für Releases -- `packages: write` - Für Docker Registry -- `issues: write` - Für Issue-Updates -- `pull-requests: write` - Für PR-Updates - -## Secrets - -Keine zusätzlichen Secrets erforderlich - verwendet `GITHUB_TOKEN` automatisch. - -## Weitere Dokumentation - -- [Docker Registry Integration](../docs/deployment/DOCKER_REGISTRY.md) -- [NixOS Deployment](../docs/deployment/DEPLOYMENT.md) -- [Installation Guide](../docs/deployment/INSTALLATION.md) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 574fa4e..b0dc064 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -92,7 +92,7 @@ jobs: id: image run: | ORG=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') - echo "name=ghcr.io/${ORG}/csf-ce-${{ matrix.service }}" >> $GITHUB_OUTPUT + echo "name=ghcr.io/${ORG}/csfx-ce-${{ matrix.service }}" >> $GITHUB_OUTPUT - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 @@ -113,7 +113,7 @@ jobs: build-args: | SERVICE_BIN=${{ matrix.service }} BUILD_JOBS=2 - CSF_BUILD_VERSION=${{ needs.prepare.outputs.version }} + CSFX_BUILD_VERSION=${{ needs.prepare.outputs.version }} push: true outputs: type=registry,name=${{ steps.image.outputs.name }},push-by-digest=true platforms: ${{ matrix.platform }} @@ -141,8 +141,8 @@ jobs: fail-fast: false matrix: binary: - - csf-updater - - csf-agent + - csfx-updater + - csfx-agent arch: - amd64 - arm64 @@ -183,7 +183,7 @@ jobs: env: CARGO_TARGET_X86_64_UNKNOWN_LINUX_MUSL_LINKER: musl-gcc CARGO_TARGET_AARCH64_UNKNOWN_LINUX_MUSL_LINKER: 
aarch64-linux-gnu-gcc - CSF_BUILD_VERSION: ${{ needs.prepare.outputs.version }} + CSFX_BUILD_VERSION: ${{ needs.prepare.outputs.version }} run: | cargo build --release --bin ${{ matrix.binary }} --target ${{ matrix.target }} cp target/${{ matrix.target }}/release/${{ matrix.binary }} ${{ matrix.binary }}-${{ matrix.arch }} @@ -216,7 +216,7 @@ jobs: id: image run: | ORG=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') - echo "name=ghcr.io/${ORG}/csf-ce-${{ matrix.service }}" >> $GITHUB_OUTPUT + echo "name=ghcr.io/${ORG}/csfx-ce-${{ matrix.service }}" >> $GITHUB_OUTPUT - uses: actions/download-artifact@v4 with: @@ -262,7 +262,7 @@ jobs: - uses: actions/download-artifact@v4 with: - pattern: csf-* + pattern: csfx-* merge-multiple: true - name: Create or update release @@ -271,28 +271,28 @@ jobs: TAG="v${VERSION}" if gh release view "${TAG}" &>/dev/null; then gh release upload "${TAG}" \ - csf-updater-amd64 \ - csf-updater-amd64.sha256 \ - csf-updater-arm64 \ - csf-updater-arm64.sha256 \ - csf-agent-amd64 \ - csf-agent-amd64.sha256 \ - csf-agent-arm64 \ - csf-agent-arm64.sha256 \ + csfx-updater-amd64 \ + csfx-updater-amd64.sha256 \ + csfx-updater-arm64 \ + csfx-updater-arm64.sha256 \ + csfx-agent-amd64 \ + csfx-agent-amd64.sha256 \ + csfx-agent-arm64 \ + csfx-agent-arm64.sha256 \ --clobber else gh release create "${TAG}" \ --title "v${VERSION}" \ --prerelease \ --notes "Alpha build ${VERSION}" \ - csf-updater-amd64 \ - csf-updater-amd64.sha256 \ - csf-updater-arm64 \ - csf-updater-arm64.sha256 \ - csf-agent-amd64 \ - csf-agent-amd64.sha256 \ - csf-agent-arm64 \ - csf-agent-arm64.sha256 + csfx-updater-amd64 \ + csfx-updater-amd64.sha256 \ + csfx-updater-arm64 \ + csfx-updater-arm64.sha256 \ + csfx-agent-amd64 \ + csfx-agent-amd64.sha256 \ + csfx-agent-arm64 \ + csfx-agent-arm64.sha256 fi env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -317,7 +317,7 @@ jobs: - uses: actions/download-artifact@v4 with: - pattern: csf-agent-* + pattern: csfx-agent-* path: 
/tmp/binaries merge-multiple: true @@ -340,7 +340,7 @@ jobs: get_manifest_digest() { local svc=$1 - local image="ghcr.io/${ORG}/csf-ce-${svc}:${VERSION}" + local image="ghcr.io/${ORG}/csfx-ce-${svc}:${VERSION}" docker buildx imagetools inspect "${image}" \ --format '{{json .Manifest}}' | jq -r '.digest' } @@ -353,7 +353,7 @@ jobs: cat > infra/versions.nix <> $GITHUB_STEP_SUMMARY echo "|---------|-------|" >> $GITHUB_STEP_SUMMARY for svc in api-gateway registry scheduler volume-manager failover-controller sdn-controller; do - echo "| ${svc} | \`ghcr.io/${ORG}/csf-ce-${svc}:${VERSION}\` |" >> $GITHUB_STEP_SUMMARY + echo "| ${svc} | \`ghcr.io/${ORG}/csfx-ce-${svc}:${VERSION}\` |" >> $GITHUB_STEP_SUMMARY done echo "" >> $GITHUB_STEP_SUMMARY echo "### Binaries" >> $GITHUB_STEP_SUMMARY echo "| Binary | Arch | Artifact |" >> $GITHUB_STEP_SUMMARY echo "|--------|------|----------|" >> $GITHUB_STEP_SUMMARY - for bin in csf-updater csf-agent; do + for bin in csfx-updater csfx-agent; do echo "| ${bin} | amd64 | \`${bin}-amd64\` |" >> $GITHUB_STEP_SUMMARY echo "| ${bin} | arm64 | \`${bin}-arm64\` |" >> $GITHUB_STEP_SUMMARY done diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index 1343eb0..f3df0ab 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -74,7 +74,7 @@ jobs: id: image run: | ORG=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') - echo "name=ghcr.io/${ORG}/csf-ce-${{ matrix.service }}" >> $GITHUB_OUTPUT + echo "name=ghcr.io/${ORG}/csfx-ce-${{ matrix.service }}" >> $GITHUB_OUTPUT - uses: docker/setup-buildx-action@v3 @@ -130,7 +130,7 @@ jobs: id: image run: | ORG=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') - echo "name=ghcr.io/${ORG}/csf-ce-${{ matrix.service }}" >> $GITHUB_OUTPUT + echo "name=ghcr.io/${ORG}/csfx-ce-${{ matrix.service }}" >> $GITHUB_OUTPUT - uses: actions/download-artifact@v4 with: @@ -165,8 +165,8 @@ jobs: fail-fast: false matrix: 
binary: - - csf-updater - - csf-agent + - csfx-updater + - csfx-agent arch: - amd64 - arm64 @@ -207,7 +207,7 @@ jobs: env: CARGO_TARGET_X86_64_UNKNOWN_LINUX_MUSL_LINKER: musl-gcc CARGO_TARGET_AARCH64_UNKNOWN_LINUX_MUSL_LINKER: aarch64-linux-gnu-gcc - CSF_BUILD_VERSION: ${{ needs.version.outputs.version }} + CSFX_BUILD_VERSION: ${{ needs.version.outputs.version }} run: | cargo build --release --bin ${{ matrix.binary }} --target ${{ matrix.target }} cp target/${{ matrix.target }}/release/${{ matrix.binary }} ${{ matrix.binary }}-${{ matrix.arch }} @@ -230,7 +230,7 @@ jobs: - uses: actions/download-artifact@v4 with: - pattern: csf-* + pattern: csfx-* merge-multiple: true - name: Create pre-release @@ -240,14 +240,14 @@ jobs: --title "v${VERSION}" \ --prerelease \ --generate-notes \ - csf-updater-amd64 \ - csf-updater-amd64.sha256 \ - csf-updater-arm64 \ - csf-updater-arm64.sha256 \ - csf-agent-amd64 \ - csf-agent-amd64.sha256 \ - csf-agent-arm64 \ - csf-agent-arm64.sha256 + csfx-updater-amd64 \ + csfx-updater-amd64.sha256 \ + csfx-updater-arm64 \ + csfx-updater-arm64.sha256 \ + csfx-agent-amd64 \ + csfx-agent-amd64.sha256 \ + csfx-agent-arm64 \ + csfx-agent-arm64.sha256 env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -265,7 +265,7 @@ jobs: - uses: actions/download-artifact@v4 with: - pattern: csf-agent-* + pattern: csfx-agent-* path: /tmp/binaries merge-multiple: true @@ -288,7 +288,7 @@ jobs: get_manifest_digest() { local svc=$1 - local image="ghcr.io/${ORG}/csf-ce-${svc}:${VERSION}" + local image="ghcr.io/${ORG}/csfx-ce-${svc}:${VERSION}" docker buildx imagetools inspect "${image}" \ --format '{{json .Manifest}}' | jq -r '.digest' } @@ -301,7 +301,7 @@ jobs: cat > infra/versions.nix <>>>>>> origin/main -## [0.2.2](https://github.com/CS-Foundry/CSF-Core/compare/v0.2.1...v0.2.2) (2026-01-05) +## [0.2.2](https://github.com/CS-Foundry/CSFX-Core/compare/v0.2.1...v0.2.2) (2026-01-05) ### Bug Fixes -* frontend build error 
([afec643](https://github.com/CS-Foundry/CSF-Core/commit/afec64354d33c9e70cf32cee2483a03250c1b108)) +* frontend build error ([afec643](https://github.com/CS-Foundry/CSFX-Core/commit/afec64354d33c9e70cf32cee2483a03250c1b108)) -## [0.2.1](https://github.com/CS-Foundry/CSF-Core/compare/v0.2.0...v0.2.1) (2026-01-05) +## [0.2.1](https://github.com/CS-Foundry/CSFX-Core/compare/v0.2.0...v0.2.1) (2026-01-05) ### Bug Fixes -* semantic release commit befor build ([9927644](https://github.com/CS-Foundry/CSF-Core/commit/99276446079e169853a7b2b7848a369b45d0f930)) +* semantic release commit befor build ([9927644](https://github.com/CS-Foundry/CSFX-Core/commit/99276446079e169853a7b2b7848a369b45d0f930)) -# [0.2.0](https://github.com/CS-Foundry/CSF-Core/compare/v0.1.3...v0.2.0) (2026-01-05) +# [0.2.0](https://github.com/CS-Foundry/CSFX-Core/compare/v0.1.3...v0.2.0) (2026-01-05) ### Features -* new beta branch features ([b88b509](https://github.com/CS-Foundry/CSF-Core/commit/b88b509342da00aeea618ece55bc6d911ac543e5)) +* new beta branch features ([b88b509](https://github.com/CS-Foundry/CSFX-Core/commit/b88b509342da00aeea618ece55bc6d911ac543e5)) -## [0.1.3](https://github.com/CS-Foundry/CSF-Core/compare/v0.1.2...v0.1.3) (2026-01-04) +## [0.1.3](https://github.com/CS-Foundry/CSFX-Core/compare/v0.1.2...v0.1.3) (2026-01-04) ### Bug Fixes -* semantiv release versioning ([4b4ce16](https://github.com/CS-Foundry/CSF-Core/commit/4b4ce161a29b96531248f11b228a71d2cce0b950)) +* semantiv release versioning ([4b4ce16](https://github.com/CS-Foundry/CSFX-Core/commit/4b4ce161a29b96531248f11b228a71d2cce0b950)) -## [0.1.2](https://github.com/CS-Foundry/CSF-Core/compare/v0.1.1...v0.1.2) (2026-01-04) +## [0.1.2](https://github.com/CS-Foundry/CSFX-Core/compare/v0.1.1...v0.1.2) (2026-01-04) ### Bug Fixes -* version ([3d63017](https://github.com/CS-Foundry/CSF-Core/commit/3d63017237d93288ba1645d9eb6b6f0f318c2ec3)) -* version 
([23573b8](https://github.com/CS-Foundry/CSF-Core/commit/23573b862761811ef1b8234477ccb63307687750)) +* version ([3d63017](https://github.com/CS-Foundry/CSFX-Core/commit/3d63017237d93288ba1645d9eb6b6f0f318c2ec3)) +* version ([23573b8](https://github.com/CS-Foundry/CSFX-Core/commit/23573b862761811ef1b8234477ccb63307687750)) -## [0.1.1](https://github.com/CS-Foundry/CSF-Core/compare/v0.1.0...v0.1.1) (2026-01-04) +## [0.1.1](https://github.com/CS-Foundry/CSFX-Core/compare/v0.1.0...v0.1.1) (2026-01-04) ### Bug Fixes -* updater pull ([3ef7e36](https://github.com/CS-Foundry/CSF-Core/commit/3ef7e36cee7a2aeac7d6b6aa11107ccc712c12b5)) +* updater pull ([3ef7e36](https://github.com/CS-Foundry/CSFX-Core/commit/3ef7e36cee7a2aeac7d6b6aa11107ccc712c12b5)) -# [0.1.0](https://github.com/CS-Foundry/CSF-Core/compare/v0.0.8...v0.1.0) (2026-01-04) +# [0.1.0](https://github.com/CS-Foundry/CSFX-Core/compare/v0.0.8...v0.1.0) (2026-01-04) ### Features -* updater for programm ([7b064b8](https://github.com/CS-Foundry/CSF-Core/commit/7b064b8255b34cde174a591e93c7c67604997f2c)) +* updater for programm ([7b064b8](https://github.com/CS-Foundry/CSFX-Core/commit/7b064b8255b34cde174a591e93c7c67604997f2c)) -## [0.0.8](https://github.com/CS-Foundry/CSF-Core/compare/v0.0.7...v0.0.8) (2026-01-04) +## [0.0.8](https://github.com/CS-Foundry/CSFX-Core/compare/v0.0.7...v0.0.8) (2026-01-04) ### Bug Fixes -* docker warn on linux kernel ([1de9a08](https://github.com/CS-Foundry/CSF-Core/commit/1de9a084cbbe5cec93fc2205415c3f1f5ab5b597)) +* docker warn on linux kernel ([1de9a08](https://github.com/CS-Foundry/CSFX-Core/commit/1de9a084cbbe5cec93fc2205415c3f1f5ab5b597)) diff --git a/Cargo.lock b/Cargo.lock index ccc6d17..ed8cee1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1098,7 +1098,7 @@ dependencies = [ ] [[package]] -name = "csf-agent" +name = "csfx-agent" version = "0.2.2" dependencies = [ "anyhow", @@ -1117,7 +1117,7 @@ dependencies = [ ] [[package]] -name = "csf-migrate" +name = "csfx-migrate" version = 
"0.2.2" dependencies = [ "dotenvy", @@ -1130,7 +1130,7 @@ dependencies = [ ] [[package]] -name = "csf-updater" +name = "csfx-updater" version = "0.2.2" dependencies = [ "anyhow", diff --git a/Cargo.toml b/Cargo.toml index 9bfe508..df041e4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,8 +3,8 @@ resolver = "2" members = [ "agent", "control-plane/api-gateway", - "control-plane/csf-migrate", - "control-plane/csf-updater", + "control-plane/csfx-migrate", + "control-plane/csfx-updater", "control-plane/scheduler", "control-plane/failover-controller", "control-plane/sdn-controller", @@ -20,7 +20,7 @@ version = "0.2.2" edition = "2021" authors = ["CS-Foundry"] license = "SEE LICENSE IN LICENSE" -repository = "https://github.com/CSFX-cloud/CSF-Core" +repository = "https://github.com/CSFX-cloud/CSFX-Core" [workspace.dependencies] # Async runtime diff --git a/README.md b/README.md index cdee1b9..a2653b4 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,14 @@
-# CSF-Core +# CSFX-Core ### Unified Infrastructure Management Platform -[![Pipeline Status](https://img.shields.io/github/actions/workflow/status/CS-Foundry/CSF-Core/main-release.yml?branch=main&label=Release-Pipeline&style=for-the-badge&logo=github)](https://github.com/CS-Foundry/CSF-Core/actions/runs/20679215646) -[![Lint Status](https://img.shields.io/github/actions/workflow/status/CS-Foundry/CSF-Core/lint.yml?branch=main&label=Lint%20%26%20Format&style=for-the-badge&logo=github&color=blueviolet)](https://github.com/CS-Foundry/CSF-Core/actions/runs/20679215645) -[![Version](https://img.shields.io/github/v/release/CS-Foundry/CSF-Core?style=for-the-badge&color=blue)](https://github.com/CS-Foundry/CSF-Core/releases) -[![Downloads](https://img.shields.io/github/downloads/CS-Foundry/CSF-Core/total?style=for-the-badge&color=success)](https://github.com/CS-Foundry/CSF-Core/releases) -[![License](https://img.shields.io/badge/License-CSF--Internal-purple?style=for-the-badge)](LICENSE) +[![Pipeline Status](https://img.shields.io/github/actions/workflow/status/CS-Foundry/CSFX-Core/main-release.yml?branch=main&label=Release-Pipeline&style=for-the-badge&logo=github)](https://github.com/CS-Foundry/CSFX-Core/actions/runs/20679215646) +[![Lint Status](https://img.shields.io/github/actions/workflow/status/CS-Foundry/CSFX-Core/lint.yml?branch=main&label=Lint%20%26%20Format&style=for-the-badge&logo=github&color=blueviolet)](https://github.com/CS-Foundry/CSFX-Core/actions/runs/20679215645) +[![Version](https://img.shields.io/github/v/release/CS-Foundry/CSFX-Core?style=for-the-badge&color=blue)](https://github.com/CS-Foundry/CSFX-Core/releases) +[![Downloads](https://img.shields.io/github/downloads/CS-Foundry/CSFX-Core/total?style=for-the-badge&color=success)](https://github.com/CS-Foundry/CSFX-Core/releases) +[![License](https://img.shields.io/badge/License-CSFX--Internal-purple?style=for-the-badge)](LICENSE)

High-Performance Backend & Frontend in a single systemd service.
@@ -23,7 +23,7 @@ ## ⚡ About the Project -**CSF-Core** revolutionizes infrastructure management through a **Unified Architecture** approach. Instead of manually orchestrating complex microservices, CSF-Core delivers a monolithic yet modular binary that serves both the API backend and the frontend. +**CSFX-Core** revolutionizes infrastructure management through a **Unified Architecture** approach. Instead of manually orchestrating complex microservices, CSFX-Core delivers a monolithic yet modular binary that serves both the API backend and the frontend. ### Key Features @@ -36,12 +36,12 @@ ## 🚀 Quick Start -Install CSF-Core on your Linux system in under 30 seconds using our one-line installer. +Install CSFX-Core on your Linux system in under 30 seconds using our one-line installer. ### Installation ```bash -curl -fsSL [https://raw.githubusercontent.com/CS-Foundry/CSF-Core/main/scripts/install.sh](https://raw.githubusercontent.com/CS-Foundry/CSF-Core/main/scripts/install.sh) | sudo bash +curl -fsSL [https://raw.githubusercontent.com/CS-Foundry/CSFX-Core/main/scripts/install.sh](https://raw.githubusercontent.com/CS-Foundry/CSFX-Core/main/scripts/install.sh) | sudo bash ``` ### Technology Stack @@ -63,7 +63,7 @@ You can find our complete documentation in the [`docs/`](https://www.google.com/ We actively support our users with integration and troubleshooting. -- **🐛 Bug Reports:** Please use [GitHub Issues](https://github.com/CS-Foundry/CSF-Core/issues) to report bugs. +- **🐛 Bug Reports:** Please use [GitHub Issues](https://github.com/CS-Foundry/CSFX-Core/issues) to report bugs. - **📖 Documentation:** Check the [`docs/`](https://www.google.com/search?q=./docs/) folder for detailed instructions. - **🔧 Debugging:** For connectivity issues, refer to the [Connection Debugging Guide](https://www.google.com/search?q=./docs/troubleshooting/DEBUG_CONNECTION.md). 
diff --git a/agent/Cargo.toml b/agent/Cargo.toml index 35f26fe..4deecb6 100644 --- a/agent/Cargo.toml +++ b/agent/Cargo.toml @@ -1,11 +1,11 @@ [package] -name = "csf-agent" +name = "csfx-agent" version.workspace = true edition.workspace = true license.workspace = true [[bin]] -name = "csf-agent" +name = "csfx-agent" path = "src/main.rs" [dependencies] diff --git a/agent/build.rs b/agent/build.rs index 8b7e815..65082fb 100644 --- a/agent/build.rs +++ b/agent/build.rs @@ -1,6 +1,6 @@ fn main() { - if let Ok(v) = std::env::var("CSF_BUILD_VERSION") { + if let Ok(v) = std::env::var("CSFX_BUILD_VERSION") { println!("cargo:rustc-env=CARGO_PKG_VERSION={}", v); } - println!("cargo:rerun-if-env-changed=CSF_BUILD_VERSION"); + println!("cargo:rerun-if-env-changed=CSFX_BUILD_VERSION"); } diff --git a/agent/src/config.rs b/agent/src/config.rs index a1e7e86..3a99fd8 100644 --- a/agent/src/config.rs +++ b/agent/src/config.rs @@ -3,9 +3,9 @@ use serde::{Deserialize, Serialize}; use std::path::Path; use uuid::Uuid; -const STATE_DIR: &str = "/var/lib/csf-daemon"; -const CREDENTIALS_FILE: &str = "/var/lib/csf-daemon/credentials"; -const CONFIG_FILE: &str = "/var/lib/csf-daemon/config.json"; +const STATE_DIR: &str = "/var/lib/csfx-daemon"; +const CREDENTIALS_FILE: &str = "/var/lib/csfx-daemon/credentials"; +const CONFIG_FILE: &str = "/var/lib/csfx-daemon/config.json"; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct DaemonConfig { diff --git a/agent/src/docker.rs b/agent/src/docker.rs index d8af7da..97e9977 100644 --- a/agent/src/docker.rs +++ b/agent/src/docker.rs @@ -70,7 +70,7 @@ impl DockerManager { } pub async fn start_container(&self, spec: &WorkloadSpec) -> Result { - let container_name = format!("csf-{}", spec.workload_id); + let container_name = format!("csfx-{}", spec.workload_id); let env: Option> = spec.env_vars.as_ref().map(|vars| { vars.iter() @@ -99,8 +99,8 @@ impl DockerManager { }, host_config: Some(host_config), labels: Some(HashMap::from([ - 
("csf.workload_id".to_string(), spec.workload_id.clone()), - ("csf.managed".to_string(), "true".to_string()), + ("csfx.workload_id".to_string(), spec.workload_id.clone()), + ("csfx.managed".to_string(), "true".to_string()), ])), ..Default::default() }; diff --git a/agent/src/main.rs b/agent/src/main.rs index 595a39e..66ecc86 100644 --- a/agent/src/main.rs +++ b/agent/src/main.rs @@ -23,12 +23,12 @@ async fn main() -> Result<()> { .with_target(false) .init(); - info!(version = env!("CARGO_PKG_VERSION"), "csf-agent starting"); + info!(version = env!("CARGO_PKG_VERSION"), "csfx-agent starting"); - let gateway_url = std::env::var("CSF_GATEWAY_URL") - .context("CSF_GATEWAY_URL environment variable is required")?; + let gateway_url = std::env::var("CSFX_GATEWAY_URL") + .context("CSFX_GATEWAY_URL environment variable is required")?; - let heartbeat_interval_secs: u64 = std::env::var("CSF_HEARTBEAT_INTERVAL") + let heartbeat_interval_secs: u64 = std::env::var("CSFX_HEARTBEAT_INTERVAL") .ok() .and_then(|v| v.parse().ok()) .unwrap_or(60); @@ -104,10 +104,10 @@ async fn perform_registration( heartbeat_interval_secs: u64, agent_pki: &pki::AgentPki, ) -> Result<(uuid::Uuid, String)> { - let token = match std::env::var("CSF_REGISTRATION_TOKEN") { + let token = match std::env::var("CSFX_REGISTRATION_TOKEN") { Ok(t) => t, Err(_) => { - info!("CSF_REGISTRATION_TOKEN not set, fetching bootstrap token from gateway"); + info!("CSFX_REGISTRATION_TOKEN not set, fetching bootstrap token from gateway"); client .fetch_bootstrap_token() .await diff --git a/agent/src/pki.rs b/agent/src/pki.rs index 227246e..4b103dc 100644 --- a/agent/src/pki.rs +++ b/agent/src/pki.rs @@ -2,10 +2,10 @@ use anyhow::{Context, Result}; use rcgen::{CertificateParams, DnType, KeyPair, PKCS_ECDSA_P256_SHA256}; use std::path::Path; -const KEY_FILE: &str = "/var/lib/csf-daemon/agent.key"; -const CSR_FILE: &str = "/var/lib/csf-daemon/agent.csr"; -const CERT_FILE: &str = "/var/lib/csf-daemon/agent.crt"; -const CA_FILE: 
&str = "/var/lib/csf-daemon/ca.crt"; +const KEY_FILE: &str = "/var/lib/csfx-daemon/agent.key"; +const CSR_FILE: &str = "/var/lib/csfx-daemon/agent.csr"; +const CERT_FILE: &str = "/var/lib/csfx-daemon/agent.crt"; +const CA_FILE: &str = "/var/lib/csfx-daemon/ca.crt"; pub struct AgentPki { key_pem: String, diff --git a/agent/src/rbd.rs b/agent/src/rbd.rs index 70fda46..7b3aad8 100644 --- a/agent/src/rbd.rs +++ b/agent/src/rbd.rs @@ -87,5 +87,5 @@ pub async fn umount(mount_point: &str) -> Result<()> { } pub fn mount_point_for(volume_id: &str) -> String { - format!("/mnt/csf-volumes/{}", volume_id) + format!("/mnt/csfx-volumes/{}", volume_id) } diff --git a/agent/src/update_watch.rs b/agent/src/update_watch.rs index 1fd1004..cd7f9c9 100644 --- a/agent/src/update_watch.rs +++ b/agent/src/update_watch.rs @@ -3,8 +3,8 @@ use tokio::fs; use tracing::{info, warn}; use uuid::Uuid; -const TRIGGER_FILE: &str = "/var/lib/csf/update_trigger"; -const HEARTBEAT_COUNTER_FILE: &str = "/var/lib/csf/post_update_heartbeats"; +const TRIGGER_FILE: &str = "/var/lib/csfx/update_trigger"; +const HEARTBEAT_COUNTER_FILE: &str = "/var/lib/csfx/post_update_heartbeats"; const MAX_JITTER_SECS: u64 = 300; pub async fn handle(agent_id: Uuid, desired_flake_rev: &str, current_flake_rev: &str) { diff --git a/control-plane/Dockerfile.csf-updater b/control-plane/Dockerfile.csfx-updater similarity index 80% rename from control-plane/Dockerfile.csf-updater rename to control-plane/Dockerfile.csfx-updater index a6f2c2f..998a976 100644 --- a/control-plane/Dockerfile.csf-updater +++ b/control-plane/Dockerfile.csfx-updater @@ -12,8 +12,8 @@ RUN apt-get update && apt-get install -y \ COPY Cargo.toml Cargo.lock ./ COPY agent/Cargo.toml ./agent/ COPY control-plane/api-gateway/Cargo.toml ./control-plane/api-gateway/ -COPY control-plane/csf-migrate/Cargo.toml ./control-plane/csf-migrate/ -COPY control-plane/csf-updater/Cargo.toml ./control-plane/csf-updater/ +COPY control-plane/csfx-migrate/Cargo.toml 
./control-plane/csfx-migrate/ +COPY control-plane/csfx-updater/Cargo.toml ./control-plane/csfx-updater/ COPY control-plane/scheduler/Cargo.toml ./control-plane/scheduler/ COPY control-plane/failover-controller/Cargo.toml ./control-plane/failover-controller/ COPY control-plane/sdn-controller/Cargo.toml ./control-plane/sdn-controller/ @@ -27,14 +27,14 @@ COPY control-plane/shared/ ./control-plane/shared/ RUN mkdir -p agent/src \ control-plane/api-gateway/src \ - control-plane/csf-migrate/src \ - control-plane/csf-updater/src \ + control-plane/csfx-migrate/src \ + control-plane/csfx-updater/src \ control-plane/scheduler/src \ control-plane/failover-controller/src \ control-plane/sdn-controller/src \ control-plane/volume-manager/src \ control-plane/registry/src \ - && for d in agent control-plane/api-gateway control-plane/csf-migrate control-plane/csf-updater \ + && for d in agent control-plane/api-gateway control-plane/csfx-migrate control-plane/csfx-updater \ control-plane/scheduler control-plane/failover-controller control-plane/sdn-controller \ control-plane/volume-manager control-plane/registry; do \ echo "fn main() {}" > $d/src/main.rs; \ @@ -42,7 +42,7 @@ RUN mkdir -p agent/src \ COPY . . 
-RUN cargo build --release --bin csf-updater +RUN cargo build --release --bin csfx-updater FROM scratch AS export -COPY --from=builder /app/target/release/csf-updater /csf-updater +COPY --from=builder /app/target/release/csfx-updater /csfx-updater diff --git a/control-plane/Dockerfile.prod.shared b/control-plane/Dockerfile.prod.shared index bf95142..e1a5722 100644 --- a/control-plane/Dockerfile.prod.shared +++ b/control-plane/Dockerfile.prod.shared @@ -24,18 +24,18 @@ COPY control-plane/registry/Cargo.toml ./control-plane/registry/ COPY control-plane/shared/entity/Cargo.toml ./control-plane/shared/entity/ COPY control-plane/shared/migration/Cargo.toml ./control-plane/shared/migration/ COPY control-plane/shared/shared/Cargo.toml ./control-plane/shared/shared/ -COPY control-plane/csf-migrate/Cargo.toml ./control-plane/csf-migrate/ -COPY control-plane/csf-updater/Cargo.toml ./control-plane/csf-updater/ +COPY control-plane/csfx-migrate/Cargo.toml ./control-plane/csfx-migrate/ +COPY control-plane/csfx-updater/Cargo.toml ./control-plane/csfx-updater/ COPY agent/build.rs ./agent/ COPY control-plane/api-gateway/build.rs ./control-plane/api-gateway/ -COPY control-plane/csf-updater/build.rs ./control-plane/csf-updater/ +COPY control-plane/csfx-updater/build.rs ./control-plane/csfx-updater/ COPY control-plane/shared/ ./control-plane/shared/ RUN mkdir -p agent/src \ control-plane/api-gateway/src \ - control-plane/csf-migrate/src \ - control-plane/csf-updater/src \ + control-plane/csfx-migrate/src \ + control-plane/csfx-updater/src \ control-plane/scheduler/src \ control-plane/failover-controller/src \ control-plane/sdn-controller/src \ @@ -43,8 +43,8 @@ RUN mkdir -p agent/src \ control-plane/registry/src \ && echo "fn main() {}" > agent/src/main.rs \ && echo "fn main() {}" > control-plane/api-gateway/src/main.rs \ - && echo "fn main() {}" > control-plane/csf-migrate/src/main.rs \ - && echo "fn main() {}" > control-plane/csf-updater/src/main.rs \ + && echo "fn main() {}" > 
control-plane/csfx-migrate/src/main.rs \ + && echo "fn main() {}" > control-plane/csfx-updater/src/main.rs \ && echo "fn main() {}" > control-plane/scheduler/src/main.rs \ && echo "fn main() {}" > control-plane/failover-controller/src/main.rs \ && echo "fn main() {}" > control-plane/sdn-controller/src/main.rs \ @@ -57,10 +57,10 @@ FROM base AS builder ARG SERVICE_BIN ARG BUILD_JOBS=2 -ARG CSF_BUILD_VERSION +ARG CSFX_BUILD_VERSION ENV CARGO_BUILD_JOBS=${BUILD_JOBS} -ENV CSF_BUILD_VERSION=${CSF_BUILD_VERSION} +ENV CSFX_BUILD_VERSION=${CSFX_BUILD_VERSION} COPY --from=planner /app/recipe.json recipe.json @@ -68,7 +68,7 @@ RUN cargo chef cook --profile docker-release --recipe-path recipe.json COPY . . -RUN cargo build --profile docker-release --bin ${SERVICE_BIN} --bin csf-migrate +RUN cargo build --profile docker-release --bin ${SERVICE_BIN} --bin csfx-migrate FROM debian:bookworm-slim AS runtime @@ -83,9 +83,9 @@ WORKDIR /app ARG SERVICE_BIN COPY --from=builder /app/target/docker-release/${SERVICE_BIN} /app/service -COPY --from=builder /app/target/docker-release/csf-migrate /csf-migrate +COPY --from=builder /app/target/docker-release/csfx-migrate /csfx-migrate -RUN useradd -r -s /bin/false csf -USER csf +RUN useradd -r -s /bin/false csfx +USER csfx CMD ["/app/service"] diff --git a/control-plane/api-gateway/build.rs b/control-plane/api-gateway/build.rs index 8b7e815..65082fb 100644 --- a/control-plane/api-gateway/build.rs +++ b/control-plane/api-gateway/build.rs @@ -1,6 +1,6 @@ fn main() { - if let Ok(v) = std::env::var("CSF_BUILD_VERSION") { + if let Ok(v) = std::env::var("CSFX_BUILD_VERSION") { println!("cargo:rustc-env=CARGO_PKG_VERSION={}", v); } - println!("cargo:rerun-if-env-changed=CSF_BUILD_VERSION"); + println!("cargo:rerun-if-env-changed=CSFX_BUILD_VERSION"); } diff --git a/control-plane/api-gateway/src/auth_service.rs b/control-plane/api-gateway/src/auth_service.rs index 3620eca..036e74e 100644 --- a/control-plane/api-gateway/src/auth_service.rs +++ 
b/control-plane/api-gateway/src/auth_service.rs @@ -219,7 +219,7 @@ impl AuthService { 1, 30, secret.to_bytes().unwrap(), - Some("CSF-Core".to_string()), + Some("CSFX-Core".to_string()), user.name.clone(), ) .unwrap(); @@ -284,7 +284,7 @@ impl AuthService { 1, 30, Secret::Encoded(secret.clone()).to_bytes().unwrap(), - Some("CSF-Core".to_string()), + Some("CSFX-Core".to_string()), user.name.clone(), ) .unwrap(); diff --git a/control-plane/api-gateway/src/main.rs b/control-plane/api-gateway/src/main.rs index c94bced..113fdb9 100644 --- a/control-plane/api-gateway/src/main.rs +++ b/control-plane/api-gateway/src/main.rs @@ -81,9 +81,9 @@ use routes::users::{ ), modifiers(&SecurityAddon), info( - title = "CSF Control Plane API", + title = "CSFX Control Plane API", version = "0.2.0", - description = "CS-Foundry Control Plane — agent registry, workload scheduling, volume management, SDN, failover, RBAC", + description = "CSFX Control Plane — agent registry, workload scheduling, volume management, SDN, failover, RBAC", contact( name = "CS-Foundry Team", email = "support@cs-foundry.com" diff --git a/control-plane/api-gateway/src/metrics.rs b/control-plane/api-gateway/src/metrics.rs index f20e2dc..f69af41 100644 --- a/control-plane/api-gateway/src/metrics.rs +++ b/control-plane/api-gateway/src/metrics.rs @@ -8,20 +8,20 @@ static HTTP_REQUEST_DURATION_SECONDS: OnceLock = OnceLock::new(); pub fn init() { HTTP_REQUESTS_TOTAL.get_or_init(|| { register_counter_vec!( - "csf_gateway_http_requests_total", + "csfx_gateway_http_requests_total", "Total HTTP requests on API gateway", &["method", "path", "status"] ) - .expect("failed to register csf_gateway_http_requests_total") + .expect("failed to register csfx_gateway_http_requests_total") }); HTTP_REQUEST_DURATION_SECONDS.get_or_init(|| { register_histogram_vec!( - "csf_gateway_http_request_duration_seconds", + "csfx_gateway_http_request_duration_seconds", "HTTP request duration on API gateway in seconds", &["method", "path"] ) - 
.expect("failed to register csf_gateway_http_request_duration_seconds") + .expect("failed to register csfx_gateway_http_request_duration_seconds") }); } diff --git a/control-plane/api-gateway/src/routes/system.rs b/control-plane/api-gateway/src/routes/system.rs index 760876c..8e39fd3 100644 --- a/control-plane/api-gateway/src/routes/system.rs +++ b/control-plane/api-gateway/src/routes/system.rs @@ -68,7 +68,7 @@ pub fn routes() -> Router { async fn health_check() -> Json { Json(serde_json::json!({ "status": "healthy", - "service": "csf-core-backend" + "service": "csfx-core-backend" })) } diff --git a/control-plane/api-gateway/src/routes/update.rs b/control-plane/api-gateway/src/routes/update.rs index c51e541..d054c53 100644 --- a/control-plane/api-gateway/src/routes/update.rs +++ b/control-plane/api-gateway/src/routes/update.rs @@ -6,12 +6,12 @@ use std::env; use crate::auth::rbac::CanManageSystem; use crate::AppState; -const ETCD_DESIRED_VERSION_KEY: &str = "/csf/config/desired_version"; -const ETCD_AVAILABLE_FLAKE_REV_KEY: &str = "/csf/config/available_flake_rev"; -const ETCD_DESIRED_FLAKE_REV_KEY: &str = "/csf/config/desired_flake_rev"; -const ETCD_BUILD_STATUS_KEY: &str = "/csf/config/cp_build_status"; -const ETCD_RESULT_KEY: &str = "/csf/config/last_build_result"; -const ETCD_PAUSED_KEY: &str = "/csf/config/update_paused"; +const ETCD_DESIRED_VERSION_KEY: &str = "/csfx/config/desired_version"; +const ETCD_AVAILABLE_FLAKE_REV_KEY: &str = "/csfx/config/available_flake_rev"; +const ETCD_DESIRED_FLAKE_REV_KEY: &str = "/csfx/config/desired_flake_rev"; +const ETCD_BUILD_STATUS_KEY: &str = "/csfx/config/cp_build_status"; +const ETCD_RESULT_KEY: &str = "/csfx/config/last_build_result"; +const ETCD_PAUSED_KEY: &str = "/csfx/config/update_paused"; #[derive(Debug, Deserialize)] pub struct UpdateRequest { diff --git a/control-plane/api-gateway/src/self_monitor.rs b/control-plane/api-gateway/src/self_monitor.rs index 0aa492e..c457225 100644 --- 
a/control-plane/api-gateway/src/self_monitor.rs +++ b/control-plane/api-gateway/src/self_monitor.rs @@ -55,7 +55,7 @@ impl SelfMonitor { pub async fn new(db_conn: Arc) -> Result { // Get or create local agent let hostname = System::host_name().unwrap_or_else(|| "localhost".to_string()); - let agent_name = format!("CSF-Core-{}", hostname); + let agent_name = format!("CSFX-Core-{}", hostname); // Check if agent already exists let existing_agent = agents::Entity::find() diff --git a/control-plane/csf-updater/build.rs b/control-plane/csf-updater/build.rs deleted file mode 100644 index 8b7e815..0000000 --- a/control-plane/csf-updater/build.rs +++ /dev/null @@ -1,6 +0,0 @@ -fn main() { - if let Ok(v) = std::env::var("CSF_BUILD_VERSION") { - println!("cargo:rustc-env=CARGO_PKG_VERSION={}", v); - } - println!("cargo:rerun-if-env-changed=CSF_BUILD_VERSION"); -} diff --git a/control-plane/csf-migrate/Cargo.toml b/control-plane/csfx-migrate/Cargo.toml similarity index 90% rename from control-plane/csf-migrate/Cargo.toml rename to control-plane/csfx-migrate/Cargo.toml index bf278d4..f3e1573 100644 --- a/control-plane/csf-migrate/Cargo.toml +++ b/control-plane/csfx-migrate/Cargo.toml @@ -1,12 +1,12 @@ [package] -name = "csf-migrate" +name = "csfx-migrate" version.workspace = true edition.workspace = true license.workspace = true publish = false [[bin]] -name = "csf-migrate" +name = "csfx-migrate" path = "src/main.rs" [dependencies] diff --git a/control-plane/csf-migrate/src/main.rs b/control-plane/csfx-migrate/src/main.rs similarity index 92% rename from control-plane/csf-migrate/src/main.rs rename to control-plane/csfx-migrate/src/main.rs index b6eed99..03892a8 100644 --- a/control-plane/csf-migrate/src/main.rs +++ b/control-plane/csfx-migrate/src/main.rs @@ -9,7 +9,7 @@ async fn main() { tracing_subscriber::fmt() .with_env_filter( tracing_subscriber::EnvFilter::from_default_env() - .add_directive("csf_migrate=info".parse().unwrap()), + 
.add_directive("csfx_migrate=info".parse().unwrap()), ) .init(); diff --git a/control-plane/csf-updater/Cargo.toml b/control-plane/csfx-updater/Cargo.toml similarity index 92% rename from control-plane/csf-updater/Cargo.toml rename to control-plane/csfx-updater/Cargo.toml index 39a3b9c..e8ca0e5 100644 --- a/control-plane/csf-updater/Cargo.toml +++ b/control-plane/csfx-updater/Cargo.toml @@ -1,12 +1,12 @@ [package] -name = "csf-updater" +name = "csfx-updater" version.workspace = true edition.workspace = true license.workspace = true publish = false [[bin]] -name = "csf-updater" +name = "csfx-updater" path = "src/main.rs" [dependencies] diff --git a/control-plane/csfx-updater/build.rs b/control-plane/csfx-updater/build.rs new file mode 100644 index 0000000..65082fb --- /dev/null +++ b/control-plane/csfx-updater/build.rs @@ -0,0 +1,6 @@ +fn main() { + if let Ok(v) = std::env::var("CSFX_BUILD_VERSION") { + println!("cargo:rustc-env=CARGO_PKG_VERSION={}", v); + } + println!("cargo:rerun-if-env-changed=CSFX_BUILD_VERSION"); +} diff --git a/control-plane/csf-updater/src/config.rs b/control-plane/csfx-updater/src/config.rs similarity index 93% rename from control-plane/csf-updater/src/config.rs rename to control-plane/csfx-updater/src/config.rs index 577a476..22292ed 100644 --- a/control-plane/csf-updater/src/config.rs +++ b/control-plane/csfx-updater/src/config.rs @@ -23,7 +23,7 @@ impl Config { .and_then(|v| v.parse().ok()) .unwrap_or(120), infra_repo_mirror_dir: env::var("INFRA_REPO_MIRROR_DIR") - .unwrap_or_else(|_| "/var/lib/csf-updater/infra.git".to_string()), + .unwrap_or_else(|_| "/var/lib/csfx-updater/infra.git".to_string()), infra_repo_mirror_url: env::var("INFRA_REPO_MIRROR_URL") .context("INFRA_REPO_MIRROR_URL must be set")?, infra_repo_github: env::var("INFRA_REPO_GITHUB") diff --git a/control-plane/csf-updater/src/etcd.rs b/control-plane/csfx-updater/src/etcd.rs similarity index 69% rename from control-plane/csf-updater/src/etcd.rs rename to 
control-plane/csfx-updater/src/etcd.rs index 8550b25..7052752 100644 --- a/control-plane/csf-updater/src/etcd.rs +++ b/control-plane/csfx-updater/src/etcd.rs @@ -2,13 +2,13 @@ use anyhow::Result; use crate::config::Config; -pub const DESIRED_VERSION_KEY: &str = "/csf/config/desired_version"; -pub const AVAILABLE_FLAKE_REV_KEY: &str = "/csf/config/available_flake_rev"; -pub const DESIRED_FLAKE_REV_KEY: &str = "/csf/config/desired_flake_rev"; -pub const BUILD_STATUS_KEY: &str = "/csf/config/cp_build_status"; -pub const RESULT_KEY: &str = "/csf/config/last_build_result"; -pub const PAUSED_KEY: &str = "/csf/config/update_paused"; -pub const NODE_HEARTBEAT_PREFIX: &str = "/csf/nodes/"; +pub const DESIRED_VERSION_KEY: &str = "/csfx/config/desired_version"; +pub const AVAILABLE_FLAKE_REV_KEY: &str = "/csfx/config/available_flake_rev"; +pub const DESIRED_FLAKE_REV_KEY: &str = "/csfx/config/desired_flake_rev"; +pub const BUILD_STATUS_KEY: &str = "/csfx/config/cp_build_status"; +pub const RESULT_KEY: &str = "/csfx/config/last_build_result"; +pub const PAUSED_KEY: &str = "/csfx/config/update_paused"; +pub const NODE_HEARTBEAT_PREFIX: &str = "/csfx/nodes/"; pub struct Client { inner: etcd_client::Client, diff --git a/control-plane/csf-updater/src/git_mirror.rs b/control-plane/csfx-updater/src/git_mirror.rs similarity index 100% rename from control-plane/csf-updater/src/git_mirror.rs rename to control-plane/csfx-updater/src/git_mirror.rs diff --git a/control-plane/csf-updater/src/main.rs b/control-plane/csfx-updater/src/main.rs similarity index 99% rename from control-plane/csf-updater/src/main.rs rename to control-plane/csfx-updater/src/main.rs index 1c00315..1d80ba1 100644 --- a/control-plane/csf-updater/src/main.rs +++ b/control-plane/csfx-updater/src/main.rs @@ -22,7 +22,7 @@ async fn main() -> anyhow::Result<()> { info!( poll_interval_secs = cfg.poll_interval_secs, infra_repo_github = %cfg.infra_repo_github, - "csf-updater started" + "csfx-updater started" ); let cfg = 
std::sync::Arc::new(cfg); diff --git a/control-plane/csf-updater/src/nix_build.rs b/control-plane/csfx-updater/src/nix_build.rs similarity index 100% rename from control-plane/csf-updater/src/nix_build.rs rename to control-plane/csfx-updater/src/nix_build.rs diff --git a/control-plane/csf-updater/src/poller.rs b/control-plane/csfx-updater/src/poller.rs similarity index 96% rename from control-plane/csf-updater/src/poller.rs rename to control-plane/csfx-updater/src/poller.rs index b02cff9..7fa7601 100644 --- a/control-plane/csf-updater/src/poller.rs +++ b/control-plane/csfx-updater/src/poller.rs @@ -61,7 +61,7 @@ async fn resolve_version_to_sha( let client = reqwest::Client::new(); let mut req = client .get(&url) - .header("User-Agent", "csf-updater") + .header("User-Agent", "csfx-updater") .header("Accept", "application/vnd.github.v3+json"); if let Some(etag) = last_etag.as_deref() { @@ -105,7 +105,7 @@ async fn dereference_tag(cfg: &Config, tag_sha: &str) -> Result { let resp = reqwest::Client::new() .get(&url) - .header("User-Agent", "csf-updater") + .header("User-Agent", "csfx-updater") .header("Accept", "application/vnd.github.v3+json") .send() .await?; diff --git a/control-plane/csf-updater/src/updater.rs b/control-plane/csfx-updater/src/updater.rs similarity index 100% rename from control-plane/csf-updater/src/updater.rs rename to control-plane/csfx-updater/src/updater.rs diff --git a/control-plane/failover-controller/src/main.rs b/control-plane/failover-controller/src/main.rs index 6369b46..926e5ed 100644 --- a/control-plane/failover-controller/src/main.rs +++ b/control-plane/failover-controller/src/main.rs @@ -15,7 +15,7 @@ async fn main() -> anyhow::Result<()> { logger::init_logger(); metrics::init(); - log_info!("main", "CSF Failover Controller starting..."); + log_info!("main", "CSFX Failover Controller starting..."); log_info!("main", &format!("Version: {}", env!("CARGO_PKG_VERSION"))); log_info!("main", "Connecting to database..."); @@ -34,7 +34,10 @@ 
async fn main() -> anyhow::Result<()> { .unwrap_or(8004); let addr = SocketAddr::from(([0, 0, 0, 0], port)); - log_info!("main", &format!("Failover Controller listening port={}", port)); + log_info!( + "main", + &format!("Failover Controller listening port={}", port) + ); let listener = tokio::net::TcpListener::bind(addr).await?; diff --git a/control-plane/failover-controller/src/metrics.rs b/control-plane/failover-controller/src/metrics.rs index 3c7cf5a..64d068b 100644 --- a/control-plane/failover-controller/src/metrics.rs +++ b/control-plane/failover-controller/src/metrics.rs @@ -8,20 +8,20 @@ static HTTP_REQUEST_DURATION_SECONDS: OnceLock = OnceLock::new(); pub fn init() { HTTP_REQUESTS_TOTAL.get_or_init(|| { register_counter_vec!( - "csf_http_requests_total", + "csfx_http_requests_total", "Total HTTP requests", &["method", "path", "status"] ) - .expect("failed to register csf_http_requests_total") + .expect("failed to register csfx_http_requests_total") }); HTTP_REQUEST_DURATION_SECONDS.get_or_init(|| { register_histogram_vec!( - "csf_http_request_duration_seconds", + "csfx_http_request_duration_seconds", "HTTP request duration in seconds", &["method", "path"] ) - .expect("failed to register csf_http_request_duration_seconds") + .expect("failed to register csfx_http_request_duration_seconds") }); } diff --git a/control-plane/registry/README.md b/control-plane/registry/README.md index 9e990ec..e9c3682 100644 --- a/control-plane/registry/README.md +++ b/control-plane/registry/README.md @@ -1,4 +1,4 @@ -# CSF Registry Service +# CSFX Registry Service Sicherer Agent Registry Service mit Token-basierter Registrierung und API Key Management. 
@@ -126,7 +126,7 @@ Response: ```json { "agent_id": "660e8400-e29b-41d4-a716-446655440000", - "api_key": "csf_agent_xyz789...", + "api_key": "csfx_agent_xyz789...", "message": "Agent successfully registered" } ``` @@ -136,7 +136,7 @@ Response: ```bash curl -X POST http://localhost:8000/api/registry/agents/660e8400-e29b-41d4-a716-446655440000/heartbeat \ -H "Content-Type: application/json" \ - -H "X-API-Key: csf_agent_xyz789..." \ + -H "X-API-Key: csfx_agent_xyz789..." \ -d '{ "status": "online" }' @@ -256,7 +256,7 @@ lsof -i :8001 1. **API Key korrekt?** - Verwende den API Key aus der Registrierungs-Response - - Format: `X-API-Key: csf_agent_...` + - Format: `X-API-Key: csfx_agent_...` 2. **Agent ID korrekt?** - URL muss die korrekte Agent ID enthalten diff --git a/control-plane/registry/docker-compose.dev.yml b/control-plane/registry/docker-compose.dev.yml index 1ec29e1..aeef9ba 100644 --- a/control-plane/registry/docker-compose.dev.yml +++ b/control-plane/registry/docker-compose.dev.yml @@ -4,7 +4,7 @@ services: build: context: ../.. 
dockerfile: control-plane/registry/Dockerfile.dev - container_name: csf-registry + container_name: csfx-registry ports: - "8001:8001" environment: @@ -15,7 +15,7 @@ services: - ../../control-plane/registry/src:/app/control-plane/registry/src - ../../control-plane/shared:/app/control-plane/shared networks: - - csf-network + - csfx-network healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8001/health"] interval: 30s @@ -24,5 +24,5 @@ services: start_period: 40s networks: - csf-network: + csfx-network: driver: bridge diff --git a/control-plane/registry/src/handlers/agent.rs b/control-plane/registry/src/handlers/agent.rs index 2431afb..23a8378 100644 --- a/control-plane/registry/src/handlers/agent.rs +++ b/control-plane/registry/src/handlers/agent.rs @@ -228,7 +228,7 @@ async fn read_desired_flake_rev(etcd_endpoints: &str) -> Option { .ok()?; let resp = client - .get("/csf/config/desired_flake_rev", None) + .get("/csfx/config/desired_flake_rev", None) .await .ok()?; @@ -239,7 +239,7 @@ async fn read_desired_flake_rev(etcd_endpoints: &str) -> Option { } async fn increment_post_update_heartbeats(etcd_endpoints: &str, agent_id: Uuid) -> Option { - let key = format!("/csf/nodes/{}/post_update_heartbeats", agent_id); + let key = format!("/csfx/nodes/{}/post_update_heartbeats", agent_id); let mut client = etcd_client::Client::connect([etcd_endpoints], None) .await diff --git a/control-plane/registry/src/main.rs b/control-plane/registry/src/main.rs index 2a4a9a5..86b353c 100644 --- a/control-plane/registry/src/main.rs +++ b/control-plane/registry/src/main.rs @@ -16,7 +16,7 @@ async fn main() -> anyhow::Result<()> { logger::init_logger(); metrics::init(); - log_info!("main", "CSF Registry Service starting..."); + log_info!("main", "CSFX Registry Service starting..."); log_info!("main", &format!("Version: {}", env!("CARGO_PKG_VERSION"))); log_info!("main", "Connecting to database..."); @@ -25,7 +25,7 @@ async fn main() -> anyhow::Result<()> { .expect("Failed to connect 
to database"); log_info!("main", "Database connection established"); - let cert_ttl_hours: i64 = std::env::var("CSF_CERT_TTL_HOURS") + let cert_ttl_hours: i64 = std::env::var("CSFX_CERT_TTL_HOURS") .ok() .and_then(|v| v.parse().ok()) .unwrap_or(24); @@ -34,7 +34,9 @@ async fn main() -> anyhow::Result<()> { .expect("Failed to initialize PKI service"); let token_manager = Arc::new(services::tokens::TokenManager::new(db_conn.clone())); - let bootstrap_token_manager = Arc::new(services::bootstrap_tokens::BootstrapTokenManager::new(db_conn.clone())); + let bootstrap_token_manager = Arc::new(services::bootstrap_tokens::BootstrapTokenManager::new( + db_conn.clone(), + )); let api_key_manager = Arc::new(services::api_keys::ApiKeyManager::new(db_conn.clone())); let agent_registry = Arc::new(services::registry::AgentRegistry::new(db_conn.clone())); @@ -43,16 +45,16 @@ async fn main() -> anyhow::Result<()> { let scheduler_url = std::env::var("SCHEDULER_SERVICE_URL") .unwrap_or_else(|_| "http://localhost:8002".to_string()); - let gateway_url = std::env::var("API_GATEWAY_URL") - .unwrap_or_else(|_| "http://localhost:8000".to_string()); + let gateway_url = + std::env::var("API_GATEWAY_URL").unwrap_or_else(|_| "http://localhost:8000".to_string()); let http_client = reqwest::Client::builder() .timeout(std::time::Duration::from_secs(5)) .build() .expect("Failed to build HTTP client"); - let etcd_endpoints = std::env::var("ETCD_ENDPOINTS") - .unwrap_or_else(|_| "http://localhost:2379".to_string()); + let etcd_endpoints = + std::env::var("ETCD_ENDPOINTS").unwrap_or_else(|_| "http://localhost:2379".to_string()); let state = server::AppState { token_manager: token_manager.clone(), diff --git a/control-plane/registry/src/metrics.rs b/control-plane/registry/src/metrics.rs index 3c7cf5a..64d068b 100644 --- a/control-plane/registry/src/metrics.rs +++ b/control-plane/registry/src/metrics.rs @@ -8,20 +8,20 @@ static HTTP_REQUEST_DURATION_SECONDS: OnceLock = OnceLock::new(); pub fn init() { 
HTTP_REQUESTS_TOTAL.get_or_init(|| { register_counter_vec!( - "csf_http_requests_total", + "csfx_http_requests_total", "Total HTTP requests", &["method", "path", "status"] ) - .expect("failed to register csf_http_requests_total") + .expect("failed to register csfx_http_requests_total") }); HTTP_REQUEST_DURATION_SECONDS.get_or_init(|| { register_histogram_vec!( - "csf_http_request_duration_seconds", + "csfx_http_request_duration_seconds", "HTTP request duration in seconds", &["method", "path"] ) - .expect("failed to register csf_http_request_duration_seconds") + .expect("failed to register csfx_http_request_duration_seconds") }); } diff --git a/control-plane/registry/src/services/api_keys.rs b/control-plane/registry/src/services/api_keys.rs index 574e3a6..b1001b2 100644 --- a/control-plane/registry/src/services/api_keys.rs +++ b/control-plane/registry/src/services/api_keys.rs @@ -17,7 +17,7 @@ impl ApiKey { pub fn new(agent_id: Uuid) -> Self { Self { id: Uuid::new_v4(), - key: format!("csf_agent_{}", Uuid::new_v4().simple()), + key: format!("csfx_agent_{}", Uuid::new_v4().simple()), agent_id, created_at: Utc::now(), last_used: None, diff --git a/control-plane/registry/src/services/bootstrap_tokens.rs b/control-plane/registry/src/services/bootstrap_tokens.rs index 5190062..14754a1 100644 --- a/control-plane/registry/src/services/bootstrap_tokens.rs +++ b/control-plane/registry/src/services/bootstrap_tokens.rs @@ -3,7 +3,7 @@ use sea_orm::DatabaseConnection; use serde::{Deserialize, Serialize}; use uuid::Uuid; -const TOKEN_PREFIX: &str = "csf-bootstrap."; +const TOKEN_PREFIX: &str = "csfx-bootstrap."; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct BootstrapToken { diff --git a/control-plane/registry/src/services/pki.rs b/control-plane/registry/src/services/pki.rs index 680cb09..8f6ba56 100644 --- a/control-plane/registry/src/services/pki.rs +++ b/control-plane/registry/src/services/pki.rs @@ -39,12 +39,11 @@ impl PkiService { fn load_or_generate_ca() -> 
Result { match ( - std::env::var("CSF_CA_CERT_PEM"), - std::env::var("CSF_CA_KEY_PEM"), + std::env::var("CSFX_CA_CERT_PEM"), + std::env::var("CSFX_CA_KEY_PEM"), ) { (Ok(cert_pem), Ok(key_pem)) => { - KeyPair::from_pem(&key_pem) - .map_err(|e| anyhow!("Failed to load CA key: {}", e))?; + KeyPair::from_pem(&key_pem).map_err(|e| anyhow!("Failed to load CA key: {}", e))?; crate::log_info!("pki", "CA loaded from environment"); @@ -57,7 +56,7 @@ impl PkiService { _ => { crate::log_warn!( "pki", - "CSF_CA_CERT_PEM/CSF_CA_KEY_PEM not set, generating ephemeral CA" + "CSFX_CA_CERT_PEM/CSFX_CA_KEY_PEM not set, generating ephemeral CA" ); Self::generate_ca() } @@ -70,8 +69,12 @@ impl PkiService { let mut params = CertificateParams::default(); params.is_ca = IsCa::Ca(BasicConstraints::Unconstrained); - params.distinguished_name.push(DnType::CommonName, "CSF Internal CA"); - params.distinguished_name.push(DnType::OrganizationName, "CS-Foundry"); + params + .distinguished_name + .push(DnType::CommonName, "CSFX Internal CA"); + params + .distinguished_name + .push(DnType::OrganizationName, "CSFX"); params.key_usages = vec![KeyUsagePurpose::KeyCertSign, KeyUsagePurpose::CrlSign]; params.not_before = rcgen::date_time_ymd(2024, 1, 1); params.not_after = rcgen::date_time_ymd(2035, 1, 1); @@ -154,10 +157,9 @@ impl PkiService { agent_id: Uuid, new_csr_pem: &str, ) -> Result { - if let Some(old_cert) = - crate::db::certificates::get_active_certificate(&self.db, agent_id) - .await - .map_err(|e| anyhow!("DB error: {}", e))? + if let Some(old_cert) = crate::db::certificates::get_active_certificate(&self.db, agent_id) + .await + .map_err(|e| anyhow!("DB error: {}", e))? 
{ crate::db::certificates::revoke_certificate( &self.db, @@ -180,24 +182,15 @@ impl PkiService { self.issue_certificate(agent_id, new_csr_pem).await } - pub async fn revoke_agent_certificate( - &self, - agent_id: Uuid, - reason: String, - ) -> Result<()> { + pub async fn revoke_agent_certificate(&self, agent_id: Uuid, reason: String) -> Result<()> { let cert = crate::db::certificates::get_active_certificate(&self.db, agent_id) .await .map_err(|e| anyhow!("DB error: {}", e))? .ok_or_else(|| anyhow!("No active certificate for agent: {}", agent_id))?; - crate::db::certificates::revoke_certificate( - &self.db, - cert.serial_number, - agent_id, - reason, - ) - .await - .map_err(|e| anyhow!("Failed to revoke certificate: {}", e))?; + crate::db::certificates::revoke_certificate(&self.db, cert.serial_number, agent_id, reason) + .await + .map_err(|e| anyhow!("Failed to revoke certificate: {}", e))?; crate::log_info!( "pki", diff --git a/control-plane/registry/test-registry.sh b/control-plane/registry/test-registry.sh index b45d540..a649876 100755 --- a/control-plane/registry/test-registry.sh +++ b/control-plane/registry/test-registry.sh @@ -1,12 +1,12 @@ #!/bin/bash -# CSF Registry Service Test Script +# CSFX Registry Service Test Script set -e REGISTRY_URL="http://localhost:8001" -echo "🧪 Testing CSF Registry Service" +echo "🧪 Testing CSFX Registry Service" echo "================================" echo "" @@ -137,7 +137,7 @@ echo "" # 11. Deregister Agent echo "1️⃣1️⃣ Deregistering agent..." 
-curl -s -X POST "${REGISTRY_URL}/admin/agents/${AGENT_ID}" +curl -s -X POST "${REGISTRY_URL}/admin/agents/${AGENT_ID}" echo "✅ Agent deregistered" echo "" diff --git a/control-plane/scheduler/src/main.rs b/control-plane/scheduler/src/main.rs index c4572ca..dab7fdf 100644 --- a/control-plane/scheduler/src/main.rs +++ b/control-plane/scheduler/src/main.rs @@ -17,7 +17,7 @@ async fn main() -> anyhow::Result<()> { logger::init_logger(); metrics::init(); - log_info!("main", "CSF Scheduler Service starting..."); + log_info!("main", "CSFX Scheduler Service starting..."); log_info!("main", &format!("Version: {}", env!("CARGO_PKG_VERSION"))); log_info!("main", "Connecting to database..."); @@ -26,18 +26,24 @@ async fn main() -> anyhow::Result<()> { .expect("Failed to connect to database"); log_info!("main", "Database connection established"); - let etcd_endpoints = std::env::var("ETCD_ENDPOINTS") - .unwrap_or_else(|_| "http://localhost:2379".to_string()); + let etcd_endpoints = + std::env::var("ETCD_ENDPOINTS").unwrap_or_else(|_| "http://localhost:2379".to_string()); let etcd_endpoints: Vec<&str> = etcd_endpoints.split(',').collect(); - log_info!("main", &format!("Connecting to etcd endpoints={}", etcd_endpoints.join(","))); + log_info!( + "main", + &format!("Connecting to etcd endpoints={}", etcd_endpoints.join(",")) + ); let etcd = etcd_client::Client::connect(etcd_endpoints, None) .await .expect("Failed to connect to etcd"); log_info!("main", "etcd connection established"); let etcd = Arc::new(Mutex::new(etcd)); - let scheduler = Arc::new(services::scheduler::SchedulerService::new(db.clone(), etcd.clone())); + let scheduler = Arc::new(services::scheduler::SchedulerService::new( + db.clone(), + etcd.clone(), + )); let state = server::AppState { db, diff --git a/control-plane/scheduler/src/metrics.rs b/control-plane/scheduler/src/metrics.rs index 3c7cf5a..64d068b 100644 --- a/control-plane/scheduler/src/metrics.rs +++ b/control-plane/scheduler/src/metrics.rs @@ -8,20 
+8,20 @@ static HTTP_REQUEST_DURATION_SECONDS: OnceLock = OnceLock::new(); pub fn init() { HTTP_REQUESTS_TOTAL.get_or_init(|| { register_counter_vec!( - "csf_http_requests_total", + "csfx_http_requests_total", "Total HTTP requests", &["method", "path", "status"] ) - .expect("failed to register csf_http_requests_total") + .expect("failed to register csfx_http_requests_total") }); HTTP_REQUEST_DURATION_SECONDS.get_or_init(|| { register_histogram_vec!( - "csf_http_request_duration_seconds", + "csfx_http_request_duration_seconds", "HTTP request duration in seconds", &["method", "path"] ) - .expect("failed to register csf_http_request_duration_seconds") + .expect("failed to register csfx_http_request_duration_seconds") }); } diff --git a/control-plane/scheduler/src/services/etcd.rs b/control-plane/scheduler/src/services/etcd.rs index 7e9aa6a..18db45f 100644 --- a/control-plane/scheduler/src/services/etcd.rs +++ b/control-plane/scheduler/src/services/etcd.rs @@ -19,7 +19,7 @@ pub async fn put_placement( etcd: &Arc>, record: &PlacementRecord, ) -> Result<(), String> { - let key = format!("/csf/placements/{}", record.workload_id); + let key = format!("/csfx/placements/{}", record.workload_id); let value = serde_json::to_string(record) .map_err(|e| format!("Failed to serialize placement: {}", e))?; @@ -36,7 +36,7 @@ pub async fn delete_placement( etcd: &Arc>, workload_id: Uuid, ) -> Result<(), String> { - let key = format!("/csf/placements/{}", workload_id); + let key = format!("/csfx/placements/{}", workload_id); etcd.lock() .await diff --git a/control-plane/sdn-controller/src/main.rs b/control-plane/sdn-controller/src/main.rs index 9c13770..47fa396 100644 --- a/control-plane/sdn-controller/src/main.rs +++ b/control-plane/sdn-controller/src/main.rs @@ -15,7 +15,7 @@ async fn main() -> anyhow::Result<()> { logger::init_logger(); metrics::init(); - log_info!("main", "CSF SDN Controller starting..."); + log_info!("main", "CSFX SDN Controller starting..."); log_info!("main", 
&format!("Version: {}", env!("CARGO_PKG_VERSION"))); log_info!("main", "Connecting to database..."); @@ -24,7 +24,8 @@ async fn main() -> anyhow::Result<()> { .expect("Failed to connect to database"); log_info!("main", "Database connection established"); - let etcd_url = std::env::var("ETCD_URL").unwrap_or_else(|_| "http://localhost:2379".to_string()); + let etcd_url = + std::env::var("ETCD_URL").unwrap_or_else(|_| "http://localhost:2379".to_string()); log_info!("main", &format!("Connecting to etcd url={}", etcd_url)); let etcd = etcd_client::Client::connect([etcd_url.as_str()], None) .await diff --git a/control-plane/sdn-controller/src/metrics.rs b/control-plane/sdn-controller/src/metrics.rs index 3c7cf5a..64d068b 100644 --- a/control-plane/sdn-controller/src/metrics.rs +++ b/control-plane/sdn-controller/src/metrics.rs @@ -8,20 +8,20 @@ static HTTP_REQUEST_DURATION_SECONDS: OnceLock = OnceLock::new(); pub fn init() { HTTP_REQUESTS_TOTAL.get_or_init(|| { register_counter_vec!( - "csf_http_requests_total", + "csfx_http_requests_total", "Total HTTP requests", &["method", "path", "status"] ) - .expect("failed to register csf_http_requests_total") + .expect("failed to register csfx_http_requests_total") }); HTTP_REQUEST_DURATION_SECONDS.get_or_init(|| { register_histogram_vec!( - "csf_http_request_duration_seconds", + "csfx_http_request_duration_seconds", "HTTP request duration in seconds", &["method", "path"] ) - .expect("failed to register csf_http_request_duration_seconds") + .expect("failed to register csfx_http_request_duration_seconds") }); } diff --git a/control-plane/sdn-controller/src/services/ipam.rs b/control-plane/sdn-controller/src/services/ipam.rs index 5e38a2e..f1753bc 100644 --- a/control-plane/sdn-controller/src/services/ipam.rs +++ b/control-plane/sdn-controller/src/services/ipam.rs @@ -3,7 +3,7 @@ use etcd_client::Client; use std::net::Ipv4Addr; use uuid::Uuid; -const IPAM_PREFIX: &str = "/csf/ipam/"; +const IPAM_PREFIX: &str = "/csfx/ipam/"; 
#[derive(Clone)] pub struct IpamService { @@ -58,7 +58,7 @@ impl IpamService { overlay_ip: &str, public_key: Option<&str>, ) -> Result<()> { - let base_key = format!("/csf/peers/{}/{}", network_id, node_id); + let base_key = format!("/csfx/peers/{}/{}", network_id, node_id); self.etcd .put(format!("{}/overlay_ip", base_key).as_str(), overlay_ip, None) .await @@ -75,7 +75,7 @@ impl IpamService { } pub async fn remove_peer(&mut self, network_id: Uuid, node_id: &str) -> Result<()> { - let prefix = format!("/csf/peers/{}/{}", network_id, node_id); + let prefix = format!("/csfx/peers/{}/{}", network_id, node_id); self.etcd .delete( prefix.as_str(), diff --git a/control-plane/shared/Cargo.toml b/control-plane/shared/Cargo.toml index 072d6a6..4469933 100644 --- a/control-plane/shared/Cargo.toml +++ b/control-plane/shared/Cargo.toml @@ -11,4 +11,4 @@ version = "0.2.2" edition = "2021" authors = ["CS-Foundry"] license = "SEE LICENSE IN LICENSE" -repository = "https://github.com/CS-Foundry/CSF-Core" +repository = "https://github.com/CS-Foundry/CSFX-Core" diff --git a/control-plane/volume-manager/cleanup-etcd.sh b/control-plane/volume-manager/cleanup-etcd.sh index 93eb3aa..6aef160 100755 --- a/control-plane/volume-manager/cleanup-etcd.sh +++ b/control-plane/volume-manager/cleanup-etcd.sh @@ -9,19 +9,19 @@ COLOR_RESET='\033[0m' echo -e "${COLOR_YELLOW}🧹 Cleaning etcd data...${COLOR_RESET}" # Lösche alle alten Daten -echo "Deleting all keys under /csf/volume-manager/..." +echo "Deleting all keys under /csfx/volume-manager/..." 
# Nodes -ETCDCTL_API=3 etcdctl --endpoints=localhost:2379 del /csf/volume-manager/nodes/ --prefix +ETCDCTL_API=3 etcdctl --endpoints=localhost:2379 del /csfx/volume-manager/nodes/ --prefix # Leader -ETCDCTL_API=3 etcdctl --endpoints=localhost:2379 del /csf/volume-manager/election/ --prefix +ETCDCTL_API=3 etcdctl --endpoints=localhost:2379 del /csfx/volume-manager/election/ --prefix # Volumes -ETCDCTL_API=3 etcdctl --endpoints=localhost:2379 del /csf/volume-manager/volumes/ --prefix +ETCDCTL_API=3 etcdctl --endpoints=localhost:2379 del /csfx/volume-manager/volumes/ --prefix # Snapshots -ETCDCTL_API=3 etcdctl --endpoints=localhost:2379 del /csf/volume-manager/snapshots/ --prefix +ETCDCTL_API=3 etcdctl --endpoints=localhost:2379 del /csfx/volume-manager/snapshots/ --prefix echo -e "${COLOR_GREEN}✅ etcd cleaned!${COLOR_RESET}" echo "" diff --git a/control-plane/volume-manager/docker-compose.dev.yml b/control-plane/volume-manager/docker-compose.dev.yml index 5b40d3a..c688094 100644 --- a/control-plane/volume-manager/docker-compose.dev.yml +++ b/control-plane/volume-manager/docker-compose.dev.yml @@ -20,7 +20,7 @@ services: - ceph-mon1-data:/var/lib/ceph - ./ceph-config/ceph.conf:/etc/ceph/ceph.conf:ro networks: - csf-test: + csfx-test: ipv4_address: 172.20.0.21 cap_add: - ALL @@ -50,7 +50,7 @@ services: - ceph-mon2-data:/var/lib/ceph - ./ceph-config/ceph.conf:/etc/ceph/ceph.conf:ro networks: - csf-test: + csfx-test: ipv4_address: 172.20.0.22 cap_add: - ALL @@ -82,7 +82,7 @@ services: - ceph-mon3-data:/var/lib/ceph - ./ceph-config/ceph.conf:/etc/ceph/ceph.conf:ro networks: - csf-test: + csfx-test: ipv4_address: 172.20.0.23 cap_add: - ALL @@ -112,7 +112,7 @@ services: - ceph-osd1-data:/var/lib/ceph/osd - ./ceph-config/ceph.conf:/etc/ceph/ceph.conf:ro networks: - csf-test: + csfx-test: ipv4_address: 172.20.0.31 cap_add: - ALL @@ -138,7 +138,7 @@ services: - ceph-osd2-data:/var/lib/ceph/osd - ./ceph-config/ceph.conf:/etc/ceph/ceph.conf:ro networks: - csf-test: + csfx-test: 
ipv4_address: 172.20.0.32 cap_add: - ALL @@ -164,7 +164,7 @@ services: - ceph-osd3-data:/var/lib/ceph/osd - ./ceph-config/ceph.conf:/etc/ceph/ceph.conf:ro networks: - csf-test: + csfx-test: ipv4_address: 172.20.0.33 cap_add: - ALL @@ -190,14 +190,14 @@ services: - ETCD_LISTEN_PEER_URLS=http://0.0.0.0:2380 - ETCD_LISTEN_CLIENT_URLS=http://0.0.0.0:2379 - ETCD_ADVERTISE_CLIENT_URLS=http://etcd1:2379 - - ETCD_INITIAL_CLUSTER_TOKEN=etcd-cluster-csf + - ETCD_INITIAL_CLUSTER_TOKEN=etcd-cluster-csfx - ETCD_INITIAL_CLUSTER=etcd1=http://etcd1:2380,etcd2=http://etcd2:2380,etcd3=http://etcd3:2380 - ETCD_INITIAL_CLUSTER_STATE=new ports: - "2379:2379" - "2380:2380" networks: - csf-test: + csfx-test: ipv4_address: 172.20.0.11 volumes: - etcd1-data:/etcd-data @@ -214,14 +214,14 @@ services: - ETCD_LISTEN_PEER_URLS=http://0.0.0.0:2380 - ETCD_LISTEN_CLIENT_URLS=http://0.0.0.0:2379 - ETCD_ADVERTISE_CLIENT_URLS=http://etcd2:2379 - - ETCD_INITIAL_CLUSTER_TOKEN=etcd-cluster-csf + - ETCD_INITIAL_CLUSTER_TOKEN=etcd-cluster-csfx - ETCD_INITIAL_CLUSTER=etcd1=http://etcd1:2380,etcd2=http://etcd2:2380,etcd3=http://etcd3:2380 - ETCD_INITIAL_CLUSTER_STATE=new ports: - "2479:2379" - "2480:2380" networks: - csf-test: + csfx-test: ipv4_address: 172.20.0.12 volumes: - etcd2-data:/etcd-data @@ -238,14 +238,14 @@ services: - ETCD_LISTEN_PEER_URLS=http://0.0.0.0:2380 - ETCD_LISTEN_CLIENT_URLS=http://0.0.0.0:2379 - ETCD_ADVERTISE_CLIENT_URLS=http://etcd3:2379 - - ETCD_INITIAL_CLUSTER_TOKEN=etcd-cluster-csf + - ETCD_INITIAL_CLUSTER_TOKEN=etcd-cluster-csfx - ETCD_INITIAL_CLUSTER=etcd1=http://etcd1:2380,etcd2=http://etcd2:2380,etcd3=http://etcd3:2380 - ETCD_INITIAL_CLUSTER_STATE=new ports: - "2579:2379" - "2580:2380" networks: - csf-test: + csfx-test: ipv4_address: 172.20.0.13 volumes: - etcd3-data:/etcd-data @@ -262,7 +262,7 @@ services: environment: - PATRONI_NAME=patroni1 - ETCD_HOST=etcd1:2379 - - PATRONI_SCOPE=postgres-csf + - PATRONI_SCOPE=postgres-csfx - 
PATRONI_ETCD3_HOSTS=etcd1:2379,etcd2:2379,etcd3:2379 - PATRONI_ETCD3_PROTOCOL=http @@ -281,9 +281,9 @@ services: # Application User - PATRONI_POSTGRESQL_PGPASS=/tmp/pgpass - - POSTGRES_USER=csf - - POSTGRES_PASSWORD=csfpassword - - POSTGRES_DB=csf_core + - POSTGRES_USER=csfx + - POSTGRES_PASSWORD=csfxpassword + - POSTGRES_DB=csfx_core # REST API - PATRONI_RESTAPI_LISTEN=0.0.0.0:8008 @@ -307,7 +307,7 @@ services: volumes: - patroni1-data:/home/postgres/pgdata networks: - csf-test: + csfx-test: ipv4_address: 172.20.0.41 ports: - "5441:5432" @@ -334,7 +334,7 @@ services: environment: - ETCD_HOST=etcd1:2379 - PATRONI_NAME=patroni2 - - PATRONI_SCOPE=postgres-csf + - PATRONI_SCOPE=postgres-csfx - PATRONI_ETCD3_HOSTS=etcd1:2379,etcd2:2379,etcd3:2379 - PATRONI_ETCD3_PROTOCOL=http @@ -349,9 +349,9 @@ services: - PATRONI_SUPERUSER_PASSWORD=postgrespass - PATRONI_POSTGRESQL_PGPASS=/tmp/pgpass - - POSTGRES_USER=csf - - POSTGRES_PASSWORD=csfpassword - - POSTGRES_DB=csf_core + - POSTGRES_USER=csfx + - POSTGRES_PASSWORD=csfxpassword + - POSTGRES_DB=csfx_core - PATRONI_RESTAPI_LISTEN=0.0.0.0:8008 - PATRONI_RESTAPI_CONNECT_ADDRESS=patroni2:8008 @@ -372,7 +372,7 @@ services: volumes: - patroni2-data:/home/postgres/pgdata networks: - csf-test: + csfx-test: ipv4_address: 172.20.0.42 ports: - "5442:5432" @@ -399,7 +399,7 @@ services: environment: - ETCD_HOST=etcd1:2379 - PATRONI_NAME=patroni3 - - PATRONI_SCOPE=postgres-csf + - PATRONI_SCOPE=postgres-csfx - PATRONI_ETCD3_HOSTS=etcd1:2379,etcd2:2379,etcd3:2379 - PATRONI_ETCD3_PROTOCOL=http @@ -414,9 +414,9 @@ services: - PATRONI_SUPERUSER_PASSWORD=postgrespass - PATRONI_POSTGRESQL_PGPASS=/tmp/pgpass - - POSTGRES_USER=csf - - POSTGRES_PASSWORD=csfpassword - - POSTGRES_DB=csf_core + - POSTGRES_USER=csfx + - POSTGRES_PASSWORD=csfxpassword + - POSTGRES_DB=csfx_core - PATRONI_RESTAPI_LISTEN=0.0.0.0:8008 - PATRONI_RESTAPI_CONNECT_ADDRESS=patroni3:8008 @@ -437,7 +437,7 @@ services: volumes: - patroni3-data:/home/postgres/pgdata networks: - 
csf-test: + csfx-test: ipv4_address: 172.20.0.43 ports: - "5443:5432" @@ -471,7 +471,7 @@ services: - "5433:5433" # Read Port (Replicas) - "8000:8000" # HAProxy Stats networks: - csf-test: + csfx-test: ipv4_address: 172.20.0.40 depends_on: - patroni1 @@ -500,13 +500,13 @@ services: - ETCD_ENDPOINTS=http://etcd1:2379,http://etcd2:2379,http://etcd3:2379 - NODE_ID=vm-1 - CEPH_MON_HOSTS=ceph-mon1:6789,ceph-mon2:6789,ceph-mon3:6789 - - CEPH_DEFAULT_POOL=csf-data + - CEPH_DEFAULT_POOL=csfx-data - CEPH_PG_NUM=128 - CEPH_DEFAULT_REPLICATION=3 - - PATRONI_SCOPE=postgres-csf + - PATRONI_SCOPE=postgres-csfx - PATRONI_NODES=patroni1:8008,patroni2:8008,patroni3:8008 networks: - csf-test: + csfx-test: ipv4_address: 172.20.0.51 depends_on: - etcd1 @@ -532,13 +532,13 @@ services: - ETCD_ENDPOINTS=http://etcd1:2379,http://etcd2:2379,http://etcd3:2379 - NODE_ID=vm-2 - CEPH_MON_HOSTS=ceph-mon1:6789,ceph-mon2:6789,ceph-mon3:6789 - - CEPH_DEFAULT_POOL=csf-data + - CEPH_DEFAULT_POOL=csfx-data - CEPH_PG_NUM=128 - CEPH_DEFAULT_REPLICATION=3 - - PATRONI_SCOPE=postgres-csf + - PATRONI_SCOPE=postgres-csfx - PATRONI_NODES=patroni1:8008,patroni2:8008,patroni3:8008 networks: - csf-test: + csfx-test: ipv4_address: 172.20.0.52 depends_on: - etcd1 @@ -564,13 +564,13 @@ services: - ETCD_ENDPOINTS=http://etcd1:2379,http://etcd2:2379,http://etcd3:2379 - NODE_ID=vm-3 - CEPH_MON_HOSTS=ceph-mon1:6789,ceph-mon2:6789,ceph-mon3:6789 - - CEPH_DEFAULT_POOL=csf-data + - CEPH_DEFAULT_POOL=csfx-data - CEPH_PG_NUM=128 - CEPH_DEFAULT_REPLICATION=3 - - PATRONI_SCOPE=postgres-csf + - PATRONI_SCOPE=postgres-csfx - PATRONI_NODES=patroni1:8008,patroni2:8008,patroni3:8008 networks: - csf-test: + csfx-test: ipv4_address: 172.20.0.53 depends_on: - etcd1 @@ -585,7 +585,7 @@ services: restart: unless-stopped networks: - csf-test: + csfx-test: driver: bridge ipam: config: diff --git a/control-plane/volume-manager/src/ceph/core/config.rs b/control-plane/volume-manager/src/ceph/core/config.rs index 7bcf4f7..735ae1f 100644
--- a/control-plane/volume-manager/src/ceph/core/config.rs +++ b/control-plane/volume-manager/src/ceph/core/config.rs @@ -24,7 +24,7 @@ impl CephConfig { keyring_path: env::var("CEPH_KEYRING").ok(), client_name: env::var("CEPH_CLIENT_NAME").unwrap_or_else(|_| "admin".to_string()), default_pool: env::var("CEPH_DEFAULT_POOL") - .unwrap_or_else(|_| "csf-volumes".to_string()), + .unwrap_or_else(|_| "csfx-volumes".to_string()), default_pg_num: env::var("CEPH_PG_NUM") .ok() .and_then(|s| s.parse().ok()) @@ -72,7 +72,7 @@ impl Default for CephConfig { ], keyring_path: None, client_name: "admin".to_string(), - default_pool: "csf-volumes".to_string(), + default_pool: "csfx-volumes".to_string(), default_pg_num: 128, default_replication: 3, } diff --git a/control-plane/volume-manager/src/ceph/ops/init.rs b/control-plane/volume-manager/src/ceph/ops/init.rs index d0015a8..405be9f 100644 --- a/control-plane/volume-manager/src/ceph/ops/init.rs +++ b/control-plane/volume-manager/src/ceph/ops/init.rs @@ -54,14 +54,14 @@ pub async fn init_ceph() -> Result { min_size: 2, }, CephPool { - name: "csf-postgres".to_string(), + name: "csfx-postgres".to_string(), pg_num: 64, pgp_num: 64, size: config.default_replication, min_size: 2, }, CephPool { - name: "csf-metadata".to_string(), + name: "csfx-metadata".to_string(), pg_num: 32, pgp_num: 32, size: config.default_replication, @@ -108,7 +108,7 @@ pub async fn create_postgres_volumes(ceph: &CephManager, node_count: u32) -> Res let volume = crate::ceph::storage::types::CephVolume { name: volume_name.clone(), - pool: "csf-postgres".to_string(), + pool: "csfx-postgres".to_string(), size_mb: 10240, // 10 GB features: vec!["layering".to_string(), "exclusive-lock".to_string()], encrypted: false, diff --git a/control-plane/volume-manager/src/db/volumes.rs b/control-plane/volume-manager/src/db/volumes.rs index 59592cf..44a3d2a 100644 --- a/control-plane/volume-manager/src/db/volumes.rs +++ b/control-plane/volume-manager/src/db/volumes.rs @@ -14,7 
+14,7 @@ pub async fn create( db: &DatabaseConnection, req: &CreateVolumeRequest, ) -> Result { - let pool = req.pool.clone().unwrap_or_else(|| "csf-volumes".to_string()); + let pool = req.pool.clone().unwrap_or_else(|| "csfx-volumes".to_string()); let image_name = format!("{}-{}", req.name, Uuid::new_v4()); let model = volumes::ActiveModel { diff --git a/control-plane/volume-manager/src/etcd/core/config.rs b/control-plane/volume-manager/src/etcd/core/config.rs index 9f9e23c..40091dc 100644 --- a/control-plane/volume-manager/src/etcd/core/config.rs +++ b/control-plane/volume-manager/src/etcd/core/config.rs @@ -36,7 +36,7 @@ impl Default for EtcdConfig { request_timeout: Duration::from_secs(10), keepalive_interval: Duration::from_secs(30), keepalive_timeout: Duration::from_secs(10), - namespace: "/csf/volume-manager".to_string(), + namespace: "/csfx/volume-manager".to_string(), username: None, password: None, } @@ -53,7 +53,7 @@ impl EtcdConfig { .collect(); let namespace = - std::env::var("ETCD_NAMESPACE").unwrap_or_else(|_| "/csf/volume-manager".to_string()); + std::env::var("ETCD_NAMESPACE").unwrap_or_else(|_| "/csfx/volume-manager".to_string()); let username = std::env::var("ETCD_USERNAME").ok(); let password = std::env::var("ETCD_PASSWORD").ok(); diff --git a/control-plane/volume-manager/src/main.rs b/control-plane/volume-manager/src/main.rs index 148ab15..a411c5d 100644 --- a/control-plane/volume-manager/src/main.rs +++ b/control-plane/volume-manager/src/main.rs @@ -19,7 +19,7 @@ async fn main() -> anyhow::Result<()> { logger::init_logger(); metrics::init(); - log_info!("main", "CSF Volume Manager starting"); + log_info!("main", "CSFX Volume Manager starting"); log_info!("main", &format!("Version: {}", env!("CARGO_PKG_VERSION"))); let db = shared::establish_connection() @@ -27,11 +27,14 @@ async fn main() -> anyhow::Result<()> { .expect("Failed to connect to database"); log_info!("main", "Database connection established"); - let etcd_endpoints = 
std::env::var("ETCD_ENDPOINTS") - .unwrap_or_else(|_| "http://localhost:2379".to_string()); + let etcd_endpoints = + std::env::var("ETCD_ENDPOINTS").unwrap_or_else(|_| "http://localhost:2379".to_string()); let etcd_endpoints: Vec<&str> = etcd_endpoints.split(',').collect(); - log_info!("main", &format!("Connecting to etcd endpoints={}", etcd_endpoints.join(","))); + log_info!( + "main", + &format!("Connecting to etcd endpoints={}", etcd_endpoints.join(",")) + ); let etcd = etcd_client::Client::connect(etcd_endpoints, None) .await .expect("Failed to connect to etcd"); @@ -45,7 +48,10 @@ async fn main() -> anyhow::Result<()> { Some(Arc::new(manager)) } Err(e) => { - log_warn!("main", &format!("Ceph not available (continuing without): {}", e)); + log_warn!( + "main", + &format!("Ceph not available (continuing without): {}", e) + ); None } }; diff --git a/control-plane/volume-manager/src/metrics.rs b/control-plane/volume-manager/src/metrics.rs index 3c7cf5a..64d068b 100644 --- a/control-plane/volume-manager/src/metrics.rs +++ b/control-plane/volume-manager/src/metrics.rs @@ -8,20 +8,20 @@ static HTTP_REQUEST_DURATION_SECONDS: OnceLock = OnceLock::new(); pub fn init() { HTTP_REQUESTS_TOTAL.get_or_init(|| { register_counter_vec!( - "csf_http_requests_total", + "csfx_http_requests_total", "Total HTTP requests", &["method", "path", "status"] ) - .expect("failed to register csf_http_requests_total") + .expect("failed to register csfx_http_requests_total") }); HTTP_REQUEST_DURATION_SECONDS.get_or_init(|| { register_histogram_vec!( - "csf_http_request_duration_seconds", + "csfx_http_request_duration_seconds", "HTTP request duration in seconds", &["method", "path"] ) - .expect("failed to register csf_http_request_duration_seconds") + .expect("failed to register csfx_http_request_duration_seconds") }); } diff --git a/control-plane/volume-manager/test-hybrid-system.sh b/control-plane/volume-manager/test-hybrid-system.sh index 2a9e5a6..e4034c5 100755 --- 
a/control-plane/volume-manager/test-hybrid-system.sh +++ b/control-plane/volume-manager/test-hybrid-system.sh @@ -157,13 +157,13 @@ check_patroni_health() { check_volume_manager_health() { log_step "Checking Volume Manager..." - local leader=$(etcdctl --endpoints=localhost:2379 get /csf/volume-manager/election/leader --print-value-only 2>/dev/null) + local leader=$(etcdctl --endpoints=localhost:2379 get /csfx/volume-manager/election/leader --print-value-only 2>/dev/null) if [ -n "$leader" ]; then log_success "Volume Manager leader: $leader" # Count nodes - local node_count=$(etcdctl --endpoints=localhost:2379 get /csf/volume-manager/nodes/ --prefix --keys-only 2>/dev/null | grep -c "/csf/volume-manager/nodes/" || echo "0") + local node_count=$(etcdctl --endpoints=localhost:2379 get /csfx/volume-manager/nodes/ --prefix --keys-only 2>/dev/null | grep -c "/csfx/volume-manager/nodes/" || echo "0") log_info "Registered nodes: $node_count" return 0 else @@ -204,18 +204,18 @@ test_data_replication() { local test_data="hybrid_test_$(date +%s)" log_step "Creating test table..." - docker exec patroni1 psql -U csf -d csf_core -c \ + docker exec patroni1 psql -U csfx -d csfx_core -c \ "CREATE TABLE IF NOT EXISTS hybrid_test (id SERIAL PRIMARY KEY, data TEXT, created_at TIMESTAMP DEFAULT NOW());" &>/dev/null log_step "Writing test data to primary..." - docker exec patroni1 psql -U csf -d csf_core -c \ + docker exec patroni1 psql -U csfx -d csfx_core -c \ "INSERT INTO hybrid_test (data) VALUES ('$test_data');" &>/dev/null # Wait for replication sleep 2 log_step "Verifying data on replica..." 
- local result=$(docker exec patroni2 psql -U csf -d csf_core -t -c \ + local result=$(docker exec patroni2 psql -U csfx -d csfx_core -t -c \ "SELECT data FROM hybrid_test WHERE data='$test_data';" 2>/dev/null | xargs) if [ "$result" == "$test_data" ]; then @@ -296,7 +296,7 @@ test_postgres_failover() { # Test connectivity sleep 2 - if docker exec $new_primary psql -U csf -d csf_core -c "SELECT 1;" &>/dev/null; then + if docker exec $new_primary psql -U csfx -d csfx_core -c "SELECT 1;" &>/dev/null; then log_success "New primary accepting connections" fi @@ -348,7 +348,7 @@ test_ceph_failover() { echo "" log_step "Testing PostgreSQL availability..." - if docker exec patroni1 psql -U csf -d csf_core -c "SELECT version();" &>/dev/null; then + if docker exec patroni1 psql -U csfx -d csfx_core -c "SELECT version();" &>/dev/null; then log_success "PostgreSQL still fully operational (Ceph has 2 remaining replicas)" else log_error "PostgreSQL affected by OSD failure" @@ -375,7 +375,7 @@ test_ceph_failover() { test_volume_manager_failover() { log_header "Test 5: Volume Manager Failover" - local current_leader=$(etcdctl --endpoints=localhost:2379 get /csf/volume-manager/election/leader --print-value-only 2>/dev/null) + local current_leader=$(etcdctl --endpoints=localhost:2379 get /csfx/volume-manager/election/leader --print-value-only 2>/dev/null) if [ -z "$current_leader" ]; then log_error "No leader found" @@ -398,7 +398,7 @@ test_volume_manager_failover() { log_step "Waiting for leader re-election (10s)..." sleep 10 - local new_leader=$(etcdctl --endpoints=localhost:2379 get /csf/volume-manager/election/leader --print-value-only 2>/dev/null) + local new_leader=$(etcdctl --endpoints=localhost:2379 get /csfx/volume-manager/election/leader --print-value-only 2>/dev/null) if [ -n "$new_leader" ] && [ "$new_leader" != "$current_leader" ]; then log_success "New leader elected: $new_leader" @@ -457,11 +457,11 @@ test_e2e_integration() { log_step "Testing complete data flow..." 
local test_val="e2e_test_$(date +%s)" - if docker exec patroni1 psql -U csf -d csf_core -c \ + if docker exec patroni1 psql -U csfx -d csfx_core -c \ "CREATE TABLE IF NOT EXISTS e2e_test (val TEXT); INSERT INTO e2e_test VALUES ('$test_val');" &>/dev/null; then sleep 2 - local result=$(docker exec patroni2 psql -U csf -d csf_core -t -c \ + local result=$(docker exec patroni2 psql -U csfx -d csfx_core -t -c \ "SELECT val FROM e2e_test WHERE val='$test_val';" 2>/dev/null | xargs) if [ "$result" == "$test_val" ]; then @@ -488,7 +488,7 @@ test_performance_metrics() { echo "" # PostgreSQL connections - local pg_connections=$(docker exec patroni1 psql -U csf -d csf_core -t -c \ + local pg_connections=$(docker exec patroni1 psql -U csfx -d csfx_core -t -c \ "SELECT count(*) FROM pg_stat_activity;" 2>/dev/null | xargs) echo -e "${CYAN}PostgreSQL Connections:${NC} $pg_connections" @@ -516,7 +516,7 @@ test_live_monitoring() { # etcd echo -e "${CYAN}🔑 etcd Leader:${NC}" - etcdctl --endpoints=localhost:2379 get /csf/volume-manager/election/leader --print-value-only 2>/dev/null || echo "none" + etcdctl --endpoints=localhost:2379 get /csfx/volume-manager/election/leader --print-value-only 2>/dev/null || echo "none" echo "" # Ceph @@ -595,7 +595,7 @@ test_chaos() { # Scenario 3: Kill Volume Manager leader log_info "🔥 Scenario 3: Killing Volume Manager leader..." - local leader=$(etcdctl --endpoints=localhost:2379 get /csf/volume-manager/election/leader --print-value-only 2>/dev/null) + local leader=$(etcdctl --endpoints=localhost:2379 get /csfx/volume-manager/election/leader --print-value-only 2>/dev/null) if [ -n "$leader" ]; then docker-compose -f docker-compose.patroni.yml stop $leader &>/dev/null log_warn "$leader stopped" @@ -609,7 +609,7 @@ test_chaos() { # Check if system is still functional log_step "Testing system functionality under stress..." 
- if docker exec patroni2 psql -U csf -d csf_core -c "SELECT 1;" &>/dev/null; then + if docker exec patroni2 psql -U csfx -d csfx_core -c "SELECT 1;" &>/dev/null; then log_success "✅ Database still accessible!" else log_error "Database not accessible" diff --git a/control-plane/volume-manager/test-patroni-ha.sh b/control-plane/volume-manager/test-patroni-ha.sh index 55931d2..ac1eef6 100755 --- a/control-plane/volume-manager/test-patroni-ha.sh +++ b/control-plane/volume-manager/test-patroni-ha.sh @@ -76,15 +76,15 @@ test_write() { if [ "$primary" == "patroni1" ]; then replica="patroni2"; else replica="patroni1"; fi - docker exec $primary psql -U csf -d csf_core -c \ + docker exec $primary psql -U csfx -d csfx_core -c \ "CREATE TABLE IF NOT EXISTS failover_test (id SERIAL PRIMARY KEY, data TEXT, created_at TIMESTAMP DEFAULT NOW());" &>/dev/null - docker exec $primary psql -U csf -d csf_core -c \ + docker exec $primary psql -U csfx -d csfx_core -c \ "INSERT INTO failover_test (data) VALUES ('$test_data');" &>/dev/null # Verify on replica sleep 2 - local result=$(docker exec $replica psql -U csf -d csf_core -t -c \ + local result=$(docker exec $replica psql -U csfx -d csfx_core -t -c \ "SELECT data FROM failover_test WHERE data='$test_data';" 2>/dev/null | xargs) if [ "$result" == "$test_data" ]; then @@ -212,7 +212,7 @@ test_postgres_failover() { # Test connection to new primary echo "Testing connection to new primary..." 
sleep 3 - if docker exec $new_primary psql -U csf -d csf_core -c "SELECT 1;" &>/dev/null; then + if docker exec $new_primary psql -U csfx -d csfx_core -c "SELECT 1;" &>/dev/null; then echo -e "${GREEN}✅ New primary is accepting connections${NC}" else echo -e "${RED}❌ New primary not ready${NC}" @@ -257,7 +257,7 @@ test_ceph_failure() { echo "" echo -e "${YELLOW}Testing if PostgreSQL still works...${NC}" - if docker exec patroni1 psql -U csf -d csf_core -c "SELECT version();" &>/dev/null; then + if docker exec patroni1 psql -U csfx -d csfx_core -c "SELECT version();" &>/dev/null; then echo -e "${GREEN}✅ PostgreSQL still working (Ceph has 2 replicas)${NC}" else echo -e "${RED}❌ PostgreSQL affected${NC}" diff --git a/deployments/systemd/csf-updater.service b/deployments/systemd/csfx-updater.service similarity index 62% rename from deployments/systemd/csf-updater.service rename to deployments/systemd/csfx-updater.service index d25f07e..544f4f0 100644 --- a/deployments/systemd/csf-updater.service +++ b/deployments/systemd/csfx-updater.service @@ -1,24 +1,24 @@ [Unit] -Description=CSF Control Plane Updater +Description=CSFX Control Plane Updater After=network.target docker.service Requires=docker.service [Service] Type=simple -User=csf-updater +User=csfx-updater Group=docker -EnvironmentFile=/opt/csf/.env +EnvironmentFile=/opt/csfx/.env Environment=ETCD_ENDPOINT=http://localhost:2379 Environment=ETCD_USERNAME=csf Environment=GHCR_ORG=csfx-cloud -Environment=COMPOSE_FILE=/opt/csf/docker-compose.prod.yml +Environment=COMPOSE_FILE=/opt/csfx/docker-compose.prod.yml Environment=POLL_INTERVAL=30 -ExecStart=/opt/csf/csf-updater.sh +ExecStart=/opt/csfx/csfx-updater.sh Restart=always RestartSec=10 StandardOutput=journal StandardError=journal -SyslogIdentifier=csf-updater +SyslogIdentifier=csfx-updater [Install] WantedBy=multi-user.target diff --git a/deployments/systemd/csf-updater.sh b/deployments/systemd/csfx-updater.sh similarity index 88% rename from 
deployments/systemd/csf-updater.sh rename to deployments/systemd/csfx-updater.sh index 6c08ff7..c0fb533 100755 --- a/deployments/systemd/csf-updater.sh +++ b/deployments/systemd/csfx-updater.sh @@ -2,15 +2,15 @@ set -euo pipefail ETCD_ENDPOINT="${ETCD_ENDPOINT:-http://localhost:2379}" -ETCD_USERNAME="${ETCD_USERNAME:-csf}" +ETCD_USERNAME="${ETCD_USERNAME:-csfx}" ETCD_PASSWORD="${ETCD_PASSWORD:?ETCD_PASSWORD must be set}" -COMPOSE_FILE="${COMPOSE_FILE:-/opt/csf/docker-compose.prod.yml}" +COMPOSE_FILE="${COMPOSE_FILE:-/opt/csfxx/docker-compose.prod.yml}" GHCR_ORG="${GHCR_ORG:-csfx-cloud}" POLL_INTERVAL="${POLL_INTERVAL:-30}" GHCR_TOKEN="${GHCR_TOKEN:?GHCR_TOKEN must be set}" -ETCD_DESIRED_KEY="/csf/config/desired_cp_version" -ETCD_RESULT_KEY="/csf/config/last_update_result" +ETCD_DESIRED_KEY="/csfx/config/desired_cp_version" +ETCD_RESULT_KEY="/csfx/config/last_update_result" SERVICES=(api-gateway registry scheduler volume-manager failover-controller sdn-controller) @@ -75,9 +75,9 @@ verify_images() { local version="$1" log "verifying image digests against GHCR" for svc in "${SERVICES[@]}"; do - local image="ghcr.io/${GHCR_ORG}/csf-ce-${svc}" + local image="ghcr.io/${GHCR_ORG}/csfx-ce-${svc}" local remote_digest local_dig - remote_digest="$(ghcr_digest "${GHCR_ORG}/csf-ce-${svc}" "${version}")" + remote_digest="$(ghcr_digest "${GHCR_ORG}/csfx-ce-${svc}" "${version}")" local_dig="$(local_digest "${image}:${version}")" if [[ -z "$remote_digest" ]]; then @@ -99,7 +99,7 @@ run_update() { etcd_put "$ETCD_RESULT_KEY" "in_progress" log "pulling images" - if ! GHCR_ORG="$GHCR_ORG" CSF_VERSION="$version" \ + if ! GHCR_ORG="$GHCR_ORG" CSFX_VERSION="$version" \ docker compose -f "$COMPOSE_FILE" pull; then log "pull failed" etcd_put "$ETCD_RESULT_KEY" "failed" @@ -113,7 +113,7 @@ run_update() { fi log "restarting services" - if ! GHCR_ORG="$GHCR_ORG" CSF_VERSION="$version" \ + if ! 
GHCR_ORG="$GHCR_ORG" CSFX_VERSION="$version" \ docker compose -f "$COMPOSE_FILE" up -d; then log "up failed" etcd_put "$ETCD_RESULT_KEY" "failed" @@ -122,7 +122,7 @@ run_update() { log "waiting for health checks" sleep 15 - if ! GHCR_ORG="$GHCR_ORG" CSF_VERSION="$version" \ + if ! GHCR_ORG="$GHCR_ORG" CSFX_VERSION="$version" \ docker compose -f "$COMPOSE_FILE" ps --format json \ | jq -e '[.[] | select(.Health == "unhealthy")] | length == 0' > /dev/null 2>&1; then log "health check failed" @@ -138,7 +138,7 @@ is_valid_version() { [[ "$1" =~ ^v?[0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9._-]+)?$ ]] } -log "csf-updater started, polling etcd every ${POLL_INTERVAL}s" +log "csfx-updater started, polling etcd every ${POLL_INTERVAL}s" last_applied="" diff --git a/deployments/systemd/install.sh b/deployments/systemd/install.sh index 96ee1d9..57c447e 100755 --- a/deployments/systemd/install.sh +++ b/deployments/systemd/install.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -euo pipefail -CSF_DIR="/opt/csf" +CSFX_DIR="/opt/csfxx" if [[ "$EUID" -ne 0 ]]; then echo "run as root" @@ -11,28 +11,28 @@ fi SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" -if ! id csf-updater &>/dev/null; then - useradd --system --no-create-home --shell /usr/sbin/nologin csf-updater - usermod -aG docker csf-updater - echo "created csf-updater system user" +if ! 
id csfx-updater &>/dev/null; then + useradd --system --no-create-home --shell /usr/sbin/nologin csfx-updater + usermod -aG docker csfx-updater + echo "created csfx-updater system user" fi -mkdir -p "$CSF_DIR" -chown csf-updater:docker "$CSF_DIR" +mkdir -p "$CSFX_DIR" +chown csfx-updater:docker "$CSFX_DIR" -cp "${REPO_ROOT}/docker-compose.prod.yml" "${CSF_DIR}/docker-compose.prod.yml" -cp "${SCRIPT_DIR}/csf-updater.sh" "${CSF_DIR}/csf-updater.sh" -chmod 750 "${CSF_DIR}/csf-updater.sh" -chown csf-updater:docker "${CSF_DIR}/csf-updater.sh" +cp "${REPO_ROOT}/docker-compose.prod.yml" "${CSFX_DIR}/docker-compose.prod.yml" +cp "${SCRIPT_DIR}/csfx-updater.sh" "${CSFX_DIR}/csfx-updater.sh" +chmod 750 "${CSFX_DIR}/csfx-updater.sh" +chown csfx-updater:docker "${CSFX_DIR}/csfx-updater.sh" -if [[ ! -f "${CSF_DIR}/.env" ]]; then - cp "${REPO_ROOT}/.env.example" "${CSF_DIR}/.env" - chmod 640 "${CSF_DIR}/.env" - chown csf-updater:docker "${CSF_DIR}/.env" - echo "created ${CSF_DIR}/.env — fill in values before starting" +if [[ ! 
-f "${CSFX_DIR}/.env" ]]; then + cp "${REPO_ROOT}/.env.example" "${CSFX_DIR}/.env" + chmod 640 "${CSFX_DIR}/.env" + chown csfx-updater:docker "${CSFX_DIR}/.env" + echo "created ${CSFX_DIR}/.env — fill in values before starting" fi -cp "${SCRIPT_DIR}/csf-updater.service" /etc/systemd/system/csf-updater.service +cp "${SCRIPT_DIR}/csfx-updater.service" /etc/systemd/system/csfx-updater.service if command -v ufw &>/dev/null; then ufw deny in 2379/tcp comment "etcd - internal only" @@ -46,8 +46,8 @@ elif command -v firewall-cmd &>/dev/null; then fi systemctl daemon-reload -systemctl enable csf-updater -systemctl start csf-updater +systemctl enable csfx-updater +systemctl start csfx-updater -echo "csf-updater installed and started" -echo "logs: journalctl -u csf-updater -f" +echo "csfx-updater installed and started" +echo "logs: journalctl -u csfx-updater -f" diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index 0408396..b37a5b0 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -9,32 +9,32 @@ x-rust-service: &rust-service - cargo_cache:/usr/local/cargo/registry - cargo_git:/usr/local/cargo/git networks: - - csf-network + - csfx-network restart: unless-stopped services: postgres: image: postgres:16-alpine - container_name: csf-postgres-dev + container_name: csfx-postgres-dev environment: - POSTGRES_USER: ${POSTGRES_USER:-csf_user} - POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-csf_password} - POSTGRES_DB: ${POSTGRES_DB:-csf_core} + POSTGRES_USER: ${POSTGRES_USER:-csfx_user} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-csfx_password} + POSTGRES_DB: ${POSTGRES_DB:-csfx_core} ports: - "5432:5432" volumes: - postgres_data:/var/lib/postgresql/data healthcheck: - test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-csf_user} -d ${POSTGRES_DB:-csf_core}"] + test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-csfx_user} -d ${POSTGRES_DB:-csfx_core}"] interval: 10s timeout: 5s retries: 5 networks: - - csf-network + - csfx-network etcd: image: 
gcr.io/etcd-development/etcd:v3.5.21 - container_name: csf-etcd-dev + container_name: csfx-etcd-dev command: - etcd - --advertise-client-urls=http://etcd:2379 @@ -42,13 +42,13 @@ services: ports: - "2379:2379" networks: - - csf-network + - csfx-network api-gateway: <<: *rust-service - container_name: csf-api-gateway-dev + container_name: csfx-api-gateway-dev environment: - DATABASE_URL: postgres://${POSTGRES_USER:-csf_user}:${POSTGRES_PASSWORD:-csf_password}@postgres:5432/${POSTGRES_DB:-csf_core} + DATABASE_URL: postgres://${POSTGRES_USER:-csfx_user}:${POSTGRES_PASSWORD:-csfx_password}@postgres:5432/${POSTGRES_DB:-csfx_core} RUST_LOG: ${RUST_LOG:-debug} JWT_SECRET: ${JWT_SECRET:-dev_jwt_secret_change_in_production} RSA_KEY_SIZE: "2048" @@ -69,9 +69,9 @@ services: registry: <<: *rust-service - container_name: csf-registry-dev + container_name: csfx-registry-dev environment: - DATABASE_URL: postgres://${POSTGRES_USER:-csf_user}:${POSTGRES_PASSWORD:-csf_password}@postgres:5432/${POSTGRES_DB:-csf_core} + DATABASE_URL: postgres://${POSTGRES_USER:-csfx_user}:${POSTGRES_PASSWORD:-csfx_password}@postgres:5432/${POSTGRES_DB:-csfx_core} ETCD_ENDPOINTS: http://etcd:2379 REGISTRY_PORT: "8001" RUST_LOG: ${RUST_LOG:-debug} @@ -95,9 +95,9 @@ services: scheduler: <<: *rust-service - container_name: csf-scheduler-dev + container_name: csfx-scheduler-dev environment: - DATABASE_URL: postgres://${POSTGRES_USER:-csf_user}:${POSTGRES_PASSWORD:-csf_password}@postgres:5432/${POSTGRES_DB:-csf_core} + DATABASE_URL: postgres://${POSTGRES_USER:-csfx_user}:${POSTGRES_PASSWORD:-csfx_password}@postgres:5432/${POSTGRES_DB:-csfx_core} ETCD_ENDPOINTS: http://etcd:2379 SCHEDULER_PORT: "8002" RUST_LOG: ${RUST_LOG:-debug} @@ -115,9 +115,9 @@ services: volume-manager: <<: *rust-service - container_name: csf-volume-manager-dev + container_name: csfx-volume-manager-dev environment: - DATABASE_URL: 
postgres://${POSTGRES_USER:-csf_user}:${POSTGRES_PASSWORD:-csf_password}@postgres:5432/${POSTGRES_DB:-csf_core} + DATABASE_URL: postgres://${POSTGRES_USER:-csfx_user}:${POSTGRES_PASSWORD:-csfx_password}@postgres:5432/${POSTGRES_DB:-csfx_core} ETCD_ENDPOINTS: http://etcd:2379 VOLUME_MANAGER_PORT: "8003" RUST_LOG: ${RUST_LOG:-debug} @@ -135,9 +135,9 @@ services: failover-controller: <<: *rust-service - container_name: csf-failover-controller-dev + container_name: csfx-failover-controller-dev environment: - DATABASE_URL: postgres://${POSTGRES_USER:-csf_user}:${POSTGRES_PASSWORD:-csf_password}@postgres:5432/${POSTGRES_DB:-csf_core} + DATABASE_URL: postgres://${POSTGRES_USER:-csfx_user}:${POSTGRES_PASSWORD:-csfx_password}@postgres:5432/${POSTGRES_DB:-csfx_core} FAILOVER_CONTROLLER_PORT: "8004" SCHEDULER_SERVICE_URL: http://scheduler:8002 VOLUME_MANAGER_URL: http://volume-manager:8003 @@ -158,9 +158,9 @@ services: sdn-controller: <<: *rust-service - container_name: csf-sdn-controller-dev + container_name: csfx-sdn-controller-dev environment: - DATABASE_URL: postgres://${POSTGRES_USER:-csf_user}:${POSTGRES_PASSWORD:-csf_password}@postgres:5432/${POSTGRES_DB:-csf_core} + DATABASE_URL: postgres://${POSTGRES_USER:-csfx_user}:${POSTGRES_PASSWORD:-csfx_password}@postgres:5432/${POSTGRES_DB:-csfx_core} ETCD_URL: http://etcd:2379 SDN_CONTROLLER_PORT: "8005" RUST_LOG: ${RUST_LOG:-debug} @@ -193,5 +193,5 @@ volumes: cargo_target_sdn_controller: networks: - csf-network: + csfx-network: driver: bridge diff --git a/docker-compose.failover-controller.yml b/docker-compose.failover-controller.yml index 413c7c4..b2a5a8b 100644 --- a/docker-compose.failover-controller.yml +++ b/docker-compose.failover-controller.yml @@ -9,32 +9,32 @@ x-rust-common: &rust-common - cargo_cache:/usr/local/cargo/registry - cargo_git:/usr/local/cargo/git networks: - - csf-network + - csfx-network restart: unless-stopped services: postgres: image: postgres:16-alpine - container_name: csf-postgres-failover + 
container_name: csfx-postgres-failover environment: - POSTGRES_USER: csf_user - POSTGRES_PASSWORD: csf_password - POSTGRES_DB: csf_core + POSTGRES_USER: csfx_user + POSTGRES_PASSWORD: csfx_password + POSTGRES_DB: csfx_core ports: - "5432:5432" volumes: - postgres_data:/var/lib/postgresql/data healthcheck: - test: ["CMD-SHELL", "pg_isready -U csf_user -d csf_core"] + test: ["CMD-SHELL", "pg_isready -U csfx_user -d csfx_core"] interval: 10s timeout: 5s retries: 5 networks: - - csf-network + - csfx-network etcd: image: gcr.io/etcd-development/etcd:v3.5.21 - container_name: csf-etcd-failover + container_name: csfx-etcd-failover command: - etcd - --advertise-client-urls=http://etcd:2379 @@ -42,13 +42,13 @@ services: ports: - "2379:2379" networks: - - csf-network + - csfx-network api-gateway: <<: *rust-common - container_name: csf-api-gateway-failover + container_name: csfx-api-gateway-failover environment: - DATABASE_URL: postgres://csf_user:csf_password@postgres:5432/csf_core + DATABASE_URL: postgres://csfx_user:csfx_password@postgres:5432/csfx_core RUST_LOG: debug JWT_SECRET: ${JWT_SECRET:-dev_jwt_secret_change_in_production} RSA_KEY_SIZE: "2048" @@ -73,9 +73,9 @@ services: registry: <<: *rust-common - container_name: csf-registry-failover + container_name: csfx-registry-failover environment: - DATABASE_URL: postgres://csf_user:csf_password@postgres:5432/csf_core + DATABASE_URL: postgres://csfx_user:csfx_password@postgres:5432/csfx_core ETCD_ENDPOINTS: http://etcd:2379 REGISTRY_PORT: "8001" RUST_LOG: debug @@ -99,9 +99,9 @@ services: scheduler: <<: *rust-common - container_name: csf-scheduler-failover + container_name: csfx-scheduler-failover environment: - DATABASE_URL: postgres://csf_user:csf_password@postgres:5432/csf_core + DATABASE_URL: postgres://csfx_user:csfx_password@postgres:5432/csfx_core ETCD_ENDPOINTS: http://etcd:2379 SCHEDULER_PORT: "8002" RUST_LOG: debug @@ -124,9 +124,9 @@ services: volume-manager: <<: *rust-common - container_name: 
csf-volume-manager-failover + container_name: csfx-volume-manager-failover environment: - DATABASE_URL: postgres://csf_user:csf_password@postgres:5432/csf_core + DATABASE_URL: postgres://csfx_user:csfx_password@postgres:5432/csfx_core ETCD_ENDPOINTS: http://etcd:2379 VOLUME_MANAGER_PORT: "8003" RUST_LOG: debug @@ -149,9 +149,9 @@ services: failover-controller: <<: *rust-common - container_name: csf-failover-controller + container_name: csfx-failover-controller environment: - DATABASE_URL: postgres://csf_user:csf_password@postgres:5432/csf_core + DATABASE_URL: postgres://csfx_user:csfx_password@postgres:5432/csfx_core FAILOVER_CONTROLLER_PORT: "8004" SCHEDULER_SERVICE_URL: http://scheduler:8002 VOLUME_MANAGER_URL: http://volume-manager:8003 @@ -199,5 +199,5 @@ volumes: driver: local networks: - csf-network: + csfx-network: driver: bridge diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 6dcdabc..fad5805 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -1,25 +1,25 @@ services: postgres: image: postgres:16-alpine - container_name: csf-postgres + container_name: csfx-postgres environment: POSTGRES_USER: ${POSTGRES_USER} POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} - POSTGRES_DB: ${POSTGRES_DB:-csf_core} + POSTGRES_DB: ${POSTGRES_DB:-csfx_core} volumes: - postgres_data:/var/lib/postgresql/data healthcheck: - test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB:-csf_core}"] + test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB:-csfx_core}"] interval: 10s timeout: 5s retries: 5 networks: - - csf-internal + - csfx-internal restart: unless-stopped etcd: image: gcr.io/etcd-development/etcd:v3.5.21 - container_name: csf-etcd + container_name: csfx-etcd command: - etcd - --advertise-client-urls=http://etcd:2379 @@ -28,7 +28,7 @@ services: volumes: - etcd_data:/etcd-data networks: - - csf-internal + - csfx-internal restart: unless-stopped healthcheck: test: ["CMD", "etcdctl", "endpoint", "health"] @@ -37,23 
+37,23 @@ services: retries: 5 migrate: - image: ghcr.io/${GHCR_ORG}/csf-ce-api-gateway:${CSF_VERSION:-latest} - container_name: csf-migrate - command: ["/csf-migrate"] + image: ghcr.io/${GHCR_ORG}/csfx-ce-api-gateway:${CSFX_VERSION:-latest} + container_name: csfx-migrate + command: ["/csfx-migrate"] environment: - DATABASE_URL: postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-csf_core} + DATABASE_URL: postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-csfx_core} depends_on: postgres: condition: service_healthy networks: - - csf-internal + - csfx-internal restart: "no" api-gateway: - image: ghcr.io/${GHCR_ORG}/csf-ce-api-gateway:${CSF_VERSION:-latest} - container_name: csf-api-gateway + image: ghcr.io/${GHCR_ORG}/csfx-ce-api-gateway:${CSFX_VERSION:-latest} + container_name: csfx-api-gateway environment: - DATABASE_URL: postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-csf_core} + DATABASE_URL: postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-csfx_core} RUST_LOG: ${RUST_LOG:-info} JWT_SECRET: ${JWT_SECRET} RSA_KEY_SIZE: "4096" @@ -71,7 +71,7 @@ services: migrate: condition: service_completed_successfully networks: - - csf-internal + - csfx-internal restart: unless-stopped healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8000/api/system/health"] @@ -81,10 +81,10 @@ services: start_period: 15s registry: - image: ghcr.io/${GHCR_ORG}/csf-ce-registry:${CSF_VERSION:-latest} - container_name: csf-registry + image: ghcr.io/${GHCR_ORG}/csfx-ce-registry:${CSFX_VERSION:-latest} + container_name: csfx-registry environment: - DATABASE_URL: postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-csf_core} + DATABASE_URL: postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-csfx_core} ETCD_ENDPOINTS: http://etcd:2379 REGISTRY_PORT: "8001" RUST_LOG: ${RUST_LOG:-info} @@ -95,7 +95,7 @@ services: migrate: condition: 
service_completed_successfully networks: - - csf-internal + - csfx-internal restart: unless-stopped healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8001/health"] @@ -105,10 +105,10 @@ services: start_period: 15s scheduler: - image: ghcr.io/${GHCR_ORG}/csf-ce-scheduler:${CSF_VERSION:-latest} - container_name: csf-scheduler + image: ghcr.io/${GHCR_ORG}/csfx-ce-scheduler:${CSFX_VERSION:-latest} + container_name: csfx-scheduler environment: - DATABASE_URL: postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-csf_core} + DATABASE_URL: postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-csfx_core} ETCD_ENDPOINTS: http://etcd:2379 SCHEDULER_PORT: "8002" RUST_LOG: ${RUST_LOG:-info} @@ -118,14 +118,14 @@ services: migrate: condition: service_completed_successfully networks: - - csf-internal + - csfx-internal restart: unless-stopped volume-manager: - image: ghcr.io/${GHCR_ORG}/csf-ce-volume-manager:${CSF_VERSION:-latest} - container_name: csf-volume-manager + image: ghcr.io/${GHCR_ORG}/csfx-ce-volume-manager:${CSFX_VERSION:-latest} + container_name: csfx-volume-manager environment: - DATABASE_URL: postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-csf_core} + DATABASE_URL: postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-csfx_core} ETCD_ENDPOINTS: http://etcd:2379 VOLUME_MANAGER_PORT: "8003" RUST_LOG: ${RUST_LOG:-info} @@ -135,14 +135,14 @@ services: migrate: condition: service_completed_successfully networks: - - csf-internal + - csfx-internal restart: unless-stopped failover-controller: - image: ghcr.io/${GHCR_ORG}/csf-ce-failover-controller:${CSF_VERSION:-latest} - container_name: csf-failover-controller + image: ghcr.io/${GHCR_ORG}/csfx-ce-failover-controller:${CSFX_VERSION:-latest} + container_name: csfx-failover-controller environment: - DATABASE_URL: postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-csf_core} + DATABASE_URL: 
postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-csfx_core} FAILOVER_CONTROLLER_PORT: "8004" SCHEDULER_SERVICE_URL: http://scheduler:8002 VOLUME_MANAGER_URL: http://volume-manager:8003 @@ -157,14 +157,14 @@ services: migrate: condition: service_completed_successfully networks: - - csf-internal + - csfx-internal restart: unless-stopped sdn-controller: - image: ghcr.io/${GHCR_ORG}/csf-ce-sdn-controller:${CSF_VERSION:-latest} - container_name: csf-sdn-controller + image: ghcr.io/${GHCR_ORG}/csfx-ce-sdn-controller:${CSFX_VERSION:-latest} + container_name: csfx-sdn-controller environment: - DATABASE_URL: postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-csf_core} + DATABASE_URL: postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-csfx_core} ETCD_URL: http://etcd:2379 SDN_CONTROLLER_PORT: "8005" RUST_LOG: ${RUST_LOG:-info} @@ -174,7 +174,7 @@ services: migrate: condition: service_completed_successfully networks: - - csf-internal + - csfx-internal restart: unless-stopped healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8005/health"] @@ -188,5 +188,5 @@ volumes: etcd_data: networks: - csf-internal: + csfx-internal: driver: bridge diff --git a/docker-compose.registry.yml b/docker-compose.registry.yml index 6fd7c7b..e07b4c8 100644 --- a/docker-compose.registry.yml +++ b/docker-compose.registry.yml @@ -3,7 +3,7 @@ x-rust-common: &rust-common context: . 
dockerfile: control-plane/Dockerfile.dev.shared cache_from: - - type=registry,ref=csf-core-rust-base:latest + - type=registry,ref=csfx-core-rust-base:latest volumes: - ./control-plane/shared:/app/control-plane/shared - ./Cargo.toml:/app/Cargo.toml @@ -11,34 +11,34 @@ x-rust-common: &rust-common - cargo_cache:/usr/local/cargo/registry - cargo_git:/usr/local/cargo/git networks: - - csf-network + - csfx-network restart: unless-stopped services: postgres: image: postgres:16-alpine - container_name: csf-postgres-registry + container_name: csfx-postgres-registry environment: - POSTGRES_USER: csf_user - POSTGRES_PASSWORD: csf_password - POSTGRES_DB: csf_core + POSTGRES_USER: csfx_user + POSTGRES_PASSWORD: csfx_password + POSTGRES_DB: csfx_core ports: - "5432:5432" volumes: - postgres_data:/var/lib/postgresql/data healthcheck: - test: ["CMD-SHELL", "pg_isready -U csf_user -d csf_core"] + test: ["CMD-SHELL", "pg_isready -U csfx_user -d csfx_core"] interval: 10s timeout: 5s retries: 5 networks: - - csf-network + - csfx-network api-gateway: <<: *rust-common - container_name: csf-api-gateway-registry + container_name: csfx-api-gateway-registry environment: - DATABASE_URL: postgres://csf_user:csf_password@postgres:5432/csf_core + DATABASE_URL: postgres://csfx_user:csfx_password@postgres:5432/csfx_core RUST_LOG: debug JWT_SECRET: ${JWT_SECRET:-dev_jwt_secret_change_in_production} RSA_KEY_SIZE: "2048" @@ -60,9 +60,9 @@ services: registry: <<: *rust-common - container_name: csf-registry-service + container_name: csfx-registry-service environment: - DATABASE_URL: postgres://csf_user:csf_password@postgres:5432/csf_core + DATABASE_URL: postgres://csfx_user:csfx_password@postgres:5432/csfx_core RUST_LOG: debug REGISTRY_PORT: 8001 ports: @@ -89,11 +89,11 @@ services: agent: <<: *rust-common - container_name: csf-agent-test + container_name: csfx-agent-test environment: - CSF_GATEWAY_URL: http://api-gateway:8000 - CSF_REGISTRATION_TOKEN: ${CSF_REGISTRATION_TOKEN} - 
CSF_HEARTBEAT_INTERVAL: "30" + CSFX_GATEWAY_URL: http://api-gateway:8000 + CSFX_REGISTRATION_TOKEN: ${CSFX_REGISTRATION_TOKEN} + CSFX_HEARTBEAT_INTERVAL: "30" RUST_LOG: debug depends_on: registry: @@ -106,8 +106,8 @@ services: - cargo_cache:/usr/local/cargo/registry - cargo_git:/usr/local/cargo/git - cargo_target_agent:/app/target - - agent_state:/var/lib/csf-daemon - command: cargo run -p csf-agent + - agent_state:/var/lib/csfxx-daemon + command: cargo run -p csfx-agent restart: "no" volumes: @@ -127,5 +127,5 @@ volumes: driver: local networks: - csf-network: + csfx-network: driver: bridge diff --git a/docker-compose.scheduler.yml b/docker-compose.scheduler.yml index 7f7eae1..da73b4d 100644 --- a/docker-compose.scheduler.yml +++ b/docker-compose.scheduler.yml @@ -9,32 +9,32 @@ x-rust-common: &rust-common - cargo_cache:/usr/local/cargo/registry - cargo_git:/usr/local/cargo/git networks: - - csf-network + - csfx-network restart: unless-stopped services: postgres: image: postgres:16-alpine - container_name: csf-postgres-scheduler + container_name: csfx-postgres-scheduler environment: - POSTGRES_USER: csf_user - POSTGRES_PASSWORD: csf_password - POSTGRES_DB: csf_core + POSTGRES_USER: csfx_user + POSTGRES_PASSWORD: csfx_password + POSTGRES_DB: csfx_core ports: - "5432:5432" volumes: - postgres_data:/var/lib/postgresql/data healthcheck: - test: ["CMD-SHELL", "pg_isready -U csf_user -d csf_core"] + test: ["CMD-SHELL", "pg_isready -U csfx_user -d csfx_core"] interval: 10s timeout: 5s retries: 5 networks: - - csf-network + - csfx-network etcd: image: gcr.io/etcd-development/etcd:v3.5.21 - container_name: csf-etcd-scheduler + container_name: csfx-etcd-scheduler command: - etcd - --advertise-client-urls=http://etcd:2379 @@ -42,13 +42,13 @@ services: ports: - "2379:2379" networks: - - csf-network + - csfx-network api-gateway: <<: *rust-common - container_name: csf-api-gateway-scheduler + container_name: csfx-api-gateway-scheduler environment: - DATABASE_URL: 
postgres://csf_user:csf_password@postgres:5432/csf_core + DATABASE_URL: postgres://csfx_user:csfx_password@postgres:5432/csfx_core RUST_LOG: debug JWT_SECRET: ${JWT_SECRET:-dev_jwt_secret_change_in_production} RSA_KEY_SIZE: "2048" @@ -71,9 +71,9 @@ services: scheduler: <<: *rust-common - container_name: csf-scheduler + container_name: csfx-scheduler environment: - DATABASE_URL: postgres://csf_user:csf_password@postgres:5432/csf_core + DATABASE_URL: postgres://csfx_user:csfx_password@postgres:5432/csfx_core ETCD_ENDPOINTS: http://etcd:2379 SCHEDULER_PORT: "8002" RUST_LOG: debug @@ -114,5 +114,5 @@ volumes: driver: local networks: - csf-network: + csfx-network: driver: bridge diff --git a/docker-compose.sdn-controller.yml b/docker-compose.sdn-controller.yml index 79db47a..a4fc790 100644 --- a/docker-compose.sdn-controller.yml +++ b/docker-compose.sdn-controller.yml @@ -9,32 +9,32 @@ x-rust-common: &rust-common - cargo_cache:/usr/local/cargo/registry - cargo_git:/usr/local/cargo/git networks: - - csf-network + - csfx-network restart: unless-stopped services: postgres: image: postgres:16-alpine - container_name: csf-postgres-sdn + container_name: csfx-postgres-sdn environment: - POSTGRES_USER: csf_user - POSTGRES_PASSWORD: csf_password - POSTGRES_DB: csf_core + POSTGRES_USER: csfx_user + POSTGRES_PASSWORD: csfx_password + POSTGRES_DB: csfx_core ports: - "5432:5432" volumes: - postgres_data:/var/lib/postgresql/data healthcheck: - test: ["CMD-SHELL", "pg_isready -U csf_user -d csf_core"] + test: ["CMD-SHELL", "pg_isready -U csfx_user -d csfx_core"] interval: 10s timeout: 5s retries: 5 networks: - - csf-network + - csfx-network etcd: image: gcr.io/etcd-development/etcd:v3.5.21 - container_name: csf-etcd-sdn + container_name: csfx-etcd-sdn command: - etcd - --advertise-client-urls=http://etcd:2379 @@ -42,13 +42,13 @@ services: ports: - "2379:2379" networks: - - csf-network + - csfx-network api-gateway: <<: *rust-common - container_name: csf-api-gateway-sdn + 
container_name: csfx-api-gateway-sdn environment: - DATABASE_URL: postgres://csf_user:csf_password@postgres:5432/csf_core + DATABASE_URL: postgres://csfx_user:csfx_password@postgres:5432/csfx_core RUST_LOG: debug JWT_SECRET: ${JWT_SECRET:-dev_jwt_secret_change_in_production} RSA_KEY_SIZE: "2048" @@ -74,9 +74,9 @@ services: registry: <<: *rust-common - container_name: csf-registry-sdn + container_name: csfx-registry-sdn environment: - DATABASE_URL: postgres://csf_user:csf_password@postgres:5432/csf_core + DATABASE_URL: postgres://csfx_user:csfx_password@postgres:5432/csfx_core ETCD_ENDPOINTS: http://etcd:2379 REGISTRY_PORT: "8001" RUST_LOG: debug @@ -100,9 +100,9 @@ services: scheduler: <<: *rust-common - container_name: csf-scheduler-sdn + container_name: csfx-scheduler-sdn environment: - DATABASE_URL: postgres://csf_user:csf_password@postgres:5432/csf_core + DATABASE_URL: postgres://csfx_user:csfx_password@postgres:5432/csfx_core ETCD_ENDPOINTS: http://etcd:2379 SCHEDULER_PORT: "8002" RUST_LOG: debug @@ -125,9 +125,9 @@ services: volume-manager: <<: *rust-common - container_name: csf-volume-manager-sdn + container_name: csfx-volume-manager-sdn environment: - DATABASE_URL: postgres://csf_user:csf_password@postgres:5432/csf_core + DATABASE_URL: postgres://csfx_user:csfx_password@postgres:5432/csfx_core ETCD_ENDPOINTS: http://etcd:2379 VOLUME_MANAGER_PORT: "8003" RUST_LOG: debug @@ -150,9 +150,9 @@ services: failover-controller: <<: *rust-common - container_name: csf-failover-controller-sdn + container_name: csfx-failover-controller-sdn environment: - DATABASE_URL: postgres://csf_user:csf_password@postgres:5432/csf_core + DATABASE_URL: postgres://csfx_user:csfx_password@postgres:5432/csfx_core FAILOVER_CONTROLLER_PORT: "8004" SCHEDULER_SERVICE_URL: http://scheduler:8002 VOLUME_MANAGER_URL: http://volume-manager:8003 @@ -178,9 +178,9 @@ services: sdn-controller: <<: *rust-common - container_name: csf-sdn-controller + container_name: csfx-sdn-controller environment: - 
DATABASE_URL: postgres://csf_user:csf_password@postgres:5432/csf_core + DATABASE_URL: postgres://csfx_user:csfx_password@postgres:5432/csfx_core ETCD_URL: http://etcd:2379 SDN_CONTROLLER_PORT: "8005" RUST_LOG: debug @@ -227,5 +227,5 @@ volumes: driver: local networks: - csf-network: + csfx-network: driver: bridge diff --git a/docker-compose.volume-manager.yml b/docker-compose.volume-manager.yml index 8838a32..ce7312f 100644 --- a/docker-compose.volume-manager.yml +++ b/docker-compose.volume-manager.yml @@ -9,32 +9,32 @@ x-rust-common: &rust-common - cargo_cache:/usr/local/cargo/registry - cargo_git:/usr/local/cargo/git networks: - - csf-network + - csfx-network restart: unless-stopped services: postgres: image: postgres:16-alpine - container_name: csf-postgres-volumes + container_name: csfx-postgres-volumes environment: - POSTGRES_USER: csf_user - POSTGRES_PASSWORD: csf_password - POSTGRES_DB: csf_core + POSTGRES_USER: csfx_user + POSTGRES_PASSWORD: csfx_password + POSTGRES_DB: csfx_core ports: - "5432:5432" volumes: - postgres_data:/var/lib/postgresql/data healthcheck: - test: ["CMD-SHELL", "pg_isready -U csf_user -d csf_core"] + test: ["CMD-SHELL", "pg_isready -U csfx_user -d csfx_core"] interval: 10s timeout: 5s retries: 5 networks: - - csf-network + - csfx-network etcd: image: gcr.io/etcd-development/etcd:v3.5.21 - container_name: csf-etcd-volumes + container_name: csfx-etcd-volumes command: - etcd - --advertise-client-urls=http://etcd:2379 @@ -42,13 +42,13 @@ services: ports: - "2379:2379" networks: - - csf-network + - csfx-network api-gateway: <<: *rust-common - container_name: csf-api-gateway-volumes + container_name: csfx-api-gateway-volumes environment: - DATABASE_URL: postgres://csf_user:csf_password@postgres:5432/csf_core + DATABASE_URL: postgres://csfx_user:csfx_password@postgres:5432/csfx_core RUST_LOG: debug JWT_SECRET: ${JWT_SECRET:-dev_jwt_secret_change_in_production} RSA_KEY_SIZE: "2048" @@ -72,9 +72,9 @@ services: volume-manager: <<: *rust-common 
- container_name: csf-volume-manager + container_name: csfx-volume-manager environment: - DATABASE_URL: postgres://csf_user:csf_password@postgres:5432/csf_core + DATABASE_URL: postgres://csfx_user:csfx_password@postgres:5432/csfx_core ETCD_ENDPOINTS: http://etcd:2379 VOLUME_MANAGER_PORT: "8003" RUST_LOG: debug @@ -115,5 +115,5 @@ volumes: driver: local networks: - csf-network: + csfx-network: driver: bridge diff --git a/docs/UPDATER_PLAN.md b/docs/UPDATER_PLAN.md index 4da90fa..d907c73 100644 --- a/docs/UPDATER_PLAN.md +++ b/docs/UPDATER_PLAN.md @@ -1,4 +1,4 @@ -# CSF Updater — Architekturplan +# CSFX Updater — Architekturplan ## Aktueller Stand (vollständig analysiert) @@ -9,9 +9,9 @@ - `docker-build.yml`: Trigggert nach erfolgreichem Release-Please-Run **oder** `workflow_dispatch` **oder** `push` auf `develop` - Matrix-Build: 6 Services × 2 Architekturen (amd64 + arm64) via native GitHub Runners (`ubuntu-latest` + `ubuntu-24.04-arm`) - Build-Strategie: `push-by-digest` → separater `manifest`-Job erstellt Multi-Arch-Manifest - - Images landen auf `ghcr.io//csf-ce-:` + `:latest` + - Images landen auf `ghcr.io//csfx-ce-:` + `:latest` - Dockerfile: `control-plane/Dockerfile.prod.shared` mit `cargo-chef` für Layer-Caching - - `build-binaries`-Job: baut `csf-updater` und `csf-agent` als statische musl-Binaries (amd64 + arm64) + - `build-binaries`-Job: baut `csfx-updater` und `csfx-agent` als statische musl-Binaries (amd64 + arm64) - `attach-binaries-release`-Job: uploaded Binaries + SHA256-Dateien zum GitHub Release - `prerelease.yml`: Identischer Flow für `develop`-Branch → Pre-release mit `-alpha.` Tag - `lint.yml`: `cargo clippy -D warnings` + `cargo fmt --check` + `cargo audit` auf PRs und `main` @@ -19,55 +19,55 @@ **Dockerfile-Struktur (`Dockerfile.prod.shared`):** - Stage 1 (`planner`): `cargo chef prepare` — generiert `recipe.json` -- Stage 2 (`builder`): `cargo chef cook` (Dependency-Cache) + `cargo build --profile docker-release --bin --bin csf-migrate` -- 
Stage 3 (`runtime`): `debian:bookworm-slim`, beide Binaries (`/app/service` + `/csf-migrate`) kopiert -- Build-Arg `CSF_BUILD_VERSION` wird an den Build übergeben (für `build.rs`) +- Stage 2 (`builder`): `cargo chef cook` (Dependency-Cache) + `cargo build --profile docker-release --bin --bin csfx-migrate` +- Stage 3 (`runtime`): `debian:bookworm-slim`, beide Binaries (`/app/service` + `/csfx-migrate`) kopiert +- Build-Arg `CSFX_BUILD_VERSION` wird an den Build übergeben (für `build.rs`) -**`Dockerfile.csf-updater`:** -- Separates Dockerfile nur für `csf-updater`, exportiert Binary via `FROM scratch AS export` -- Wird nicht vom CI verwendet — CI baut `csf-updater` als musl-Binary direkt via `cargo build` +**`Dockerfile.csfx-updater`:** +- Separates Dockerfile nur für `csfx-updater`, exportiert Binary via `FROM scratch AS export` +- Wird nicht vom CI verwendet — CI baut `csfx-updater` als musl-Binary direkt via `cargo build` - Dieses Dockerfile ist totes Deployment-Artefakt, das nicht mehr zum CI-Flow passt ### Runtime-Komponenten -**`csf-updater` Binary** (`control-plane/csf-updater/`): -- Pollt etcd alle N Sekunden auf `/csf/config/desired_cp_version` -- Validiert Semver-Format, setzt `/csf/config/last_update_result` als Statusindikator +**`csfx-updater` Binary** (`control-plane/csfx-updater/`): +- Pollt etcd alle N Sekunden auf `/csfx/config/desired_cp_version` +- Validiert Semver-Format, setzt `/csfx/config/last_update_result` als Statusindikator - Lädt GHCR-Token verschlüsselt aus etcd (AES-256-GCM via `secret.rs`) - Führt `docker compose pull` → Digest-Verify → `docker compose up -d` aus - Digest-Verify: GHCR Registry API (remote) vs. 
`docker image inspect` (lokal) — aber `local_digest()` macht intern nochmal `docker pull` - Wartet 15s pauschal, prüft dann `docker compose ps` auf unhealthy Services -- Downloadet `csf-agent` und `csf-updater` Binaries von GitHub Releases, verifiziert SHA256, swappt atomar via `rename(2)` +- Downloadet `csfx-agent` und `csfx-updater` Binaries von GitHub Releases, verifiziert SHA256, swappt atomar via `rename(2)` - Startet Units via `sudo systemctl restart ` -**Shell-Fallback** (`deployments/systemd/csf-updater.sh`): +**Shell-Fallback** (`deployments/systemd/csfx-updater.sh`): - Identische Logik in Bash: etcd-Poll via curl + jq, docker-compose-Flow, Digest-Verify - Kein Binary-Download, kein Self-Update - Kein Health-Check nach up (nur `sleep 15` + `jq`-Filter) -**Systemd-Unit** (`deployments/systemd/csf-updater.service`): -- `ExecStart` zeigt auf `csf-updater.sh` (Shell-Script), nicht auf das Rust-Binary +**Systemd-Unit** (`deployments/systemd/csfx-updater.service`): +- `ExecStart` zeigt auf `csfx-updater.sh` (Shell-Script), nicht auf das Rust-Binary - Fehlende Env-Var: `SECRET_ENCRYPTION_KEY` (vom Rust-Binary required, im Shell-Script nicht gebraucht) - `ETCD_ENDPOINT` (Singular) statt `ETCD_ENDPOINTS` (Liste, wie Config erwartet) - Kein Hardening: kein `ProtectSystem`, kein `NoNewPrivileges`, kein `CapabilityBoundingSet` -- User `csf-updater` ist in Gruppe `docker` — kann alle Container auf dem Host steuern +- User `csfx-updater` ist in Gruppe `docker` — kann alle Container auf dem Host steuern --- ## Probleme und Schwachstellen ### P1 — systemd-Unit startet Shell-Script statt Rust-Binary -`ExecStart=/opt/csf/csf-updater.sh` — das Rust-Binary wird gebaut, deployed, aber nie gestartet. +`ExecStart=/opt/csfx/csfx-updater.sh` — das Rust-Binary wird gebaut, deployed, aber nie gestartet. Das Secret-Handling (AES-256-GCM), das persistente etcd-RESULT_KEY-Schreiben und die SHA256-Verify laufen damit in Prod nie. 
Die Shell-Version hat keine Verschlüsselung und kein Binary-Download. ### P2 — sudo ohne sudoers-Regel bricht in Prod -`restart_unit()` ruft `sudo systemctl restart ` auf. Der User `csf-updater` hat keine sudoers-Regel — jeder Update-Cycle schlägt beim systemctl-Call fehl, ohne Rollback. +`restart_unit()` ruft `sudo systemctl restart ` auf. Der User `csfx-updater` hat keine sudoers-Regel — jeder Update-Cycle schlägt beim systemctl-Call fehl, ohne Rollback. ### P3 — Kein Rollback Wenn `health_check()` einen unhealthy Service meldet, wird `RESULT_KEY` auf `failed` gesetzt und der Cycle endet. Die Services laufen weiterhin mit dem neuen (kaputten) Image. Kein `docker compose up -d` mit dem vorherigen Tag. ### P4 — Self-Update-Race -`update_self_binary()` downloaded das neue Binary und macht `systemctl restart csf-updater`. Der eigene Prozess wird gekillt bevor er `RESULT_KEY = success` schreiben kann — jeder Self-Update-Cycle hinterlässt `in_progress` in etcd. +`update_self_binary()` downloaded das neue Binary und macht `systemctl restart csfx-updater`. Der eigene Prozess wird gekillt bevor er `RESULT_KEY = success` schreiben kann — jeder Self-Update-Cycle hinterlässt `in_progress` in etcd. ### P5 — `last_applied` nur im RAM Nach Crash oder Restart versucht der Updater sofort wieder dieselbe Version zu applyen. Bei einem kaputten Setup → endloser Retry-Loop. @@ -85,16 +85,16 @@ Der Updater reconnected zu etcd jede Poll-Iteration und macht ein synchrones GET In `verify_images()` wird `docker pull --quiet` in `local_digest()` aufgerufen — obwohl `pull()` das Image bereits wenige Sekunden vorher gezogen hat. Verdoppelt die Download-Zeit. ### P10 — Agent-Binary-Update inkompatibel mit NixOS -`update_agent_binary()` schreibt nach `/usr/local/bin/csf-agent` und startet `csf-daemon` neu. Auf NixOS überlebt das Binary keinen `nixos-rebuild switch` — die systemd-Unit zeigt auf einen Nix-Store-Pfad, nicht auf `/usr/local/bin`. 
Der Ansatz funktioniert nur auf nicht-NixOS-Systemen. +`update_agent_binary()` schreibt nach `/usr/local/bin/csfx-agent` und startet `csfx-daemon` neu. Auf NixOS überlebt das Binary keinen `nixos-rebuild switch` — die systemd-Unit zeigt auf einen Nix-Store-Pfad, nicht auf `/usr/local/bin`. Der Ansatz funktioniert nur auf nicht-NixOS-Systemen. -### P11 — `Dockerfile.csf-updater` ist orphaned -Das separate Dockerfile baut `csf-updater` als statisches Binary, exportiert es via `FROM scratch`. Der CI-Flow (`docker-build.yml`) nutzt es nicht — er baut `csf-updater` direkt via `cargo build --target musl`. Das Dockerfile ist toter Code und führt zu Verwirrung bei der Frage welcher Build-Pfad der kanonische ist. +### P11 — `Dockerfile.csfx-updater` ist orphaned +Das separate Dockerfile baut `csfx-updater` als statisches Binary, exportiert es via `FROM scratch`. Der CI-Flow (`docker-build.yml`) nutzt es nicht — er baut `csfx-updater` direkt via `cargo build --target musl`. Das Dockerfile ist toter Code und führt zu Verwirrung bei der Frage welcher Build-Pfad der kanonische ist. ### P12 — `update-versions.sh` referenziert `backend/Cargo.toml` das nicht existiert -Das Script in `.github/scripts/update-versions.sh` patcht `backend/Cargo.toml`. Das Projekt heißt aber `CSF-Core` mit `Cargo.toml` im Root als Workspace. `backend/` existiert nicht. Das Script ist toter Code aus einem früheren Projekt-Layout. +Das Script in `.github/scripts/update-versions.sh` patcht `backend/Cargo.toml`. Das Projekt heißt aber `CSFX-Core` mit `Cargo.toml` im Root als Workspace. `backend/` existiert nicht. Das Script ist toter Code aus einem früheren Projekt-Layout. -### P13 — `csf-updater` im selben `Dockerfile.prod.shared` wie Services -Der `build`-Job in `docker-build.yml` baut alle 6 Services mit `Dockerfile.prod.shared`. `csf-updater` hat ein eigenes `Dockerfile.csf-updater`. Der `build-binaries`-Job baut `csf-updater` als musl-Binary. 
Drei verschiedene Build-Pfade für dasselbe Binary — unklar welcher kanonisch ist. +### P13 — `csfx-updater` im selben `Dockerfile.prod.shared` wie Services +Der `build`-Job in `docker-build.yml` baut alle 6 Services mit `Dockerfile.prod.shared`. `csfx-updater` hat ein eigenes `Dockerfile.csfx-updater`. Der `build-binaries`-Job baut `csfx-updater` als musl-Binary. Drei verschiedene Build-Pfade für dasselbe Binary — unklar welcher kanonisch ist. --- @@ -105,11 +105,11 @@ Der `build`-Job in `docker-build.yml` baut alle 6 Services mit `Dockerfile.prod. ``` GitHub Release v1.2.3 → CI baut Images + musl-Binaries - → Images auf ghcr.io//csf-ce-:1.2.3 - → Binaries als Release-Assets (csf-agent-amd64, csf-updater-amd64 etc.) - → Admin setzt etcd: /csf/config/desired_cp_version = "1.2.3" + → Images auf ghcr.io//csfx-ce-:1.2.3 + → Binaries als Release-Assets (csfx-agent-amd64, csfx-updater-amd64 etc.) + → Admin setzt etcd: /csfx/config/desired_cp_version = "1.2.3" -etcd-Watch (kein Poll) triggert csf-updater: +etcd-Watch (kein Poll) triggert csfx-updater: 1. acquire_lock (etcd Lease, 60s TTL) — verhindert parallele Updates 2. pull images (alle 6 Services parallel via goroutines/tasks) 3. 
verify digests (remote GHCR API vs lokaler docker inspect, KEIN zweiter pull) @@ -126,13 +126,13 @@ bei Fehler in Schritt 4/5: **etcd-Keys:** ``` -/csf/config/desired_cp_version → Zielversion (Admin schreibt diesen Key) -/csf/config/applied_cp_version → zuletzt erfolgreich gerollte Version (persistentes last_applied) -/csf/config/last_update_result → in_progress | success | failed | rolled_back -/csf/config/update_paused → true/false (bereits implementiert) -/csf/config/update_lock → Distributed Lock (etcd Lease) -/csf/config/ghcr_token → AES-256-GCM verschlüsseltes Token (bereits implementiert) -/csf/config/desired_agent_version → Zielversion für csf-agent (Registry liest, Heartbeat trägt aus) +/csfx/config/desired_cp_version → Zielversion (Admin schreibt diesen Key) +/csfx/config/applied_cp_version → zuletzt erfolgreich gerollte Version (persistentes last_applied) +/csfx/config/last_update_result → in_progress | success | failed | rolled_back +/csfx/config/update_paused → true/false (bereits implementiert) +/csfx/config/update_lock → Distributed Lock (etcd Lease) +/csfx/config/ghcr_token → AES-256-GCM verschlüsseltes Token (bereits implementiert) +/csfx/config/desired_agent_version → Zielversion für csfx-agent (Registry liest, Heartbeat trägt aus) ``` ### Schicht 2 — Agent-Updates @@ -142,30 +142,30 @@ bei Fehler in Schritt 4/5: Registry liest desired_agent_version aus etcd → Heartbeat-Response: { desired_version: "1.2.3" } → Agent vergleicht mit env!("CARGO_PKG_VERSION") aus build.rs - → wenn neuer: schreibe /var/lib/csf-daemon/desired_version - → triggere systemctl start csf-agent-update.service (PolicyKit-Regel) + → wenn neuer: schreibe /var/lib/csfx-daemon/desired_version + → triggere systemctl start csfx-agent-update.service (PolicyKit-Regel) → Oneshot-Unit führt nixos-rebuild switch aus - → systemd startet csf-daemon nach rebuild neu (neues Binary aus Nix-Store) + → systemd startet csfx-daemon nach rebuild neu (neues Binary aus Nix-Store) ``` 
**Nicht-NixOS-Fallback:** ``` Agent: - 1. Download Binary in tmpfile (/var/lib/csf-daemon/csf-agent.new) + 1. Download Binary in tmpfile (/var/lib/csfx-daemon/csfx-agent.new) 2. verifiziere SHA256 gegen Release-Asset 3. chmod 0o750 - 4. rename(2) → atomarer swap nach /var/lib/csf-daemon/csf-agent + 4. rename(2) → atomarer swap nach /var/lib/csfx-daemon/csfx-agent 5. exec() sich selbst (in-place restart, kein PID-Wechsel) - bei exec()-Fehler: systemctl restart csf-daemon via D-Bus (kein sudo) + bei exec()-Fehler: systemctl restart csfx-daemon via D-Bus (kein sudo) ``` -Der `csf-updater` ist nicht zuständig für Agent-Updates. Er schreibt nur `/csf/config/desired_agent_version`. Die Verteilung läuft ausschließlich über den Heartbeat-Mechanismus. +Der `csfx-updater` ist nicht zuständig für Agent-Updates. Er schreibt nur `/csfx/config/desired_agent_version`. Die Verteilung läuft ausschließlich über den Heartbeat-Mechanismus. ### Schicht 3 — Self-Update des Updaters -Empfehlung: `csf-updater` Self-Update entfernen. +Empfehlung: `csfx-updater` Self-Update entfernen. -Begründung: `csf-updater` ist kein Service der laufend upgedatet werden muss. Er wird beim Aufsetzen eines neuen Nodes deployed (via NixOS-Modul oder Ansible). Neue Versionen des Updaters kommen mit dem nächsten Node-Provisioning. Der Self-Update-Race (P4) entfällt komplett. +Begründung: `csfx-updater` ist kein Service der laufend upgedatet werden muss. Er wird beim Aufsetzen eines neuen Nodes deployed (via NixOS-Modul oder Ansible). Neue Versionen des Updaters kommen mit dem nächsten Node-Provisioning. Der Self-Update-Race (P4) entfällt komplett. Falls Self-Update doch gewünscht: `success` + `applied_cp_version` in etcd schreiben, **dann** Binary tauschen + Unit neustarten. Die neue Instanz liest `applied_cp_version` beim Start und überspringt die Version. 
@@ -174,9 +174,9 @@ Falls Self-Update doch gewünscht: `success` + `applied_cp_version` in etcd schr ## Konkrete Änderungen (priorisiert) ### 1 — systemd-Unit auf Rust-Binary umstellen [blocking] -`ExecStart` von `csf-updater.sh` auf `/usr/local/bin/csf-updater` ändern. +`ExecStart` von `csfx-updater.sh` auf `/usr/local/bin/csfx-updater` ändern. `ETCD_ENDPOINT` → `ETCD_ENDPOINTS` (kommaseparierte Liste). -`SECRET_ENCRYPTION_KEY` als Env-Var ergänzen (aus `/opt/csf/.env`). +`SECRET_ENCRYPTION_KEY` als Env-Var ergänzen (aus `/opt/csfx/.env`). ### 2 — Persistentes `applied_version` in etcd [blocking] Beim Start: `etcd.get(APPLIED_VERSION_KEY)` als initialen `last_applied`. @@ -218,9 +218,9 @@ Fallback-Poll alle 5 Minuten (Watch kann bei Netzwerkproblemen abreißen). Eliminiert das unnötige Reconnect bei jedem Poll-Cycle. ### 8 — sudoers-Datei oder D-Bus-Restart -Einfachste Lösung: `/etc/sudoers.d/90-csf-updater`: +Einfachste Lösung: `/etc/sudoers.d/90-csfx-updater`: ``` -csf-updater ALL=(root) NOPASSWD: /usr/bin/systemctl restart csf-daemon +csfx-updater ALL=(root) NOPASSWD: /usr/bin/systemctl restart csfx-daemon ``` Dieses File muss Teil des NixOS-Moduls / Deployment-Skripts sein. Mittelfristig: `zbus`-Crate für D-Bus-nativen systemd-Unit-Restart ohne sudo. @@ -230,17 +230,17 @@ Mittelfristig: `zbus`-Crate für D-Bus-nativen systemd-Unit-Restart ohne sudo. Agent-Updates laufen via Heartbeat-Response (Schicht 2). Updater-Updates laufen via Node-Provisioning. -### 10 — `Dockerfile.csf-updater` entfernen +### 10 — `Dockerfile.csfx-updater` entfernen Totes Artefakt — CI nutzt es nicht. Verursacht Verwirrung über den kanonischen Build-Pfad. Kanonisch ist `build-binaries`-Job in `docker-build.yml` (musl, statisches Binary). ### 11 — `update-versions.sh` fixen oder entfernen Script referenziert `backend/Cargo.toml` (existiert nicht). Versioning läuft über `release-please` + `Cargo.toml` workspace. Script ist funktionslos, sollte entfernt werden. 
-### 12 — NixOS-Modul: `csf-agent-update.service` Oneshot-Unit +### 12 — NixOS-Modul: `csfx-agent-update.service` Oneshot-Unit ```nix -systemd.services.csf-agent-update = { - description = "CSF Agent NixOS Update"; +systemd.services.csfx-agent-update = { + description = "CSFX Agent NixOS Update"; serviceConfig = { Type = "oneshot"; ExecStart = "${pkgs.nixos-rebuild}/bin/nixos-rebuild switch"; @@ -250,8 +250,8 @@ systemd.services.csf-agent-update = { security.polkit.extraConfig = '' polkit.addRule(function(action, subject) { if (action.id === "org.freedesktop.systemd1.manage-units" && - action.lookup("unit") === "csf-agent-update.service" && - subject.user === "csf-daemon") { + action.lookup("unit") === "csfx-agent-update.service" && + subject.user === "csfx-daemon") { return polkit.Result.YES; } }); @@ -286,12 +286,12 @@ security.polkit.extraConfig = '' [ ] etcd-Watch in main.rs (mit Fallback-Poll) [ ] sudoers-Datei im Deployment oder D-Bus-basierter Restart [ ] Self-Update (update_agent_binary, update_self_binary) aus updater::run() entfernt -[ ] Dockerfile.csf-updater entfernt +[ ] Dockerfile.csfx-updater entfernt [ ] update-versions.sh entfernt oder auf Workspace-Cargo.toml korrigiert [ ] desired_agent_version in etcd schreiben (Admin-API oder Registry-Seite) [ ] HeartbeatResponse: desired_version Feld ergänzen (Registry + Agent) [ ] Agent: Version-Check + Update-Trigger (NixOS-Pfad + Fallback) -[ ] NixOS-Modul: csf-agent-update.service Oneshot-Unit + PolicyKit-Regel +[ ] NixOS-Modul: csfx-agent-update.service Oneshot-Unit + PolicyKit-Regel [ ] systemd-Unit Hardening (NoNewPrivileges, ProtectSystem, CapabilityBoundingSet) ``` @@ -301,5 +301,5 @@ security.polkit.extraConfig = '' - Watchtower: Dev-only, kein Digest-Verify, kein Rollback — nicht Prod-fähig - Kubernetes-style Rolling Updates pro Replica: nicht relevant, Docker-Compose-Instanz pro Node -- Automatische Datenbankmigrationen im Updater: `csf-migrate` Init-Container ist korrekt und bleibt getrennt +- 
Automatische Datenbankmigrationen im Updater: `csfx-migrate` Init-Container ist korrekt und bleibt getrennt - Separate Version-Tracks pro Service: alle Services laufen auf derselben Workspace-Version diff --git a/frontend/src/lib/components/auth/login-form-client.svelte b/frontend/src/lib/components/auth/login-form-client.svelte index 4024a09..959bd0e 100644 --- a/frontend/src/lib/components/auth/login-form-client.svelte +++ b/frontend/src/lib/components/auth/login-form-client.svelte @@ -131,7 +131,7 @@

Willkommen zurück

-

Melden Sie sich in Ihrem CSF-Core Konto an

+

Melden Sie sich in Ihrem CSFX-Core Konto an

@@ -204,7 +204,7 @@ class="mx-auto mb-4 w-200 h-200 md:w-200 h-200 lg:w-[240px] h-[240px]" /> -

CSF-Core

+

CSFX-Core

The AI-Ready Business Platform
diff --git a/frontend/src/lib/components/auth/otp-form-client.svelte b/frontend/src/lib/components/auth/otp-form-client.svelte index ccff56a..812003c 100644 --- a/frontend/src/lib/components/auth/otp-form-client.svelte +++ b/frontend/src/lib/components/auth/otp-form-client.svelte @@ -168,11 +168,11 @@
CSF-Core Logo -

CSF-Core

+

CSFX-Core

The AI-Ready Business Platform
diff --git a/frontend/src/lib/components/navbar/app-sidebar.svelte b/frontend/src/lib/components/navbar/app-sidebar.svelte index fa33e3c..749fef8 100644 --- a/frontend/src/lib/components/navbar/app-sidebar.svelte +++ b/frontend/src/lib/components/navbar/app-sidebar.svelte @@ -24,7 +24,7 @@ }, teams: [ { - name: 'CSF Core', + name: 'CSFX Core', plan: 'Premium', }, ], diff --git a/frontend/src/lib/components/navbar/nav-user.svelte b/frontend/src/lib/components/navbar/nav-user.svelte index f39dcdd..62aa4c7 100644 --- a/frontend/src/lib/components/navbar/nav-user.svelte +++ b/frontend/src/lib/components/navbar/nav-user.svelte @@ -71,7 +71,7 @@
{authState.user?.username || 'User'} - CSF-Core + CSFX-Core
@@ -94,7 +94,7 @@
{authState.user?.username || 'User'} - CSF-Core + CSFX-Core
diff --git a/frontend/src/lib/components/navbar/team-switcher.svelte b/frontend/src/lib/components/navbar/team-switcher.svelte index 9bdaced..468fd40 100644 --- a/frontend/src/lib/components/navbar/team-switcher.svelte +++ b/frontend/src/lib/components/navbar/team-switcher.svelte @@ -18,12 +18,12 @@ > CSF-Core Logo
- CSF-Core + CSFX-Core Business Platform
diff --git a/frontend/src/lib/components/settings/UpdateSettings.svelte b/frontend/src/lib/components/settings/UpdateSettings.svelte index ddc6905..d0cc25e 100644 --- a/frontend/src/lib/components/settings/UpdateSettings.svelte +++ b/frontend/src/lib/components/settings/UpdateSettings.svelte @@ -103,7 +103,7 @@ Software-Updates - Überprüfen und installieren Sie CSF-Core Updates + Überprüfen und installieren Sie CSFX-Core Updates @@ -295,7 +295,7 @@

Automatische Update-Prüfung

- CSF-Core prüft automatisch stündlich auf neue Updates. Updates werden nur angezeigt, wenn + CSFX-Core prüft automatisch stündlich auf neue Updates. Updates werden nur angezeigt, wenn sie verfügbar sind.

diff --git a/frontend/src/routes/local-system/+page.svelte b/frontend/src/routes/local-system/+page.svelte index 68cd4d8..26249f2 100644 --- a/frontend/src/routes/local-system/+page.svelte +++ b/frontend/src/routes/local-system/+page.svelte @@ -80,14 +80,14 @@ - Local System - CSF Core + Local System - CSFX Core

Local System

-

Monitor the system running the CSF Core backend daemon

+

Monitor the system running the CSFX Core backend daemon