diff --git a/memory/2026-02-18.md b/memory/2026-02-18.md
index 096af1f..6b2bbab 100644
--- a/memory/2026-02-18.md
+++ b/memory/2026-02-18.md
@@ -67,3 +67,77 @@
 - Firewall: ID 10553199
 - Load Balancer: ID 5833603, IP 46.225.37.146
 - SSH keys: dominik-nb01 (ID 107656266), openclaw-vm (ID 107656268)
+
+## Coolify 3-Node HA Setup Complete
+
+### Infrastructure
+- **coolify-mgr** (188.34.201.101, 10.0.1.1) — Coolify UI + etcd
+- **coolify-w1** (46.225.62.90, 10.0.1.2) — Apps + etcd + Patroni PRIMARY + PgBouncer
+- **coolify-w2** (46.224.208.205, 10.0.1.4) — Apps + etcd + Patroni REPLICA + PgBouncer
+- Hetzner server ID for w2: 121361614, Coolify UUID: mwccg08sokosk4wgw40g08ok
+
+### Components
+- **etcd 3.5.17** on all 3 nodes (quay.io/coreos/etcd, ARM64 compatible)
+- **Patroni + PostgreSQL 16** on workers (custom Docker image `patroni:local`)
+- **PgBouncer** (edoburu/pgbouncer) on workers — routes to current primary
+- **Watcher** (systemd timer, every 5s) updates PgBouncer config on failover
+
+### Key Facts
+- Docker daemon.json on all nodes: `172.17.0.0/12` pool (fixes 10.0.x conflict with Hetzner private net)
+- Infra compose: `/opt/infra/docker-compose.yml` on each node
+- Patroni config: `/opt/infra/patroni/patroni.yml`
+- PgBouncer config: `/opt/infra/pgbouncer/pgbouncer.ini`
+- Watcher script: `/opt/infra/pgbouncer/update-primary.sh`
+- Failover log: `/opt/infra/pgbouncer/failover.log`
+- `docfast` database created and replicated
+- Failover tested: pg1→pg2 promotion + pg1 rejoin as replica ✅
+- Switchover tested: pg2→pg1 clean switchover ✅
+- Cost: €11.67/mo (3x CAX11)
+
+### Remaining Steps
+- [ ] Migrate DocFast data from 167.235.156.214 to Patroni cluster
+- [ ] Deploy DocFast app via Coolify on both workers
+- [ ] Set up BorgBackup on new nodes
+- [ ] Add docfast user SCRAM hash to PgBouncer userlist
+- [ ] Create project-scoped API tokens for CEO agents
+
+## K3s + CloudNativePG Setup Complete
+
+### Architecture
+- **k3s-mgr** (188.34.201.101, 10.0.1.5) — K3s control plane, Hetzner ID 121365837
+- **k3s-w1** (159.69.23.121, 10.0.1.6) — Worker, Hetzner ID 121365839
+- **k3s-w2** (46.225.169.60, 10.0.1.7) — Worker, Hetzner ID 121365840
+
+### Cluster Components
+- K3s v1.34.4 (Traefik DaemonSet on workers, servicelb disabled)
+- CloudNativePG 1.25.1 (operator in cnpg-system namespace)
+- cert-manager 1.17.2 (Let's Encrypt ClusterIssuer)
+- PostgreSQL 17.4 (CNPG managed, 2 instances, 1 primary + 1 replica)
+- PgBouncer Pooler (CNPG managed, 2 instances, transaction mode)
+
+### Namespaces
+- postgres: CNPG cluster + pooler
+- docfast: DocFast app deployment
+- cnpg-system: CNPG operator
+- cert-manager: Certificate management
+
+### DocFast Deployment
+- 2 replicas, one per worker
+- Image: docker.io/library/docfast:latest (locally built + imported via k3s ctr)
+- DB: main-db-pooler.postgres.svc:5432
+- Health: /health on port 3100
+- 53 API keys migrated from old server
+
+### Key Learnings
+- Docker images must be imported with `k3s ctr images import --all-platforms` (not `ctr -n k3s.io`)
+- CNPG tolerations field caused infinite restart loop — removed to fix
+- DB table ownership must be set to app user after pg_restore with --no-owner
+
+### Remaining
+- [ ] Switch DNS docfast.dev → worker IP (159.69.23.121 or 46.225.169.60)
+- [ ] TLS cert will auto-complete after DNS switch
+- [ ] Update Stripe webhook endpoint if needed
+- [ ] Set up CI/CD pipeline for automated deploys
+- [ ] Create CEO namespace RBAC
+- [ ] Decommission old server (167.235.156.214)
+- [ ] Clean up Docker from workers (only needed containerd/K3s)
diff --git a/memory/portfolio.json b/memory/portfolio.json
index 6d4c4b7..09948bc 100644
--- a/memory/portfolio.json
+++ b/memory/portfolio.json
@@ -54,10 +54,10 @@
   "created": "2026-02-12T20:00:00Z",
   "lastUpdated": "2026-02-17T16:15:00Z",
   "closingSnapshot": {
-    "date": "2026-02-17",
-    "DFNS": 57.01,
-    "portfolioValue": 1014.27,
-    "dailyPL": 0.07,
-    "totalReturn": 1.43
+    "date": "2026-02-18",
+    "DFNS": 57.50,
+    "portfolioValue": 1022.58,
+    "dailyPL": 0.86,
+    "totalReturn": 2.26
   }
 }
diff --git a/scripts/coolify-setup.sh b/scripts/coolify-setup.sh
new file mode 100755
index 0000000..aafa222
--- /dev/null
+++ b/scripts/coolify-setup.sh
@@ -0,0 +1,252 @@
+#!/bin/bash
+# Coolify Infrastructure Setup via Hetzner Cloud API
+#
+# Provisions SSH keys, a private network, two CAX11 servers and a firewall.
+# Every step looks the resource up by name first and only creates what is
+# missing, so the script can be re-run after a partial failure.
+#
+# Usage:    coolify-setup.sh   (no arguments)
+# Requires: bash, jq >= 1.5, curl >= 7.55 (for -H @file)
+# Reads:    COOLIFY_HETZNER_API_KEY from the services.env file at CRED_FILE
+set -euo pipefail
+
+readonly CRED_FILE="${HOME}/.openclaw/workspace/.credentials/services.env"
+readonly API="https://api.hetzner.cloud/v1"
+readonly SERVER_NAMES=(coolify-1 coolify-2)
+
+# die MESSAGE... - report a fatal error on stderr and exit non-zero.
+die() {
+  printf 'ERROR: %s\n' "$*" >&2
+  exit 1
+}
+
+# hcloud [CURL_ARGS...] URL - authenticated JSON request; prints the body.
+# HTTP errors do not fail the call: the API describes them in a JSON
+# {"error": ...} body that callers inspect. Transport failures (DNS, TLS,
+# timeout) do fail, and -S makes curl say why. The token is passed through a
+# header file so it never shows up in curl's argv (readable via ps).
+hcloud() {
+  curl -sS \
+    -H @<(printf 'Authorization: Bearer %s\n' "$HETZNER_TOKEN") \
+    -H "Content-Type: application/json" \
+    "$@"
+}
+
+# api_error RESPONSE - print the message of an API error body.
+api_error() {
+  jq -r '.error.message // "Unknown error"' <<<"$1" 2>/dev/null \
+    || printf 'Unknown error\n'
+}
+
+# get_named COLLECTION NAME - print the resource called NAME as compact JSON,
+# or nothing when it does not exist. Filters server-side with ?name= so the
+# answer does not depend on the resource being on the first result page.
+# A failed API call is fatal rather than being mistaken for "not found".
+get_named() {
+  local collection=$1 name=$2 resp
+  resp=$(hcloud -G --data-urlencode "name=${name}" "${API}/${collection}") \
+    || die "request to list ${collection} failed"
+  jq -e 'type == "object" and (has("error") | not)' <<<"$resp" >/dev/null 2>&1 \
+    || die "listing ${collection} failed: $(api_error "$resp")"
+  jq -c --arg c "$collection" --arg n "$name" \
+    'first(.[$c][] | select(.name == $n))' <<<"$resp"
+}
+
+# find_id COLLECTION NAME - print the ID of the resource called NAME, if any.
+find_id() {
+  local obj
+  obj=$(get_named "$1" "$2") || return 1
+  if [[ -n "$obj" ]]; then
+    jq -r '.id' <<<"$obj"
+  fi
+}
+
+# created_id RESPONSE KEY - print .KEY.id of a create response, or die with
+# the API's error message when the response carries no ID.
+created_id() {
+  local resp=$1 key=$2 id
+  id=$(jq -r --arg k "$key" '.[$k].id // empty' <<<"$resp" 2>/dev/null) || id=""
+  [[ -n "$id" ]] || die "$(api_error "$resp")"
+  printf '%s\n' "$id"
+}
+
+for tool in curl jq; do
+  command -v "$tool" >/dev/null || die "required tool '$tool' not found in PATH"
+done
+
+# Load API key from services.env
+[[ -r "$CRED_FILE" ]] || die "cannot read credentials file: $CRED_FILE"
+# grep exits 1 when the key is absent; tolerate that so the check below can
+# print a meaningful message instead of set -e aborting silently.
+HETZNER_TOKEN=$(grep -m1 '^COOLIFY_HETZNER_API_KEY=' "$CRED_FILE" | cut -d= -f2- || true)
+[[ -n "$HETZNER_TOKEN" ]] || die "COOLIFY_HETZNER_API_KEY not found in services.env"
+readonly HETZNER_TOKEN
+
+echo "=== Coolify Infrastructure Setup ==="
+
+# Step 1: Upload SSH keys
+echo ""
+echo "--- Step 1: SSH Keys ---"
+
+# User's key
+USER_KEY="ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFshMhXwS0FQFPlITipshvNKrV8sA52ZFlnaoHd1thKg dominik@nb-01"
+# OpenClaw key
+OPENCLAW_KEY="ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL+i4Nn0Nc1ovqHXmbyekxCigT2Qn6RD1cdbKkW727Yl openclaw@openclaw-vm"
+
+# upload_key NAME PUBLIC_KEY - make sure the key exists and print its ID.
+# Progress messages go to stderr because callers capture stdout.
+upload_key() {
+  local name=$1 key=$2 kid payload resp
+
+  kid=$(find_id ssh_keys "$name") || return 1
+  if [[ -n "$kid" ]]; then
+    printf '%s\n' "$kid"
+    return 0
+  fi
+
+  echo " Uploading key '$name'..." >&2
+  # Build the body with jq so quotes in the inputs cannot break the JSON.
+  payload=$(jq -n --arg name "$name" --arg key "$key" \
+    '{name: $name, public_key: $key}') || return 1
+  resp=$(hcloud -X POST "${API}/ssh_keys" -d "$payload") \
+    || die "request to create SSH key '${name}' failed"
+  kid=$(created_id "$resp" ssh_key) || return 1
+  echo " Uploaded: ID $kid" >&2
+  printf '%s\n' "$kid"
+}
+
+USER_KEY_ID=$(upload_key "dominik-nb01" "$USER_KEY")
+OPENCLAW_KEY_ID=$(upload_key "openclaw-vm" "$OPENCLAW_KEY")
+
+echo " User key ID: $USER_KEY_ID"
+echo " OpenClaw key ID: $OPENCLAW_KEY_ID"
+
+# Step 2: Create Network
+echo ""
+echo "--- Step 2: Private Network ---"
+
+NET_ID=$(find_id networks "coolify-net")
+
+if [[ -n "$NET_ID" ]]; then
+  echo " Network 'coolify-net' already exists: ID $NET_ID"
+else
+  echo " Creating network 'coolify-net' (10.0.0.0/16)..."
+  RESP=$(hcloud -X POST "${API}/networks" -d '{
+    "name": "coolify-net",
+    "ip_range": "10.0.0.0/16",
+    "subnets": [{"type": "cloud", "network_zone": "eu-central", "ip_range": "10.0.1.0/24"}]
+  }')
+  NET_ID=$(created_id "$RESP" network)
+  echo " Created: ID $NET_ID"
+fi
+
+# Step 3: Create Servers
+echo ""
+echo "--- Step 3: Servers ---"
+
+# create_server NAME - make sure a CAX11 server called NAME exists, attached
+# to the private network and set up with both SSH keys.
+# Globals: USER_KEY_ID, OPENCLAW_KEY_ID, NET_ID (read)
+create_server() {
+  local name=$1 existing payload resp sid ip
+
+  existing=$(get_named servers "$name") || exit 1
+  if [[ -n "$existing" ]]; then
+    sid=$(jq -r '.id' <<<"$existing")
+    ip=$(jq -r '.public_net.ipv4.ip' <<<"$existing")
+    echo " Server '$name' already exists: ID $sid"
+    echo " IP: $ip"
+    return 0
+  fi
+
+  echo " Creating server '$name' (CAX11, ARM64, Ubuntu 24.04, fsn1)..."
+  # --argjson keeps the IDs numeric; the role label is the name without its
+  # "coolify-" part.
+  payload=$(jq -n \
+    --arg name "$name" \
+    --arg role "${name/coolify-/}" \
+    --argjson user_key "$USER_KEY_ID" \
+    --argjson openclaw_key "$OPENCLAW_KEY_ID" \
+    --argjson network "$NET_ID" \
+    '{
+      name: $name,
+      server_type: "cax11",
+      image: "ubuntu-24.04",
+      location: "fsn1",
+      ssh_keys: [$user_key, $openclaw_key],
+      networks: [$network],
+      public_net: {enable_ipv4: true, enable_ipv6: true},
+      labels: {project: "coolify", role: $role}
+    }')
+  resp=$(hcloud -X POST "${API}/servers" -d "$payload")
+  sid=$(created_id "$resp" server)
+  ip=$(jq -r '.server.public_net.ipv4.ip' <<<"$resp")
+
+  echo " Created: ID $sid, IP $ip"
+}
+
+for name in "${SERVER_NAMES[@]}"; do
+  create_server "$name"
+done
+
+# Step 4: Create Firewall
+echo ""
+echo "--- Step 4: Firewall ---"
+
+FIREWALL=$(get_named firewalls "coolify-fw")
+
+if [[ -n "$FIREWALL" ]]; then
+  FW_ID=$(jq -r '.id' <<<"$FIREWALL")
+  echo " Firewall 'coolify-fw' already exists: ID $FW_ID"
+else
+  echo " Creating firewall 'coolify-fw'..."
+  # NOTE(review): SSH (22) and the Coolify UI (8000) are open to the whole
+  # internet; consider narrowing source_ips.
+  RESP=$(hcloud -X POST "${API}/firewalls" -d '{
+    "name": "coolify-fw",
+    "rules": [
+      {"direction": "in", "protocol": "tcp", "port": "22", "source_ips": ["0.0.0.0/0", "::/0"], "description": "SSH"},
+      {"direction": "in", "protocol": "tcp", "port": "80", "source_ips": ["0.0.0.0/0", "::/0"], "description": "HTTP"},
+      {"direction": "in", "protocol": "tcp", "port": "443", "source_ips": ["0.0.0.0/0", "::/0"], "description": "HTTPS"},
+      {"direction": "in", "protocol": "tcp", "port": "8000", "source_ips": ["0.0.0.0/0", "::/0"], "description": "Coolify UI"}
+    ]
+  }')
+  FW_ID=$(created_id "$RESP" firewall)
+  FIREWALL=$(jq -c '.firewall' <<<"$RESP")
+  echo " Created: ID $FW_ID"
+fi
+
+# Apply to every coolify-* server the firewall does not cover yet. This also
+# runs when the firewall already existed, so servers created by a later run
+# (or missed by an interrupted one) still get it.
+echo " Applying firewall to servers..."
+COVERED=$(jq -c '[.applied_to[]? | select(.type == "server") | .server.id]' <<<"$FIREWALL")
+SERVERS=$(hcloud "${API}/servers?per_page=50")
+PENDING=$(jq -r --argjson covered "$COVERED" '
+  .servers[]
+  | select(.name | startswith("coolify-"))
+  | .id
+  | select(. as $id | any($covered[]; . == $id) | not)
+' <<<"$SERVERS")
+# IDs are plain integers, so word splitting the list is intended.
+for SID in $PENDING; do
+  RESP=$(hcloud -X POST "${API}/firewalls/${FW_ID}/actions/apply_to_resources" \
+    -d "{\"apply_to\": [{\"type\": \"server\", \"server\": {\"id\": ${SID}}}]}")
+  jq -e 'has("error") | not' <<<"$RESP" >/dev/null \
+    || die "applying firewall to server ${SID} failed: $(api_error "$RESP")"
+  echo " Applied to server $SID"
+done
+
+# Summary
+echo ""
+echo "=== DONE ==="
+echo ""
+echo "Servers:"
+for name in "${SERVER_NAMES[@]}"; do
+  SERVER=$(get_named servers "$name")
+  IP=$(jq -r '.public_net.ipv4.ip' <<<"${SERVER:-null}")
+  PRIV_IP=$(jq -r '.private_net[0].ip // "pending"' <<<"${SERVER:-null}")
+  echo " $name: public=$IP private=$PRIV_IP"
+done
+echo ""
+echo "Next: Wait ~60s for servers to boot, then install Coolify on coolify-1"