Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions .github/workflows/cd.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Continuous deployment: when a GitHub release is published, verify that it is
# not a major version upgrade and then deploy the released tag to the manager
# host over SSH.
name: CD

# Deny all token permissions by default; the job below grants what it needs.
permissions: {}

on:
  release:
    types: [published]

jobs:
  deploy:
    name: Deploy
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - name: Check for major version upgrade
        # Expression values are handed to the script through `env` instead of
        # being interpolated into the script text, so a crafted tag name or
        # variable value is treated as data and cannot inject shell code.
        env:
          RELEASE_TAG: ${{ github.event.release.tag_name }}
          DEPLOYED_MAJOR_VERSION: ${{ vars.DEPLOYED_MAJOR_VERSION }}
        run: |
          # Strip an optional leading "v", then keep everything before the first dot.
          VERSION="${RELEASE_TAG#v}"
          MAJOR="${VERSION%%.*}"

          if [ -z "$DEPLOYED_MAJOR_VERSION" ]; then
            echo "::error::DEPLOYED_MAJOR_VERSION repository variable is not set."
            exit 1
          fi

          if [ "$MAJOR" != "$DEPLOYED_MAJOR_VERSION" ]; then
            echo "::error::Major version upgrade detected ($DEPLOYED_MAJOR_VERSION -> $MAJOR). Manual deployment required."
            exit 1
          fi

      - name: Deploy to manager
        env:
          DEPLOY_TAG: ${{ github.event.release.tag_name }}
          MANAGER_IPV6: ${{ secrets.MANAGER_IPV6 }}
          SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
        run: |
          # Remove the private key on every exit path, including a failed
          # deployment (the step shell aborts on the first failing command).
          trap 'rm -f ~/.ssh/deploy_key' EXIT
          # Create the key file (and ~/.ssh) without group/other access from the start.
          umask 077

          mkdir -p ~/.ssh
          echo "$SSH_PRIVATE_KEY" > ~/.ssh/deploy_key
          chmod 600 ~/.ssh/deploy_key
          # TOFU: For stronger assurance, store the manager's host key fingerprint
          # as a GitHub secret and write it to known_hosts instead of scanning.
          ssh-keyscan -H "$MANAGER_IPV6" >> ~/.ssh/known_hosts 2>/dev/null

          # The tag is shell-quoted with printf %q because ssh joins its arguments
          # into a single command line for the remote shell. The quoted heredoc
          # delimiter keeps the remote script from being expanded locally.
          ssh -i ~/.ssh/deploy_key "root@$MANAGER_IPV6" bash -s -- "$(printf '%q' "$DEPLOY_TAG")" << 'DEPLOY'
            set -euo pipefail
            DEPLOY_TAG="$1"
            cd /opt/vibetype
            git fetch origin --tags
            git checkout -- . && git checkout "$DEPLOY_TAG"
            bash src/production/terraform/scripts/create-secrets.sh
            bash src/production/terraform/scripts/generate-env.sh
            dargstack deploy -p "$DEPLOY_TAG" --offline
          DEPLOY
4 changes: 4 additions & 0 deletions .sops.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Files whose path matches `path_regex` are encrypted for the `age` recipient.
creation_rules:
  - path_regex: '(secrets\.enc\.yaml|terraform\.tfvars\.enc\.yaml)$'
    # Placeholder: substitute the public key printed by `age-keygen`
    # (it looks like age1abc...).
    age: '<your-age-public-key>'
Comment thread
dargmuesli marked this conversation as resolved.
45 changes: 45 additions & 0 deletions secrets.example.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Template for the encrypted secrets file. Every value is intentionally empty.
# Usage:
#   cp secrets.example.yaml secrets.enc.yaml
#   (fill in the values)
#   sops -e -i secrets.enc.yaml

# elasticsearch-*
elasticsearch-keystore_password: ''
elasticsearch-password: ''

# env_*
env_CLOUDFLARED_TUNNEL_TOKEN: ''
env_SENTRY_CRONS: ''
env_TRAEFIK_ACME_EMAIL: ''

# grafana_*
grafana_admin_email: ''
grafana_admin_password: ''
grafana_admin_user: ''
grafana_discord_webhook: ''

# jobber_*
jobber_aliases: ''
jobber_aws-bucket: ''
jobber_aws-configuration: ''
jobber_aws-credentials: ''
jobber_msmtprc: ''

# portainer_*
portainer_admin-password: ''

# postgraphile_*
postgraphile_connection: ''
postgraphile_jwt-secret: ''
postgraphile_owner-connection: ''

# postgres-backup_* / postgres_*
postgres-backup_db: ''
postgres_db: ''
postgres_password: ''
postgres_role_service_grafana_password: ''
postgres_role_service_grafana_username: ''
postgres_role_service_postgraphile_password: ''
postgres_role_service_postgraphile_username: ''
postgres_role_service_vibetype_password: ''
postgres_role_service_vibetype_username: ''
postgres_role_service_zammad_password: ''
postgres_role_service_zammad_username: ''
postgres_user: ''

# reccoom_*
reccoom_ingest-api-key: ''
reccoom_openai-api-key: ''

# sqitch_*
sqitch_target: ''

# traefik_*
traefik_cf-dns-api-token: ''
traefik_cf-zone-api-token: ''

# tusd_*
tusd_aws: ''

# vibetype_*
vibetype_api-notification-secret: ''
vibetype_aws-credentials: ''
vibetype_firebase-service-account-credentials: ''
vibetype_monday: ''
vibetype_openai-api-key: ''
vibetype_turnstile-key: ''
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
# Alert rule provisioning for the "Infrastructure" group.
# The group is evaluated every minute and every rule notifies the 'Discord'
# receiver. Each rule is a three-stage pipeline:
#   A - Prometheus query over the relative time range (seconds before now)
#   B - reduce each returned series to a single number
#   C - threshold on B; C is the alert condition
apiVersion: 1
groups:
  - orgId: 1
    name: Infrastructure
    folder: Infrastructure
    interval: 1m
    rules:
      # Mean non-idle CPU percentage per instance over the last 5 minutes.
      - uid: alert-cpu-high
        title: High CPU usage
        condition: C
        data:
          - refId: A
            relativeTimeRange:
              from: 300
              to: 0
            datasourceUid: prometheus
            model:
              expr: 100 - (avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)
              intervalMs: 15000
              maxDataPoints: 43200
              refId: A
          - refId: B
            datasourceUid: __expr__
            model:
              expression: A
              reducer: mean
              refId: B
              type: reduce
          - refId: C
            datasourceUid: __expr__
            model:
              conditions:
                - evaluator:
                    params:
                      - 85
                    type: gt
              expression: B
              refId: C
              type: threshold
        noDataState: NoData
        execErrState: Error
        for: 5m
        annotations:
          summary: CPU usage is above 85% for more than 5 minutes on {{ $labels.instance }}.
        labels:
          severity: warning
        isPaused: false
        notification_settings:
          receiver: 'Discord'

      # Percentage of memory that is not available, averaged over 5 minutes.
      - uid: alert-memory-high
        title: High memory usage
        condition: C
        data:
          - refId: A
            relativeTimeRange:
              from: 300
              to: 0
            datasourceUid: prometheus
            model:
              expr: (1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * 100
              intervalMs: 15000
              maxDataPoints: 43200
              refId: A
          - refId: B
            datasourceUid: __expr__
            model:
              expression: A
              reducer: mean
              refId: B
              type: reduce
          - refId: C
            datasourceUid: __expr__
            model:
              conditions:
                - evaluator:
                    params:
                      - 90
                    type: gt
              expression: B
              refId: C
              type: threshold
        noDataState: NoData
        execErrState: Error
        for: 5m
        annotations:
          summary: Memory usage is above 90% for more than 5 minutes on {{ $labels.instance }}.
        labels:
          severity: warning
        isPaused: false
        notification_settings:
          receiver: 'Discord'

      # Used-space percentage per filesystem, excluding tmpfs and overlay;
      # the maximum within the 5 minute window is compared to the threshold.
      - uid: alert-disk-low
        title: Low disk space
        condition: C
        data:
          - refId: A
            relativeTimeRange:
              from: 300
              to: 0
            datasourceUid: prometheus
            model:
              expr: (1 - node_filesystem_avail_bytes{fstype!~"tmpfs|overlay"} / node_filesystem_size_bytes{fstype!~"tmpfs|overlay"}) * 100
              intervalMs: 15000
              maxDataPoints: 43200
              refId: A
          - refId: B
            datasourceUid: __expr__
            model:
              expression: A
              reducer: max
              refId: B
              type: reduce
          - refId: C
            datasourceUid: __expr__
            model:
              conditions:
                - evaluator:
                    params:
                      - 85
                    type: gt
              expression: B
              refId: C
              type: threshold
        noDataState: NoData
        execErrState: Error
        for: 5m
        annotations:
          summary: Disk usage is above 85% on {{ $labels.instance }} ({{ $labels.mountpoint }}).
        labels:
          severity: warning
        isPaused: false
        notification_settings:
          receiver: 'Discord'

      # Number of start-time changes per named container within the last hour.
      # `for: 0s` fires as soon as the threshold is exceeded.
      - uid: alert-container-restart
        title: Container restart loop
        condition: C
        data:
          - refId: A
            relativeTimeRange:
              from: 3600
              to: 0
            datasourceUid: prometheus
            model:
              expr: changes(container_start_time_seconds{name=~".+"}[1h])
              intervalMs: 15000
              maxDataPoints: 43200
              refId: A
          - refId: B
            datasourceUid: __expr__
            model:
              expression: A
              reducer: max
              refId: B
              type: reduce
          - refId: C
            datasourceUid: __expr__
            model:
              conditions:
                - evaluator:
                    params:
                      - 5
                    type: gt
              expression: B
              refId: C
              type: threshold
        noDataState: NoData
        execErrState: Error
        for: 0s
        annotations:
          summary: Container {{ $labels.name }} has restarted more than 5 times in the last hour.
        labels:
          severity: critical
        isPaused: false
        notification_settings:
          receiver: 'Discord'

      # `up` is 1 while Prometheus can scrape a target and 0 when it cannot.
      # The raw series is queried and the rule alerts on `< 1`. Do not filter
      # with `up == 0` here: that returns only 0-valued samples, which can never
      # satisfy a `> 0` threshold, and returns no data at all while every target
      # is healthy.
      - uid: alert-service-down
        title: Service down
        condition: C
        data:
          - refId: A
            relativeTimeRange:
              from: 300
              to: 0
            datasourceUid: prometheus
            model:
              expr: up
              intervalMs: 15000
              maxDataPoints: 43200
              refId: A
          - refId: B
            datasourceUid: __expr__
            model:
              expression: A
              reducer: last
              refId: B
              type: reduce
          - refId: C
            datasourceUid: __expr__
            model:
              conditions:
                - evaluator:
                    params:
                      - 1
                    type: lt
              expression: B
              refId: C
              type: threshold
        noDataState: NoData
        execErrState: Error
        for: 2m
        annotations:
          summary: Prometheus target {{ $labels.instance }} (job {{ $labels.job }}) is down.
        labels:
          severity: critical
        isPaused: false
        notification_settings:
          receiver: 'Discord'
4 changes: 2 additions & 2 deletions src/production/production.env.template
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Production environment template: empty values must be filled in per deployment.
CLOUDFLARED_TUNNEL_TOKEN=
SENTRY_CRONS=
STACK_DOMAIN=vibetype.app
TRAEFIK_ACME_EMAIL=
TRAEFIK_ACME_PROVIDER=cloudflare
Loading
Loading