diff --git a/custom-domain/dstack-ingress/Dockerfile b/custom-domain/dstack-ingress/Dockerfile index 95fe693..a018ac9 100644 --- a/custom-domain/dstack-ingress/Dockerfile +++ b/custom-domain/dstack-ingress/Dockerfile @@ -1,4 +1,8 @@ -FROM nginx@sha256:b6653fca400812e81569f9be762ae315db685bc30b12ddcdc8616c63a227d3ca +FROM haproxy@sha256:49a0a0d6f0b8b7e59c233b06eefab1564f2c8d64f673554d368fd7d2ab4b2c2d + +# haproxy image runs as non-root (uid 99) by default; we need root for +# certbot, DNS management, and writing to /etc/haproxy/certs. +USER root RUN --mount=type=bind,source=pinned-packages.txt,target=/tmp/pinned-packages.txt,ro \ set -e; \ @@ -26,15 +30,17 @@ RUN --mount=type=bind,source=pinned-packages.txt,target=/tmp/pinned-packages.txt python3.11-venv \ curl \ jq \ - coreutils && \ + coreutils \ + mini-httpd && \ rm -rf /var/lib/apt/lists/* /var/log/* /var/cache/ldconfig/aux-cache RUN mkdir -p \ /etc/letsencrypt \ /var/www/certbot \ - /usr/share/nginx/html \ - /etc/nginx/conf.d \ - /var/log/nginx + /etc/haproxy/certs \ + /var/run/haproxy \ + /var/lib/haproxy \ + /evidences # Install scripts with deterministic permissions via bind mount RUN --mount=type=bind,source=scripts,target=/tmp/scripts,ro \ @@ -62,4 +68,4 @@ ENV PYTHONUNBUFFERED=1 COPY --chmod=666 .GIT_REV /etc/ ENTRYPOINT ["/scripts/entrypoint.sh"] -CMD ["nginx", "-g", "daemon off;"] +CMD ["haproxy", "-W", "-f", "/etc/haproxy/haproxy.cfg"] diff --git a/custom-domain/dstack-ingress/README.md b/custom-domain/dstack-ingress/README.md index d166cd2..d6a2bfc 100644 --- a/custom-domain/dstack-ingress/README.md +++ b/custom-domain/dstack-ingress/README.md @@ -1,57 +1,28 @@ -# Custom Domain Setup for dstack Applications +# dstack-ingress -This repository provides a solution for setting up custom domains with automatic SSL certificate management for dstack applications using various DNS providers and Let's Encrypt. +TCP proxy with automatic TLS termination for dstack applications. 
## Overview -This project enables you to run dstack applications with your own custom domain, complete with: +dstack-ingress is a HAProxy-based L4 (TCP) proxy that provides: - Automatic SSL certificate provisioning and renewal via Let's Encrypt -- Multi-provider DNS support (Cloudflare, Linode DNS, more to come) -- Automatic DNS configuration for CNAME, TXT, and CAA records -- Nginx reverse proxy to route traffic to your application -- Certificate evidence generation for verification -- Strong SSL/TLS configuration with modern cipher suites (AES-GCM and ChaCha20-Poly1305) +- Multi-provider DNS support (Cloudflare, Linode DNS, Namecheap) +- Pure TCP proxying — all protocols (HTTP, WebSocket, gRPC, arbitrary TCP) work transparently +- Wildcard domain support +- SNI-based multi-domain routing +- Certificate evidence generation for TEE attestation verification +- Strong TLS configuration (TLS 1.2+, AES-GCM, ChaCha20-Poly1305) ## How It Works -The dstack-ingress system provides a seamless way to set up custom domains for dstack applications with automatic SSL certificate management. Here's how it works: +1. **Bootstrap**: On first start, obtains SSL certificates from Let's Encrypt using DNS-01 validation and configures DNS records (CNAME, TXT, optional CAA). -1. **Initial Setup**: +2. **TLS Termination**: HAProxy terminates TLS and forwards the decrypted TCP stream to your backend. No HTTP inspection — the proxy operates entirely at L4. - - When first deployed, the container automatically obtains SSL certificates from Let's Encrypt using DNS validation - - It configures your DNS provider by creating necessary CNAME, TXT, and optional CAA records - - Nginx is configured to use the obtained certificates and proxy requests to your application +3. **Certificate Renewal**: A background daemon checks for renewal every 12 hours. On renewal, HAProxy is gracefully reloaded with zero downtime. -2. 
**DNS Configuration**: - - - A CNAME record is created to point your custom domain to the dstack gateway domain - - A TXT record is added with application identification information to help dstack-gateway to route traffic to your application - - If enabled, CAA records are set to restrict which Certificate Authorities can issue certificates for your domain - - The system automatically detects your DNS provider based on environment variables - -3. **Certificate Management**: - - - SSL certificates are automatically obtained during initial setup - - A simple background daemon checks for certificate renewal every 12 hours - - When certificates are renewed, Nginx is automatically reloaded to use the new certificates - - Uses a simple sleep loop instead of cron for reliability and easier debugging in containers - -4. **Evidence Generation**: - - The system generates evidence files for verification purposes - - These include the ACME account information and certificate data - - Evidence files are accessible through a dedicated endpoint - -## Features - -### Multi-Domain Support (New!) - -The dstack-ingress now supports multiple domains in a single container: - -- **Single Domain Mode** (backward compatible): Use `DOMAIN` and `TARGET_ENDPOINT` environment variables -- **Multi-Domain Mode**: Use `DOMAINS` environment variable with custom nginx configurations in `/etc/nginx/conf.d/` -- Each domain gets its own SSL certificate -- Flexible nginx configuration per domain +4. **Evidence Generation**: Generates cryptographically linked attestation evidence (ACME account, certificates, TDX quote) for TEE verification. 
### Wildcard Domain Support @@ -88,56 +59,50 @@ volumes: ## Usage -### Prerequisites - -- Host your domain on one of the supported DNS providers -- Have appropriate API credentials for your DNS provider (see [DNS Provider Configuration](DNS_PROVIDERS.md) for details) - -### Deployment - -You can either build the ingress container and push it to docker hub, or use the prebuilt image at `dstacktee/dstack-ingress:20250924`. - -#### Option 1: Use the Pre-built Image - -The fastest way to get started is to use our pre-built image. Simply use the following docker-compose configuration: +### Single Domain ```yaml services: dstack-ingress: - image: dstacktee/dstack-ingress:20250929@sha256:2b47b3e538df0b3e7724255b89369194c8c83a7cfba64d2faf0115ad0a586458 + image: dstacktee/dstack-ingress:latest ports: - "443:443" environment: - # DNS Provider - DNS_PROVIDER=cloudflare - - # Cloudflare example - CLOUDFLARE_API_TOKEN=${CLOUDFLARE_API_TOKEN} - - # Common configuration - DOMAIN=${DOMAIN} - GATEWAY_DOMAIN=${GATEWAY_DOMAIN} - CERTBOT_EMAIL=${CERTBOT_EMAIL} - SET_CAA=true - - TARGET_ENDPOINT=http://app:80 + - TARGET_ENDPOINT=app:80 volumes: - /var/run/dstack.sock:/var/run/dstack.sock - /var/run/tappd.sock:/var/run/tappd.sock - cert-data:/etc/letsencrypt + - evidences:/evidences restart: unless-stopped + app: - image: nginx # Replace with your application image + image: your-app + volumes: + - evidences:/evidences:ro restart: unless-stopped + volumes: - cert-data: # Persistent volume for certificates + cert-data: + evidences: ``` -### Multi-Domain Configuration +`TARGET_ENDPOINT` accepts bare `host:port` (preferred) or with protocol prefix (`http://app:80`, `grpc://app:50051`). The protocol prefix is stripped — HAProxy forwards raw TCP regardless of protocol. 
+ +### Multi-Domain with Routing + +Use `ROUTING_MAP` to route different domains to different backends via SNI: ```yaml services: ingress: - image: dstacktee/dstack-ingress:20250929@sha256:2b47b3e538df0b3e7724255b89369194c8c83a7cfba64d2faf0115ad0a586458 + image: dstacktee/dstack-ingress:latest ports: - "443:443" environment: @@ -147,187 +112,107 @@ services: GATEWAY_DOMAIN: _.dstack-prod5.phala.network SET_CAA: true DOMAINS: | - ${APP_DOMAIN} - ${API_DOMAIN} - + app.example.com + api.example.com + ROUTING_MAP: | + app.example.com=app-main:80 + api.example.com=app-api:8080 volumes: - /var/run/tappd.sock:/var/run/tappd.sock - letsencrypt:/etc/letsencrypt - - configs: - - source: app_conf - target: /etc/nginx/conf.d/app.conf - mode: 0444 - - source: api_conf - target: /etc/nginx/conf.d/api.conf - mode: 0444 - + - evidences:/evidences restart: unless-stopped app-main: image: nginx + volumes: + - evidences:/evidences:ro restart: unless-stopped app-api: - image: nginx + image: your-api + volumes: + - evidences:/evidences:ro restart: unless-stopped volumes: letsencrypt: - -configs: - app_conf: - content: | - server { - listen 443 ssl; - server_name ${APP_DOMAIN}; - ssl_certificate /etc/letsencrypt/live/${APP_DOMAIN}/fullchain.pem; - ssl_certificate_key /etc/letsencrypt/live/${APP_DOMAIN}/privkey.pem; - location / { - proxy_pass http://app-main:80; - } - } - api_conf: - content: | - server { - listen 443 ssl; - server_name ${API_DOMAIN}; - ssl_certificate /etc/letsencrypt/live/${API_DOMAIN}/fullchain.pem; - ssl_certificate_key /etc/letsencrypt/live/${API_DOMAIN}/privkey.pem; - location / { - proxy_pass http://app-api:80; - } - } + evidences: ``` -**Core Environment Variables:** - -- `DNS_PROVIDER`: DNS provider to use (cloudflare, linode) -- `DOMAIN`: Your custom domain (for single domain mode) -- `DOMAINS`: Multiple domains, one per line (supports environment variable substitution like `${APP_DOMAIN}`) -- `GATEWAY_DOMAIN`: The dstack gateway domain (e.g. 
`_.dstack-prod5.phala.network` for Phala Cloud) -- `CERTBOT_EMAIL`: Your email address used in Let's Encrypt certificate requests -- `TARGET_ENDPOINT`: The plain HTTP endpoint of your dstack application (for single domain mode) -- `SET_CAA`: Set to `true` to enable CAA record setup -- `CLIENT_MAX_BODY_SIZE`: Optional value for nginx `client_max_body_size` (numeric with optional `k|m|g` suffix, e.g. `50m`) in single-domain mode -- `PROXY_READ_TIMEOUT`: Optional value for nginx `proxy_read_timeout` (numeric with optional `s|m|h` suffix, e.g. `30s`) in single-domain mode -- `PROXY_SEND_TIMEOUT`: Optional value for nginx `proxy_send_timeout` (numeric with optional `s|m|h` suffix, e.g. `30s`) in single-domain mode -- `PROXY_CONNECT_TIMEOUT`: Optional value for nginx `proxy_connect_timeout` (numeric with optional `s|m|h` suffix, e.g. `10s`) in single-domain mode -- `PROXY_BUFFER_SIZE`: Optional value for nginx `proxy_buffer_size` (numeric with optional `k|m` suffix, e.g. `128k`) in single-domain mode -- `PROXY_BUFFERS`: Optional value for nginx `proxy_buffers` (format: `number size`, e.g. `4 256k`) in single-domain mode -- `PROXY_BUSY_BUFFERS_SIZE`: Optional value for nginx `proxy_busy_buffers_size` (numeric with optional `k|m` suffix, e.g. `256k`) in single-domain mode -- `CERTBOT_STAGING`: Optional; set this value to the string `true` to set the `--staging` server option on the [`certbot` cli](https://eff-certbot.readthedocs.io/en/stable/using.html#certbot-command-line-options) - -**Backward Compatibility:** - -- If both `DOMAIN` and `TARGET_ENDPOINT` are set, the system operates in single-domain mode with auto-generated nginx config -- If `DOMAINS` is set, the system operates in multi-domain mode and expects custom nginx configs in `/etc/nginx/conf.d/` -- You can use both modes simultaneously - -For provider-specific configuration details, see [DNS Provider Configuration](DNS_PROVIDERS.md). 
- -#### Option 2: Build Your Own Image - -If you prefer to build the image yourself: +### Wildcard Domains -1. Clone this repository -2. Build the Docker image using the provided build script: +Wildcard certificates work out of the box with DNS-01 validation: -```bash -./build-image.sh yourusername/dstack-ingress:tag -``` - -**Important**: You must use the `build-image.sh` script to build the image. This script ensures reproducible builds with: - -- Specific buildkit version (v0.20.2) -- Deterministic timestamps (`SOURCE_DATE_EPOCH=0`) -- Package pinning for consistency -- Git revision tracking - -Direct `docker build` commands will not work properly due to the specialized build requirements. - -3. Push to your registry (optional): - -```bash -docker push yourusername/dstack-ingress:tag +```yaml +environment: + - DOMAIN=*.example.com + - TARGET_ENDPOINT=app:80 ``` -4. Update the docker-compose.yaml file with your image name and deploy +## Environment Variables -#### gRPC Support +### Required -If your dstack application uses gRPC, you can set `TARGET_ENDPOINT` to `grpc://app:50051`. - -example: - -```yaml -services: - dstack-ingress: - image: dstacktee/dstack-ingress:20250929@sha256:2b47b3e538df0b3e7724255b89369194c8c83a7cfba64d2faf0115ad0a586458 - ports: - - "443:443" - environment: - - CLOUDFLARE_API_TOKEN=${CLOUDFLARE_API_TOKEN} - - DOMAIN=${DOMAIN} - - GATEWAY_DOMAIN=${GATEWAY_DOMAIN} - - CERTBOT_EMAIL=${CERTBOT_EMAIL} - - SET_CAA=true - - TARGET_ENDPOINT=grpc://app:50051 - volumes: - - /var/run/dstack.sock:/var/run/dstack.sock - - /var/run/tappd.sock:/var/run/tappd.sock - - cert-data:/etc/letsencrypt - restart: unless-stopped - app: - image: your-grpc-app - restart: unless-stopped -volumes: - cert-data: -``` +| Variable | Description | +|----------|-------------| +| `DOMAIN` | Your domain (single-domain mode). Supports wildcards (`*.example.com`) | +| `TARGET_ENDPOINT` | Backend address, e.g. 
`app:80` or `http://app:80` | +| `GATEWAY_DOMAIN` | dstack gateway domain (e.g. `_.dstack-prod5.phala.network`) | +| `CERTBOT_EMAIL` | Email for Let's Encrypt registration | +| `DNS_PROVIDER` | DNS provider (`cloudflare`, `linode`, `namecheap`) | -## Domain Attestation and Verification +### Optional -The dstack-ingress system provides mechanisms to verify and attest that your custom domain endpoint is secure and properly configured. This comprehensive verification approach ensures the integrity and authenticity of your application. +| Variable | Default | Description | +|----------|---------|-------------| +| `PORT` | `443` | HAProxy listen port | +| `DOMAINS` | | Multiple domains, one per line | +| `ROUTING_MAP` | | Multi-domain routing: `domain=host:port` per line | +| `SET_CAA` | `false` | Enable CAA DNS record | +| `TXT_PREFIX` | `_dstack-app-address` | DNS TXT record prefix | +| `CERTBOT_STAGING` | `false` | Use Let's Encrypt staging server | +| `MAXCONN` | `4096` | HAProxy max connections | +| `TIMEOUT_CONNECT` | `10s` | Backend connect timeout | +| `TIMEOUT_CLIENT` | `86400s` | Client-side timeout (24h for long-lived connections) | +| `TIMEOUT_SERVER` | `86400s` | Server-side timeout | +| `EVIDENCE_SERVER` | `true` | Serve evidence files at `/evidences/` on the TLS port | +| `EVIDENCE_PORT` | `80` | Internal port for evidence HTTP server | +| `ALPN` | | TLS ALPN protocols (e.g. `h2,http/1.1`). Only set if backends support h2c | -### Evidence Collection +For DNS provider credentials, see [DNS_PROVIDERS.md](DNS_PROVIDERS.md). -When certificates are issued or renewed, the system automatically generates a set of cryptographically linked evidence files: +## Evidence & Attestation -1. **Access Evidence Files**: +Evidence files are served at `https://your-domain.com/evidences/` by default (via payload inspection in HAProxy's TCP mode). They can also be accessed by the backend application through the shared `/evidences` volume. 
- - Evidence files are accessible at `https://your-domain.com/evidences/` - - Key files include `acme-account.json`, `cert.pem`, `sha256sum.txt`, and `quote.json` +To disable the built-in evidence endpoint and serve evidence files only through your backend, set `EVIDENCE_SERVER=false`. -2. **Verification Chain**: +### Evidence Files - - `quote.json` contains a TDX quote with the SHA-256 digest of `sha256sum.txt` embedded in the report_data field - - `sha256sum.txt` contains cryptographic checksums of both `acme-account.json` and `cert.pem` - - When the TDX quote is verified, it cryptographically proves the integrity of the entire evidence chain +| File | Description | +|------|-------------| +| `acme-account.json` | ACME account used to request certificates | +| `cert-{domain}.pem` | Let's Encrypt certificate for each domain | +| `sha256sum.txt` | SHA-256 checksums of all evidence files | +| `quote.json` | TDX quote with `sha256sum.txt` digest in report_data | -3. **Certificate Authentication**: - - `acme-account.json` contains the ACME account credentials used to request certificates - - When combined with the CAA DNS record, this provides evidence that certificates can only be requested from within this specific TEE application - - `cert.pem` is the Let's Encrypt certificate currently serving your custom domain +### Verification Chain -### CAA Record Verification +1. Verify the TDX quote in `quote.json` +2. Extract `report_data` — it contains the SHA-256 of `sha256sum.txt` +3. Verify checksums in `sha256sum.txt` against `acme-account.json` and `cert-*.pem` +4. 
This proves the certificates were obtained within the TEE -If you've enabled CAA records (`SET_CAA=true`), you can verify that only authorized Certificate Authorities can issue certificates for your domain: +## Building ```bash -dig CAA your-domain.com +./build-image.sh +# Or push directly: +./build-image.sh --push yourusername/dstack-ingress:tag ``` -The output will display CAA records that restrict certificate issuance exclusively to Let's Encrypt with your specific account URI, providing an additional layer of security. - -### TLS Certificate Transparency - -All Let's Encrypt certificates are logged in public Certificate Transparency (CT) logs, enabling independent verification: - -**CT Log Verification**: - -- Visit [crt.sh](https://crt.sh/) and search for your domain -- Confirm that the certificates match those issued by the dstack-ingress system -- This public logging ensures that all certificates are visible and can be monitored for unauthorized issuance +The build script ensures reproducibility via pinned packages, deterministic timestamps, and specific buildkit version. ## License diff --git a/custom-domain/dstack-ingress/docker-compose.multi.yaml b/custom-domain/dstack-ingress/docker-compose.multi.yaml index 3af19e4..74d92c5 100644 --- a/custom-domain/dstack-ingress/docker-compose.multi.yaml +++ b/custom-domain/dstack-ingress/docker-compose.multi.yaml @@ -1,6 +1,7 @@ services: ingress: - image: dstacktee/dstack-ingress:20250929@sha256:2b47b3e538df0b3e7724255b89369194c8c83a7cfba64d2faf0115ad0a586458 + # TODO: pin by digest for production (dstacktee/dstack-ingress@sha256:...) 
+ image: dstacktee/dstack-ingress:latest ports: - "443:443" environment: @@ -10,58 +11,31 @@ services: GATEWAY_DOMAIN: _.dstack-prod5.phala.network SET_CAA: true DOMAINS: | - ${APP_DOMAIN} - ${API_DOMAIN} + app.example.com + api.example.com + ROUTING_MAP: | + app.example.com=app-main:80 + api.example.com=app-api:8080 volumes: - /var/run/tappd.sock:/var/run/tappd.sock - letsencrypt:/etc/letsencrypt - - configs: - - source: app_conf - target: /etc/nginx/conf.d/app.conf - mode: 0444 - - source: api_conf - target: /etc/nginx/conf.d/api.conf - mode: 0444 + - evidences:/evidences restart: unless-stopped app-main: image: nginx + volumes: + - evidences:/evidences:ro restart: unless-stopped app-api: image: nginx + volumes: + - evidences:/evidences:ro restart: unless-stopped volumes: letsencrypt: - -configs: - app_conf: - content: | - server { - listen 443 ssl; - server_name ${APP_DOMAIN}; - - ssl_certificate /etc/letsencrypt/live/${APP_DOMAIN}/fullchain.pem; - ssl_certificate_key /etc/letsencrypt/live/${APP_DOMAIN}/privkey.pem; - - location / { - proxy_pass http://app-main:80; - } - } - api_conf: - content: | - server { - listen 443 ssl; - server_name ${API_DOMAIN}; - - ssl_certificate /etc/letsencrypt/live/${API_DOMAIN}/fullchain.pem; - ssl_certificate_key /etc/letsencrypt/live/${API_DOMAIN}/privkey.pem; - - location / { - proxy_pass http://app-api:80; - } - } + evidences: diff --git a/custom-domain/dstack-ingress/docker-compose.yaml b/custom-domain/dstack-ingress/docker-compose.yaml index 89c936c..ad79ed1 100644 --- a/custom-domain/dstack-ingress/docker-compose.yaml +++ b/custom-domain/dstack-ingress/docker-compose.yaml @@ -1,6 +1,7 @@ services: dstack-ingress: - image: dstacktee/dstack-ingress:20250929@sha256:2b47b3e538df0b3e7724255b89369194c8c83a7cfba64d2faf0115ad0a586458 + # TODO: pin by digest for production (dstacktee/dstack-ingress@sha256:...) 
+ image: dstacktee/dstack-ingress:latest ports: - "443:443" environment: @@ -14,11 +15,15 @@ services: - /var/run/dstack.sock:/var/run/dstack.sock - /var/run/tappd.sock:/var/run/tappd.sock - cert-data:/etc/letsencrypt + - evidences:/evidences restart: unless-stopped app: image: nginx + volumes: + - evidences:/evidences:ro restart: unless-stopped volumes: cert-data: + evidences: diff --git a/custom-domain/dstack-ingress/pinned-packages.txt b/custom-domain/dstack-ingress/pinned-packages.txt index 2e7657e..3058c6e 100644 --- a/custom-domain/dstack-ingress/pinned-packages.txt +++ b/custom-domain/dstack-ingress/pinned-packages.txt @@ -1,82 +1,62 @@ adduser=3.134 apt=2.6.1 -base-files=12.4+deb12u10 +base-files=12.4+deb12u11 base-passwd=3.6.1 -bash=5.2.15-2+b7 +bash=5.2.15-2+b8 bsdutils=1:2.38.1-5+deb12u3 -ca-certificates=20230311 +ca-certificates=20230311+deb12u1 coreutils=9.1-1 curl=7.88.1-10+deb12u12 dash=0.5.12-2 debconf=1.5.82 -debian-archive-keyring=2023.3+deb12u1 +debian-archive-keyring=2023.3+deb12u2 debianutils=5.7-0.5~deb12u1 diffutils=1:3.8-4 dpkg=1.21.22 e2fsprogs=1.47.0-2 findutils=4.9.0-4 -fontconfig-config=2.14.1-4 -fonts-dejavu-core=2.37-6 -gcc-12-base:amd64=12.2.0-14 -gettext-base=0.21-12 +gcc-12-base:amd64=12.2.0-14+deb12u1 gpgv=2.2.40-1.1 grep=3.8-5 gzip=1.12-1 hostname=3.23+nmu1 init-system-helpers=1.65.2 jq=1.6-2.1 -libabsl20220623:amd64=20220623.1-1 libacl1:amd64=2.3.1-3 -libaom3:amd64=3.6.0-1+deb12u1 libapt-pkg6.0:amd64=2.6.1 libattr1:amd64=1:2.5.1-4 libaudit-common=1:3.0.9-1 libaudit1:amd64=1:3.0.9-1 -libavif15:amd64=0.11.1-1 libblkid1:amd64=2.38.1-5+deb12u3 libbrotli1:amd64=1.0.9-2+b6 -libbsd0:amd64=0.11.7-2 libbz2-1.0:amd64=1.0.8-5+b1 libc-bin=2.36-9+deb12u10 libc6:amd64=2.36-9+deb12u10 libcap-ng0:amd64=0.8.3-1+b3 -libcap2:amd64=1:2.66-4 +libcap2:amd64=1:2.66-4+deb12u1 libcom-err2:amd64=1.47.0-2 libcrypt1:amd64=1:4.4.33-2 libcurl4:amd64=7.88.1-10+deb12u12 -libdav1d6:amd64=1.0.0-2+deb12u1 libdb5.3:amd64=5.3.28+dfsg2-1 
-libde265-0:amd64=1.0.11-1+deb12u2 libdebconfclient0:amd64=0.270 -libdeflate0:amd64=1.14-1 -libedit2:amd64=3.1-20221030-2 libexpat1:amd64=2.5.0-1+deb12u1 libext2fs2:amd64=1.47.0-2 libffi8:amd64=3.4.4-1 -libfontconfig1:amd64=2.14.1-4 -libfreetype6:amd64=2.12.1+dfsg-5+deb12u4 -libgav1-1:amd64=0.18.0-1+b1 -libgcc-s1:amd64=12.2.0-14 +libgcc-s1:amd64=12.2.0-14+deb12u1 libgcrypt20:amd64=1.10.1-3 -libgd3:amd64=2.3.3-9 -libgeoip1:amd64=1.6.12-10 libgmp10:amd64=2:6.2.1+dfsg1-1.1 -libgnutls30:amd64=3.7.9-2+deb12u4 +libgnutls30:amd64=3.7.9-2+deb12u5 libgpg-error0:amd64=1.46-1 libgssapi-krb5-2:amd64=1.20.1-2+deb12u2 -libheif1:amd64=1.15.1-1+deb12u1 libhogweed6:amd64=3.8.1-2 -libicu72:amd64=72.1-3 libidn2-0:amd64=2.3.3-1+b1 -libjbig0:amd64=2.1-6.1 -libjpeg62-turbo:amd64=1:2.1.5-2 libjq1:amd64=1.6-2.1 libk5crypto3:amd64=1.20.1-2+deb12u2 libkeyutils1:amd64=1.6.3-2 libkrb5-3:amd64=1.20.1-2+deb12u2 libkrb5support0:amd64=1.20.1-2+deb12u2 libldap-2.5-0:amd64=2.5.13+dfsg-5 -liblerc4:amd64=4.0.0+ds-2 +liblua5.4-0:amd64=5.4.4-3+deb12u1 liblz4-1:amd64=1.9.4-1 liblzma5:amd64=5.4.1-1 libmd0:amd64=1.0.4-2 @@ -85,7 +65,6 @@ libncursesw6:amd64=6.4-4 libnettle8:amd64=3.8.1-2 libnghttp2-14:amd64=1.52.0-1+deb12u2 libnsl2:amd64=1.3.0-2 -libnuma1:amd64=2.0.16-1 libonig5:amd64=6.9.8-1 libp11-kit0:amd64=0.24.1-2 libpam-modules-bin=1.5.2-6+deb12u1 @@ -93,12 +72,10 @@ libpam-modules:amd64=1.5.2-6+deb12u1 libpam-runtime=1.5.2-6+deb12u1 libpam0g:amd64=1.5.2-6+deb12u1 libpcre2-8-0:amd64=10.42-1 -libpng16-16:amd64=1.6.39-2 libpsl5:amd64=0.21.2-1 libpython3-stdlib:amd64=3.11.2-1+b1 libpython3.11-minimal:amd64=3.11.2-6+deb12u5 libpython3.11-stdlib:amd64=3.11.2-6+deb12u5 -librav1e0:amd64=0.5.1-6 libreadline8:amd64=8.2-1.3 librtmp1:amd64=2.4+20151223.gitfa8646d.1-2+b2 libsasl2-2:amd64=2.1.28+dfsg-10 @@ -112,46 +89,30 @@ libsmartcols1:amd64=2.38.1-5+deb12u3 libsqlite3-0:amd64=3.40.1-2+deb12u1 libss2:amd64=1.47.0-2 libssh2-1:amd64=1.10.0-3+b1 -libssl3:amd64=3.0.15-1~deb12u1 -libstdc++6:amd64=12.2.0-14 
-libsvtav1enc1:amd64=1.4.1+dfsg-1 -libsystemd0:amd64=252.36-1~deb12u1 +libssl3:amd64=3.0.17-1~deb12u2 +libstdc++6:amd64=12.2.0-14+deb12u1 +libsystemd0:amd64=252.38-1~deb12u1 libtasn1-6:amd64=4.19.0-2+deb12u1 -libtiff6:amd64=4.5.0-6+deb12u2 libtinfo6:amd64=6.4-4 libtirpc-common=1.3.3+ds-1 libtirpc3:amd64=1.3.3+ds-1 -libudev1:amd64=252.36-1~deb12u1 +libudev1:amd64=252.38-1~deb12u1 libunistring2:amd64=1.0-2 libuuid1:amd64=2.38.1-5+deb12u3 -libwebp7:amd64=1.2.4-0.2+deb12u1 -libx11-6:amd64=2:1.8.4-2+deb12u2 -libx11-data=2:1.8.4-2+deb12u2 -libx265-199:amd64=3.5-2+b1 -libxau6:amd64=1:1.0.9-1 -libxcb1:amd64=1.15-1 -libxdmcp6:amd64=1:1.1.2-3 -libxml2:amd64=2.9.14+dfsg-1.3~deb12u1 -libxpm4:amd64=1:3.5.12-1.1+deb12u1 -libxslt1.1:amd64=1.1.35-1+deb12u1 libxxhash0:amd64=0.8.1-1 -libyuv0:amd64=0.0~git20230123.b2528b0-1 libzstd1:amd64=1.5.4+dfsg2-5 -login=1:4.13+dfsg1-1+b1 +login=1:4.13+dfsg1-1+deb12u1 logsave=1.47.0-2 mawk=1.3.4.20200120-3.1 +lsb-base=11.6 media-types=10.0.0 +mini-httpd=1.30-3 mount=2.38.1-5+deb12u3 ncurses-base=6.4-4 ncurses-bin=6.4-4 -nginx-module-geoip=1.27.4-1~bookworm -nginx-module-image-filter=1.27.4-1~bookworm -nginx-module-njs=1.27.4+0.8.9-1~bookworm -nginx-module-xslt=1.27.4-1~bookworm -nginx=1.27.4-1~bookworm -openssl=3.0.15-1~deb12u1 -passwd=1:4.13+dfsg1-1+b1 -perl-base=5.36.0-7+deb12u1 +openssl=3.0.17-1~deb12u2 +passwd=1:4.13+dfsg1-1+deb12u1 +perl-base=5.36.0-7+deb12u2 python3-certifi=2022.9.24-1 python3-chardet=5.1.0+dfsg-2 python3-charset-normalizer=3.0.1-2 diff --git a/custom-domain/dstack-ingress/scripts/build-combined-pems.sh b/custom-domain/dstack-ingress/scripts/build-combined-pems.sh new file mode 100644 index 0000000..33f8c70 --- /dev/null +++ b/custom-domain/dstack-ingress/scripts/build-combined-pems.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# build-combined-pems.sh - Concatenate Let's Encrypt cert files into +# HAProxy combined PEM format (fullchain + privkey in one file). 
+ +set -e + +CERT_DIR="/etc/haproxy/certs" +mkdir -p "$CERT_DIR" + +all_domains=$(get-all-domains.sh) + +while IFS= read -r domain; do + [[ -n "$domain" ]] || continue + le_dir="/etc/letsencrypt/live/${domain}" + combined="${CERT_DIR}/${domain}.pem" + if [ -f "${le_dir}/fullchain.pem" ] && [ -f "${le_dir}/privkey.pem" ]; then + cat "${le_dir}/fullchain.pem" "${le_dir}/privkey.pem" > "$combined" + chmod 600 "$combined" + echo "Combined PEM created: ${combined}" + else + echo "Warning: Cert files missing for ${domain}, skipping" + fi +done <<< "$all_domains" diff --git a/custom-domain/dstack-ingress/scripts/entrypoint.sh b/custom-domain/dstack-ingress/scripts/entrypoint.sh index d1994a3..880e078 100644 --- a/custom-domain/dstack-ingress/scripts/entrypoint.sh +++ b/custom-domain/dstack-ingress/scripts/entrypoint.sh @@ -6,6 +6,13 @@ source "/scripts/functions.sh" PORT=${PORT:-443} TXT_PREFIX=${TXT_PREFIX:-"_dstack-app-address"} +MAXCONN=${MAXCONN:-4096} +TIMEOUT_CONNECT=${TIMEOUT_CONNECT:-10s} +TIMEOUT_CLIENT=${TIMEOUT_CLIENT:-86400s} +TIMEOUT_SERVER=${TIMEOUT_SERVER:-86400s} +EVIDENCE_SERVER=${EVIDENCE_SERVER:-true} +EVIDENCE_PORT=${EVIDENCE_PORT:-80} +ALPN=${ALPN:-} if ! PORT=$(sanitize_port "$PORT"); then exit 1 @@ -16,35 +23,48 @@ fi if ! TARGET_ENDPOINT=$(sanitize_target_endpoint "$TARGET_ENDPOINT"); then exit 1 fi -if ! CLIENT_MAX_BODY_SIZE=$(sanitize_client_max_body_size "$CLIENT_MAX_BODY_SIZE"); then - exit 1 -fi -if ! PROXY_READ_TIMEOUT=$(sanitize_proxy_timeout "$PROXY_READ_TIMEOUT"); then +if ! TXT_PREFIX=$(sanitize_dns_label "$TXT_PREFIX"); then exit 1 fi -if ! PROXY_SEND_TIMEOUT=$(sanitize_proxy_timeout "$PROXY_SEND_TIMEOUT"); then +if ! MAXCONN=$(sanitize_positive_integer "$MAXCONN" "MAXCONN"); then exit 1 fi -if ! PROXY_CONNECT_TIMEOUT=$(sanitize_proxy_timeout "$PROXY_CONNECT_TIMEOUT"); then +if ! TIMEOUT_CONNECT=$(sanitize_haproxy_timeout "$TIMEOUT_CONNECT" "TIMEOUT_CONNECT"); then exit 1 fi -if ! 
PROXY_BUFFER_SIZE=$(sanitize_proxy_buffer_size "$PROXY_BUFFER_SIZE"); then +if ! TIMEOUT_CLIENT=$(sanitize_haproxy_timeout "$TIMEOUT_CLIENT" "TIMEOUT_CLIENT"); then exit 1 fi -if ! PROXY_BUFFERS=$(sanitize_proxy_buffers "$PROXY_BUFFERS"); then +if ! TIMEOUT_SERVER=$(sanitize_haproxy_timeout "$TIMEOUT_SERVER" "TIMEOUT_SERVER"); then exit 1 fi -if ! PROXY_BUSY_BUFFERS_SIZE=$(sanitize_proxy_buffer_size "$PROXY_BUSY_BUFFERS_SIZE"); then +if ! EVIDENCE_PORT=$(sanitize_positive_integer "$EVIDENCE_PORT" "EVIDENCE_PORT"); then exit 1 fi -if ! TXT_PREFIX=$(sanitize_dns_label "$TXT_PREFIX"); then +if ! ALPN=$(sanitize_alpn "$ALPN"); then exit 1 fi -PROXY_CMD="proxy" -if [[ "${TARGET_ENDPOINT}" == grpc://* ]]; then - PROXY_CMD="grpc" -fi +# Warn about deprecated L7 env vars +for var in CLIENT_MAX_BODY_SIZE PROXY_READ_TIMEOUT PROXY_SEND_TIMEOUT PROXY_CONNECT_TIMEOUT PROXY_BUFFER_SIZE PROXY_BUFFERS PROXY_BUSY_BUFFERS_SIZE; do + if [ -n "${!var}" ]; then + echo "Warning: $var is ignored in TCP proxy mode" + fi +done + +# Parse TARGET_ENDPOINT into host:port for haproxy backend +parse_target_endpoint() { + local endpoint="$1" + # Strip protocol prefix if present (http://, https://, grpc://) + local hostport="${endpoint#*://}" + # If no protocol was stripped, use as-is + if [ "$hostport" = "$endpoint" ]; then + hostport="$endpoint" + fi + # Strip any trailing path + hostport="${hostport%%/*}" + echo "$hostport" +} echo "Setting up certbot environment" @@ -105,106 +125,139 @@ EOF setup_py_env -setup_nginx_conf() { - local cert_name - cert_name=$(cert_dir_name "$DOMAIN") +# Emit common haproxy global/defaults/frontend preamble. +# Both single-domain and multi-domain modes share this identical config. +emit_haproxy_preamble() { + # "crt " loads all PEM files from the directory. + # ALPN is appended conditionally via ${ALPN:+ alpn ${ALPN}}. 
+ cat <<EOF >/etc/haproxy/haproxy.cfg +global + log stdout format raw local0 + maxconn ${MAXCONN} + pidfile /var/run/haproxy/haproxy.pid + ssl-default-bind-ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305 + ssl-default-bind-ciphersuites TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:TLS_CHACHA20_POLY1305_SHA256 + ssl-default-bind-options ssl-min-ver TLSv1.2 no-tls-tickets + ssl-default-bind-curves secp384r1 + +defaults + log global + mode tcp + option tcplog + timeout connect ${TIMEOUT_CONNECT} + timeout client ${TIMEOUT_CLIENT} + timeout server ${TIMEOUT_SERVER} + +frontend tls_in + bind :${PORT} ssl crt /etc/haproxy/certs/${ALPN:+ alpn ${ALPN}} +EOF - local client_max_body_size_conf="" - if [ -n "$CLIENT_MAX_BODY_SIZE" ]; then - client_max_body_size_conf=" client_max_body_size ${CLIENT_MAX_BODY_SIZE};" - fi + if [ "$EVIDENCE_SERVER" = "true" ]; then + cat <<'EVIDENCE_BLOCK' >>/etc/haproxy/haproxy.cfg - local proxy_read_timeout_conf="" - if [ -n "$PROXY_READ_TIMEOUT" ]; then - proxy_read_timeout_conf=" ${PROXY_CMD}_read_timeout ${PROXY_READ_TIMEOUT};" + # Route /evidences requests to local evidence HTTP server + tcp-request inspect-delay 5s + tcp-request content accept if WAIT_END + acl is_evidence payload(0,0) -m beg "GET /evidences" + acl is_evidence payload(0,0) -m beg "HEAD /evidences" + use_backend be_evidence if is_evidence +EVIDENCE_BLOCK fi +} - local proxy_send_timeout_conf="" - if [ -n "$PROXY_SEND_TIMEOUT" ]; then - proxy_send_timeout_conf=" ${PROXY_CMD}_send_timeout ${PROXY_SEND_TIMEOUT};" - fi +# Append the evidence backend block to haproxy.cfg +emit_evidence_backend() { + if [ "$EVIDENCE_SERVER" = "true" ]; then + cat <<EOF >>/etc/haproxy/haproxy.cfg - local proxy_connect_timeout_conf="" - if [ -n "$PROXY_CONNECT_TIMEOUT" ]; then - proxy_connect_timeout_conf=" ${PROXY_CMD}_connect_timeout ${PROXY_CONNECT_TIMEOUT};" +backend
be_evidence + mode http + http-request replace-path /evidences(.*) \1 + server evidence 127.0.0.1:${EVIDENCE_PORT} +EOF fi +} - local proxy_buffer_size_conf="" - if [ -n "$PROXY_BUFFER_SIZE" ]; then - proxy_buffer_size_conf=" proxy_buffer_size ${PROXY_BUFFER_SIZE};" - fi +# Generate haproxy.cfg for single-domain mode (DOMAIN + TARGET_ENDPOINT) +setup_haproxy_cfg() { + local target_hostport + target_hostport=$(parse_target_endpoint "$TARGET_ENDPOINT") - local proxy_buffers_conf="" - if [ -n "$PROXY_BUFFERS" ]; then - proxy_buffers_conf=" proxy_buffers ${PROXY_BUFFERS};" - fi + emit_haproxy_preamble - local proxy_busy_buffers_size_conf="" - if [ -n "$PROXY_BUSY_BUFFERS_SIZE" ]; then - proxy_busy_buffers_size_conf=" proxy_busy_buffers_size ${PROXY_BUSY_BUFFERS_SIZE};" - fi + cat <<EOF >>/etc/haproxy/haproxy.cfg - cat <<EOF >/etc/nginx/conf.d/default.conf -server { - listen ${PORT} ssl; - http2 on; - server_name ${DOMAIN}; - - # SSL certificate configuration - ssl_certificate /etc/letsencrypt/live/${cert_name}/fullchain.pem; - ssl_certificate_key /etc/letsencrypt/live/${cert_name}/privkey.pem; - - # Modern SSL configuration - TLS 1.2 and 1.3 only - ssl_protocols TLSv1.2 TLSv1.3; - - # Strong cipher suites - Only AES-GCM and ChaCha20-Poly1305 - ssl_ciphers 'TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:TLS_CHACHA20_POLY1305_SHA256:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305'; - - # Prefer server cipher suites - ssl_prefer_server_ciphers on; - - # ECDH curve for ECDHE ciphers - ssl_ecdh_curve secp384r1; - - # Enable OCSP stapling - ssl_stapling on; - ssl_stapling_verify on; - ssl_trusted_certificate /etc/letsencrypt/live/${cert_name}/fullchain.pem; - resolver 8.8.8.8 8.8.4.4 valid=300s; - resolver_timeout 5s; - - # SSL session configuration - ssl_session_timeout 1d; - ssl_session_cache shared:SSL:50m; - ssl_session_tickets off; - - # SSL buffer size
(optimized for TLS 1.3) - ssl_buffer_size 4k; -${proxy_buffer_size_conf} -${proxy_buffers_conf} -${proxy_busy_buffers_size_conf} - - # Disable SSL renegotiation - ssl_early_data off; -${client_max_body_size_conf} - - location / { - ${PROXY_CMD}_pass ${TARGET_ENDPOINT}; - ${PROXY_CMD}_set_header Host \$host; - ${PROXY_CMD}_set_header X-Real-IP \$remote_addr; - ${PROXY_CMD}_set_header X-Forwarded-For \$proxy_add_x_forwarded_for; - ${PROXY_CMD}_set_header X-Forwarded-Proto \$scheme; -${proxy_read_timeout_conf} -${proxy_send_timeout_conf} -${proxy_connect_timeout_conf} - } - - location /evidences/ { - alias /evidences/; - autoindex on; - } -} + default_backend be_upstream + +backend be_upstream + server app1 ${target_hostport} EOF + + emit_evidence_backend +} + +# Generate haproxy.cfg for multi-domain mode (ROUTING_MAP) +setup_haproxy_cfg_multi() { + emit_haproxy_preamble + + # Parse ROUTING_MAP and generate use_backend rules + backend sections + # Support both newline-separated and comma-separated formats + local routing_map_normalized + routing_map_normalized=$(echo "$ROUTING_MAP" | tr ',' '\n') + + local backend_rules="" + local backend_sections="" + local first_be_name="" + local domain target be_name + + while IFS= read -r line; do + [[ -n "$line" ]] || continue + [[ "$line" == \#* ]] && continue + domain="${line%%=*}" + target="${line#*=}" + domain=$(echo "$domain" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') + target=$(echo "$target" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') + [[ -n "$domain" && -n "$target" ]] || continue + + # Validate domain and target to prevent config injection + if ! domain=$(sanitize_domain "$domain"); then + echo "Error: Invalid domain in ROUTING_MAP: ${line}" >&2 + exit 1 + fi + if ! 
target=$(sanitize_target_endpoint "$target"); then + echo "Error: Invalid target in ROUTING_MAP: ${line}" >&2 + exit 1 + fi + + # Strip protocol prefix from target if present + target=$(parse_target_endpoint "$target") + + # Generate safe backend name from domain + be_name="be_$(echo "$domain" | sed 's/[^A-Za-z0-9]/_/g')" + + if [ -z "$first_be_name" ]; then + first_be_name="$be_name" + fi + + backend_rules="${backend_rules} + use_backend ${be_name} if { ssl_fc_sni -i ${domain} }" + backend_sections="${backend_sections} + +backend ${be_name} + server s1 ${target}" + done <<< "$routing_map_normalized" + + echo "$backend_rules" >> /etc/haproxy/haproxy.cfg + + # Default to first backend in ROUTING_MAP + if [ -n "$first_be_name" ]; then + echo "" >> /etc/haproxy/haproxy.cfg + echo " default_backend ${first_be_name}" >> /etc/haproxy/haproxy.cfg + fi + + echo "$backend_sections" >> /etc/haproxy/haproxy.cfg + + emit_evidence_backend } set_alias_record() { @@ -341,12 +394,22 @@ else generate-evidences.sh fi -renewal-daemon.sh & +# Build combined PEM files for haproxy +build-combined-pems.sh -mkdir -p /var/log/nginx +# Generate haproxy config +if [ -n "$ROUTING_MAP" ]; then + setup_haproxy_cfg_multi +elif [ -n "$DOMAIN" ] && [ -n "$TARGET_ENDPOINT" ]; then + setup_haproxy_cfg +fi -if [ -n "$DOMAIN" ] && [ -n "$TARGET_ENDPOINT" ]; then - setup_nginx_conf +# Start evidence HTTP server if enabled +if [ "$EVIDENCE_SERVER" = "true" ]; then + mini_httpd -d /evidences -p "${EVIDENCE_PORT}" -D -l /dev/stderr & + echo "Evidence server started on port ${EVIDENCE_PORT} (mini_httpd)" fi +renewal-daemon.sh & + exec "$@" diff --git a/custom-domain/dstack-ingress/scripts/functions.sh b/custom-domain/dstack-ingress/scripts/functions.sh index f699a0a..2ff37e4 100644 --- a/custom-domain/dstack-ingress/scripts/functions.sh +++ b/custom-domain/dstack-ingress/scripts/functions.sh @@ -33,7 +33,9 @@ sanitize_target_endpoint() { echo "" return 0 fi - if [[ "$candidate" =~ 
# Validate that a value is a positive decimal integer (>= 1).
#
# Arguments:
#   $1 - candidate value
#   $2 - name used in the error message (default: "value")
# Outputs:
#   The value in canonical decimal form (leading zeros removed) on stdout,
#   or an error message on stderr.
# Returns:
#   0 when valid, 1 otherwise.
sanitize_positive_integer() {
    local candidate="$1"
    local name="${2:-value}"
    # Checked purely as text. Shell arithmetic would read a leading zero as
    # octal ("08" is an error, "010" is 8) and wraps above 2^63-1.
    if [[ "$candidate" =~ ^0*[1-9][0-9]*$ ]]; then
        # Drop leading zeros so no later consumer can misread the value.
        printf '%s\n' "${candidate#"${candidate%%[!0]*}"}"
    else
        echo "Error: Invalid ${name}: $candidate (must be a positive integer)" >&2
        return 1
    fi
}

# Validate a HAProxy time value.
#
# Arguments:
#   $1 - candidate value
#   $2 - name used in the error message (default: "timeout")
# Outputs:
#   The value on stdout, or an error message on stderr.
# Returns:
#   0 when valid, 1 otherwise.
sanitize_haproxy_timeout() {
    local candidate="$1"
    local name="${2:-timeout}"
    # Require a time suffix — bare numbers are milliseconds in HAProxy,
    # which is almost never what users intend.
    if [[ "$candidate" =~ ^[0-9]+(us|ms|s|m|h|d)$ ]]; then
        printf '%s\n' "$candidate"
    else
        echo "Error: Invalid ${name}: $candidate (must include suffix, e.g. 10s, 5m, 86400s)" >&2
        return 1
    fi
}

# Validate an ALPN protocol list.
#
# Arguments:
#   $1 - candidate value; an empty value is accepted and echoed as empty
# Outputs:
#   The value on stdout, or an error message on stderr.
# Returns:
#   0 when valid (or empty), 1 otherwise.
sanitize_alpn() {
    local candidate="$1"
    if [ -z "$candidate" ]; then
        echo ""
        return 0
    fi
    # ALPN value is comma-separated protocol names (e.g. "h2,http/1.1")
    # Only allow alphanumeric, dots, slashes, hyphens, and commas.
    if [[ "$candidate" =~ ^[A-Za-z0-9./-]+(,[A-Za-z0-9./-]+)*$ ]]; then
        # printf, not echo: the pattern admits values such as "-n", which
        # echo would consume as an option and print nothing.
        printf '%s\n' "$candidate"
    else
        echo "Error: Invalid ALPN value: $candidate (e.g. h2,http/1.1)" >&2
        return 1
    fi
}
# Print a message prefixed with the current wall-clock time (HH:MM:SS).
log() {
    printf '[%s] %s\n' "$(date '+%H:%M:%S')" "$*"
}

# Count a passed check and report it on stdout.
pass() {
    TESTS_PASSED=$((TESTS_PASSED + 1))
    log "PASS: $1"
}

# Count a failed check and report it on stderr.
fail() {
    TESTS_FAILED=$((TESTS_FAILED + 1))
    log "FAIL: $1" >&2
}

# Resolve domain IP via public DNS (local resolver may not have it yet).
# Prints the first IPv4 address answered for $1, or nothing.
resolve_domain() {
    local name="$1"
    dig +short A "$name" @8.8.8.8 2>/dev/null \
        | grep -E '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$' \
        | head -1
}

# curl with common flags (TLS insecure for staging, DNS resolve bypass).
# Every argument is forwarded to curl after the common flags.
do_curl() {
    local -a curl_args=(--max-time 10)
    local host

    if [ "$CERTBOT_STAGING" = "true" ]; then
        curl_args+=(-k)
    fi

    # Pin both test domains to the resolved address once it is known.
    if [ -n "${DOMAIN_IP:-}" ]; then
        for host in "$DOMAIN" "$GRPC_DOMAIN"; do
            curl_args+=(--resolve "${host}:443:${DOMAIN_IP}")
        done
    fi

    curl "${curl_args[@]}" "$@"
}

# Remove the generated compose file and, unless SKIP_CLEANUP=true, delete
# the test CVM when it still exists.
cleanup() {
    log "Cleaning up..."
    rm -f "$COMPOSE_FILE"

    if [ "$SKIP_CLEANUP" = "true" ]; then
        log "SKIP_CLEANUP=true, CVM '$CVM_NAME' left running"
        return
    fi

    # Nothing to delete when the CVM cannot be looked up.
    phala cvms get "$CVM_NAME" --json >/dev/null 2>&1 || return 0

    log "Deleting CVM: $CVM_NAME"
    echo y | phala cvms delete "$CVM_NAME" 2>/dev/null || true
}
# Poll public DNS until the test domain has an A record (30 attempts, 10s apart).
DOMAIN_IP=""
for i in $(seq 1 30); do
    DOMAIN_IP=$(resolve_domain "$DOMAIN")
    if [ -n "$DOMAIN_IP" ]; then
        log "Domain resolves to: $DOMAIN_IP"
        break
    fi
    log "DNS not propagated yet (attempt $i/30)"
    sleep 10
done

if [ -z "$DOMAIN_IP" ]; then
    fail "Domain $DOMAIN did not resolve within 5 minutes"
    exit 1
fi

# ── Wait for HTTPS ready ──────────────────────────────────────────────────────

log "Waiting for HTTPS to become available at https://${DOMAIN}/"

# Poll https://<domain>/ until it answers successfully or the timeout expires.
#
# Arguments:
#   $1 - domain to probe
#   $2 - timeout in seconds
# Returns:
#   0 as soon as a probe succeeds, 1 when the timeout is reached first.
wait_for_https() {
    local domain="$1"
    local timeout="$2"
    local interval=15
    # Measure wall-clock time with the SECONDS builtin. Counting only the
    # sleep interval ignores the time each failed probe takes, so the real
    # wait would run well past the requested timeout.
    local started=$SECONDS
    local elapsed=0

    while [ "$elapsed" -lt "$timeout" ]; do
        if do_curl -sf --http1.1 -o /dev/null "https://${domain}/" 2>/dev/null; then
            log "HTTPS responding on ${domain}"
            return 0
        fi
        log "HTTPS not ready yet on ${domain} (${elapsed}s/${timeout}s)"
        sleep "$interval"
        elapsed=$((SECONDS - started))
    done
    return 1
}

if wait_for_https "$DOMAIN" "$READY_TIMEOUT"; then
    pass "HTTPS endpoint is reachable"
else
    fail "HTTPS endpoint not reachable within ${READY_TIMEOUT}s"
    log "Fetching ingress container logs..."
    phala logs --cvm-id "$CVM_NAME" --serial -n 100 2>/dev/null || true
    exit 1
fi

# ══════════════════════════════════════════════════════════════════════════════
# Verification tests
# ══════════════════════════════════════════════════════════════════════════════

# ── HTTP/1.1 tests ───────────────────────────────────────────────────────────

log "Test: HTTP/1.1 through TCP proxy"
H1_STATUS=$(do_curl -s -o /dev/null -w '%{http_code}' --http1.1 "https://${DOMAIN}/")
if [ "$H1_STATUS" = "200" ]; then
    pass "HTTP/1.1 returns 200"
else
    fail "HTTP/1.1 expected 200, got $H1_STATUS"
fi

# Verify response came from whoami backend
log "Test: HTTP/1.1 routed to correct backend"
H1_BODY=$(do_curl -sf --http1.1 "https://${DOMAIN}/" || echo "")
if echo "$H1_BODY" | grep -qi "hostname"; then
    pass "HTTP/1.1 routed to whoami backend"
else
    fail "HTTP/1.1 response doesn't look like whoami"
fi

# ── HTTP/2 tests (against gRPC backend which supports h2c) ───────────────────
# Note: with L4 proxy + ALPN h2, the backend MUST support h2c (cleartext HTTP/2).
# whoami only speaks HTTP/1.1, so we test H2 against grpcbin which is a Go
# gRPC server and natively supports h2c.
log "Test: HTTP/2 through TCP proxy (via gRPC domain)"
# curl prints the -w value even when the transfer fails ("000"). Appending a
# fallback "000" on failure would produce "000000" and slip past a
# != "000" comparison, so take curl's own output and require a real status.
H2_STATUS=$(do_curl -s -o /dev/null -w '%{http_code}' --http2 "https://${GRPC_DOMAIN}/" 2>/dev/null || true)
if [[ "$H2_STATUS" =~ ^[1-5][0-9][0-9]$ ]]; then
    pass "HTTP/2 connection successful (status: $H2_STATUS)"
else
    fail "HTTP/2 connection failed"
fi

log "Test: HTTP/2 ALPN negotiation"
H2_VER=$(do_curl -s -o /dev/null -w '%{http_version}' --http2 "https://${GRPC_DOMAIN}/" 2>/dev/null || true)
if [ "$H2_VER" = "2" ]; then
    pass "HTTP/2 negotiated via ALPN (version: $H2_VER)"
else
    fail "HTTP/2 not negotiated (version: $H2_VER)"
fi

# ── gRPC tests ───────────────────────────────────────────────────────────────

log "Test: gRPC through TCP proxy"
GRPC_FLAGS=()
if [ "$CERTBOT_STAGING" = "true" ]; then
    GRPC_FLAGS+=("-insecure")
fi

# Run grpcurl against the ingress IP with the gRPC domain as authority.
# The ${arr[@]+...} form keeps an empty GRPC_FLAGS from tripping `set -u`
# on bash releases older than 4.4.
grpc_call() {
    grpcurl ${GRPC_FLAGS[@]+"${GRPC_FLAGS[@]}"} \
        -authority "${GRPC_DOMAIN}" \
        "$@"
}

# Wait for gRPC domain to be ready (may take a moment after HTTP domain)
log "Waiting for gRPC domain..."
GRPC_READY=false
for i in $(seq 1 20); do
    if grpc_call "${DOMAIN_IP}:443" list >/dev/null 2>&1; then
        GRPC_READY=true
        break
    fi
    sleep 5
done

if [ "$GRPC_READY" = "true" ]; then
    pass "gRPC endpoint reachable"
else
    fail "gRPC endpoint not reachable"
fi

# List available gRPC services (tests reflection)
if [ "$GRPC_READY" = "true" ]; then
    log "Test: gRPC service listing (reflection)"
    GRPC_SERVICES=$(grpc_call "${DOMAIN_IP}:443" list 2>/dev/null || echo "")
    if echo "$GRPC_SERVICES" | grep -q "grpc"; then
        pass "gRPC reflection lists services"
        log "  Services: $(echo "$GRPC_SERVICES" | tr '\n' ', ')"
    else
        fail "gRPC reflection returned no services"
    fi

    # Make an actual gRPC call
    log "Test: gRPC unary call"
    GRPC_RESULT=$(grpc_call \
        -d '{"greeting": "e2e-test"}' \
        "${DOMAIN_IP}:443" \
        hello.HelloService/SayHello 2>/dev/null || echo "ERROR")
    if echo "$GRPC_RESULT" | grep -q "e2e-test"; then
        pass "gRPC unary call returned correct response"
    elif echo "$GRPC_RESULT" | grep -qi "error"; then
        fail "gRPC unary call failed: $GRPC_RESULT"
    else
        pass "gRPC unary call completed (response: $(echo "$GRPC_RESULT" | head -1))"
    fi
fi

# ── TLS tests ────────────────────────────────────────────────────────────────

log "Test: TLS certificate"
CERT_ISSUER=$(echo | openssl s_client -connect "${DOMAIN_IP}:443" -servername "${DOMAIN}" 2>/dev/null | openssl x509 -noout -issuer 2>/dev/null || echo "")
if echo "$CERT_ISSUER" | grep -qi "let's encrypt\|letsencrypt\|fake\|staging"; then
    pass "TLS certificate from Let's Encrypt"
else
    fail "Unexpected certificate issuer: $CERT_ISSUER"
fi

log "Test: TLS version"
TLS_INFO=$(echo | openssl s_client -connect "${DOMAIN_IP}:443" -servername "${DOMAIN}" 2>&1 || true)
# No textual fallback here: a placeholder such as "unknown" is non-empty and
# would make the check below pass even when no version was found.
TLS_VERSION=$(echo "$TLS_INFO" | grep -oE "TLSv1\.[0-9]" | head -1 || true)
if [[ "$TLS_VERSION" =~ ^TLSv1\.[23]$ ]]; then
    pass "TLS version: $TLS_VERSION"
elif [ -n "$TLS_VERSION" ]; then
    fail "Unexpected TLS version: $TLS_VERSION"
else
    fail "Could not determine TLS version"
fi

# ── Evidence tests ───────────────────────────────────────────────────────────

log "Test: Evidence endpoint /evidences/"
EVIDENCE_STATUS=$(do_curl -s -o /dev/null -w '%{http_code}' --http1.1 "https://${DOMAIN}/evidences/")
if [ "$EVIDENCE_STATUS" = "200" ]; then
    pass "Evidence endpoint returns 200"
else
    fail "Evidence endpoint returned $EVIDENCE_STATUS"
fi

log "Test: Evidence files"
for file in acme-account.json sha256sum.txt quote.json; do
    FILE_STATUS=$(do_curl -s -o /dev/null -w '%{http_code}' --http1.1 "https://${DOMAIN}/evidences/${file}")
    if [ "$FILE_STATUS" = "200" ]; then
        pass "Evidence file /${file} exists"
    else
        fail "Evidence file /${file} returned $FILE_STATUS"
    fi
done

log "Test: Evidence integrity"
SHA256_CONTENT=$(do_curl -sf --http1.1 "https://${DOMAIN}/evidences/sha256sum.txt" || echo "")
if echo "$SHA256_CONTENT" | grep -q "acme-account.json"; then
    pass "sha256sum.txt references acme-account.json"
else
    fail "sha256sum.txt missing acme-account.json reference"
fi

# ── SNI routing test ─────────────────────────────────────────────────────────

log "Test: SNI routes different domains to different backends"
# whoami backend returns "Hostname:" header
WHOAMI_RESP=$(do_curl -sf --http1.1 "https://${DOMAIN}/" || echo "")
# grpc domain should NOT return whoami response
GRPC_HTTP=$(do_curl -s -o /dev/null -w '%{http_code}' --http1.1 "https://${GRPC_DOMAIN}/" 2>/dev/null || true)
if echo "$WHOAMI_RESP" | grep -qi "hostname" && [ "$GRPC_HTTP" != "200" ]; then
    pass "SNI routing separates HTTP and gRPC backends"
elif echo "$WHOAMI_RESP" | grep -qi "hostname"; then
    pass "SNI routing confirmed (HTTP domain serves whoami)"
else
    fail "SNI routing may not be working correctly"
fi

# ── Results ────────────────────────────────────────────────────────────────────

echo ""
log "════════════════════════════════════════════"
log "Results: ${TESTS_PASSED} passed, ${TESTS_FAILED} failed"
log "════════════════════════════════════════════"

if [ "$TESTS_FAILED" -gt 0 ]; then
    log "Dumping ingress logs for debugging:"
    phala logs --cvm-id "$CVM_NAME" --serial -n 100 2>/dev/null || true
    exit 1
fi

log "All tests passed!"
diff --git a/custom-domain/dstack-ingress/scripts/tests/test_sanitizers.sh b/custom-domain/dstack-ingress/scripts/tests/test_sanitizers.sh index 8c6d71e..5c1bda4 100644 --- a/custom-domain/dstack-ingress/scripts/tests/test_sanitizers.sh +++ b/custom-domain/dstack-ingress/scripts/tests/test_sanitizers.sh @@ -44,6 +44,9 @@ assert_equal "$(sanitize_domain example.com)" "example.com" "sanitize_domain acc assert_equal "$(sanitize_domain '*.example.com')" "*.example.com" "sanitize_domain accepts wildcard" assert_equal "$(sanitize_target_endpoint http://service:80/path)" "http://service:80/path" "sanitize_target_endpoint accepts http" assert_equal "$(sanitize_target_endpoint grpc://svc:50051)" "grpc://svc:50051" "sanitize_target_endpoint accepts grpc" +assert_equal "$(sanitize_target_endpoint app:80)" "app:80" "sanitize_target_endpoint accepts bare host:port" +assert_equal "$(sanitize_target_endpoint app-main)" "app-main" "sanitize_target_endpoint accepts bare hostname" +assert_equal "$(sanitize_target_endpoint 10.0.0.1:8080)" "10.0.0.1:8080" "sanitize_target_endpoint accepts IP:port" assert_equal "$(sanitize_client_max_body_size 50m)" "50m" "sanitize_client_max_body_size accepts suffix" assert_equal "$(sanitize_dns_label test_label)" "test_label" "sanitize_dns_label accepts lowercase" assert_equal "$(sanitize_dns_label test-label)" "test-label" "sanitize_dns_label accepts hyphen" @@ -52,6 +55,18 @@ assert_equal "$(sanitize_proxy_timeout 30s)" "30s" "sanitize_proxy_timeout accep assert_equal "$(sanitize_proxy_timeout 5m)" "5m" "sanitize_proxy_timeout accepts minutes suffix" assert_equal "$(sanitize_proxy_timeout 1h)" "1h" "sanitize_proxy_timeout accepts hours suffix" assert_equal "$(sanitize_proxy_timeout '')" "" "sanitize_proxy_timeout accepts empty value" +assert_equal "$(sanitize_positive_integer 4096 MAXCONN)" "4096" "sanitize_positive_integer accepts 4096" +assert_equal "$(sanitize_positive_integer 1 MAXCONN)" "1" "sanitize_positive_integer accepts 1" +assert_equal 
"$(sanitize_haproxy_timeout 10s TIMEOUT_CONNECT)" "10s" "sanitize_haproxy_timeout accepts 10s" +assert_equal "$(sanitize_haproxy_timeout 86400s TIMEOUT_CLIENT)" "86400s" "sanitize_haproxy_timeout accepts 86400s" +assert_equal "$(sanitize_haproxy_timeout 5m TIMEOUT)" "5m" "sanitize_haproxy_timeout accepts 5m" +assert_equal "$(sanitize_haproxy_timeout 500ms TIMEOUT)" "500ms" "sanitize_haproxy_timeout accepts 500ms" +assert_equal "$(sanitize_haproxy_timeout 100us TIMEOUT)" "100us" "sanitize_haproxy_timeout accepts 100us" +assert_equal "$(sanitize_haproxy_timeout 1d TIMEOUT)" "1d" "sanitize_haproxy_timeout accepts 1d" +assert_equal "$(sanitize_alpn 'h2,http/1.1')" "h2,http/1.1" "sanitize_alpn accepts h2,http/1.1" +assert_equal "$(sanitize_alpn 'h2')" "h2" "sanitize_alpn accepts h2" +assert_equal "$(sanitize_alpn 'http/1.1')" "http/1.1" "sanitize_alpn accepts http/1.1" +assert_equal "$(sanitize_alpn '')" "" "sanitize_alpn accepts empty" # Failing cases assert_fails "sanitize_port rejects non-numeric" sanitize_port abc @@ -86,6 +101,13 @@ else fi assert_fails "sanitize_dns_label rejects invalid characters" sanitize_dns_label "bad*label" +assert_fails "sanitize_positive_integer rejects zero" sanitize_positive_integer 0 MAXCONN +assert_fails "sanitize_positive_integer rejects non-numeric" sanitize_positive_integer abc MAXCONN +assert_fails "sanitize_haproxy_timeout rejects bare text" sanitize_haproxy_timeout abc TIMEOUT +assert_fails "sanitize_haproxy_timeout rejects bare number" sanitize_haproxy_timeout 10 TIMEOUT +assert_fails "sanitize_alpn rejects semicolons" sanitize_alpn "h2;drop" +assert_fails "sanitize_alpn rejects newlines" sanitize_alpn $'h2\nhttp/1.1' +assert_fails "sanitize_alpn rejects spaces" sanitize_alpn "h2, http/1.1" if [[ $failures -eq 0 ]]; then echo "All sanitizer tests passed"