Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
511 changes: 303 additions & 208 deletions frameworks/trillium-tuned/Cargo.lock

Large diffs are not rendered by default.

16 changes: 10 additions & 6 deletions frameworks/trillium-tuned/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,17 @@ version = "0.1.0"
edition = "2024"

[dependencies]
trillium = "1.1"
trillium-tokio = "0.6"
trillium = "1"
trillium-tokio = { version = "0.6", features = ["reuseport"] }
trillium-router = "0.5"
trillium-rustls = "0.11"
trillium-quinn = "0.1"
trillium-websockets = "0.8"
trillium-compression = "0.2"
trillium-logger = "0.4"
trillium-compression = "0.3"
trillium-grpc = "0.2"
trillium-askama = "0.5"
askama = "0.16"
trillium-logger = "0.5"

swansong = "0.3.4"
futures-lite = "2"
Expand All @@ -27,13 +30,14 @@ dashmap = "6"
log = "0.4"
env_logger = "0.11"
mimalloc = { version = "0.1", default-features = false }
socket2 = { version = "0.5", features = ["all"] }
socket2 = { version = "0.6", features = ["all"] }
num_cpus = "1"
signal-hook = "0.3"
signal-hook = "0.4"

[profile.release]
opt-level = 3
codegen-units = 1
lto = "fat"
panic = "abort"
strip = true

2 changes: 2 additions & 0 deletions frameworks/trillium-tuned/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ RUN mkdir src && echo "fn main() {}" > src/main.rs && \
cargo build --release && \
rm -rf src/ target/release/httparena-trillium-tuned* target/release/deps/httparena_trillium_tuned*
COPY src ./src
COPY templates ./templates
COPY askama.toml .
RUN RUSTFLAGS="-C target-cpu=native" cargo build --release

FROM debian:bookworm-slim
Expand Down
3 changes: 3 additions & 0 deletions frameworks/trillium-tuned/askama.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[[escaper]]
path = "crate::handlers::fortunes::NamedHtmlEscaper"
extensions = ["html"]
51 changes: 51 additions & 0 deletions frameworks/trillium-tuned/compose.gateway-h3.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# gateway-h3 — same two-service stack as compose.gateway.yml, but the edge is
# exercised over HTTP/3 (QUIC/UDP). Stock Caddy already binds 8443 on both tcp
# and udp with `protocols h1 h2 h3`, so the proxy config is identical; only the
# load generator differs (h2load-h3). The proxy->server upstream stays UDS h1.
services:
proxy:
build:
context: ./proxy
network_mode: host
cpuset: "${PROXY_CPUSET:-0-15,64-79}"
ulimits:
memlock: -1
nofile:
soft: 1048576
hard: 1048576
security_opt:
- seccomp:unconfined
volumes:
- ${CERTS_DIR}:/certs:ro
- ${DATA_DIR}/static:/data/static:ro
- appsock:/run/app
depends_on:
- server

server:
build:
context: .
dockerfile: Dockerfile
network_mode: host
cpuset: "${SERVER_CPUSET:-16-31,80-95}"
ulimits:
memlock: -1
nofile:
soft: 1048576
hard: 1048576
security_opt:
- seccomp:unconfined
environment:
- LISTEN_UDS=/run/app/app.sock
- DATABASE_URL=${DATABASE_URL}
- DATABASE_MAX_CONN=256
- DATASET_PATH=/data/dataset.json
- STATIC_DIR=/data/static
- RUST_LOG=info
volumes:
- ${DATA_DIR}/dataset.json:/data/dataset.json:ro
- ${DATA_DIR}/static:/data/static:ro
- appsock:/run/app

volumes:
appsock:
58 changes: 58 additions & 0 deletions frameworks/trillium-tuned/compose.gateway.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# gateway-64 — stock Caddy (TLS + h2 on 8443) in front of the trillium-tuned
# server, forwarding dynamic endpoints over a shared Unix domain socket (h1
# keepalive). The server gets no certs, so it comes up cleartext on 8080 + the
# UDS only. n_workers auto-sizes to the server's cpuset via num_cpus.
#
# CPU split is the #1 tuning knob — start with an even 16/16 physical-core split and sweep via
# PROXY_CPUSET / SERVER_CPUSET. The TLS-terminating proxy is often the
# bottleneck under multiplexed h2 load, so give it more if it saturates.
services:
proxy:
build:
context: ./proxy
network_mode: host
cpuset: "${PROXY_CPUSET:-0-15,64-79}"
ulimits:
memlock: -1
nofile:
soft: 1048576
hard: 1048576
security_opt:
- seccomp:unconfined
volumes:
- ${CERTS_DIR}:/certs:ro
- ${DATA_DIR}/static:/data/static:ro
- appsock:/run/app
depends_on:
- server

server:
build:
context: .
dockerfile: Dockerfile
network_mode: host
cpuset: "${SERVER_CPUSET:-16-31,80-95}"
ulimits:
memlock: -1
nofile:
soft: 1048576
hard: 1048576
security_opt:
- seccomp:unconfined
environment:
- LISTEN_UDS=/run/app/app.sock
- DATABASE_URL=${DATABASE_URL}
- DATABASE_MAX_CONN=256
- DATASET_PATH=/data/dataset.json
- STATIC_DIR=/data/static
- RUST_LOG=info
volumes:
- ${DATA_DIR}/dataset.json:/data/dataset.json:ro
- ${DATA_DIR}/static:/data/static:ro
- appsock:/run/app

# Shared rendezvous for the UDS. network_mode: host shares the net namespace
# but not the filesystem, so the socket needs an explicit shared volume mounted
# into both services at the same path.
volumes:
appsock:
11 changes: 9 additions & 2 deletions frameworks/trillium-tuned/meta.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"language": "Rust",
"type": "tuned",
"engine": "trillium-http",
"description": "Trillium 1.x with one current_thread tokio runtime per CPU, SO_REUSEPORT TCP sharding (single QUIC endpoint for h3), tuned HttpConfig (larger response/body buffers, 64K h2 frames, eager body preallocation), and static files preloaded into memory at startup. sonic-rs for JSON, deadpool-postgres, mimalloc.",
"description": "Trillium 1.x with one current_thread tokio runtime per CPU, SO_REUSEPORT TCP sharding (single QUIC endpoint for h3), tuned HttpConfig (larger response/body buffers, 64K h2 frames, eager body preallocation), and static files preloaded into memory at startup. sonic-rs for JSON, deadpool-postgres, mimalloc, trillium-grpc for the benchmark.BenchmarkService gRPC endpoints.",
"repo": "https://github.com/trillium-rs/trillium",
"enabled": true,
"tests": [
Expand All @@ -17,14 +17,21 @@
"static",
"async-db",
"crud",
"fortunes",
"api-4",
"api-16",
"baseline-h2",
"static-h2",
"baseline-h3",
"static-h3",
"unary-grpc",
"unary-grpc-tls",
"stream-grpc",
"stream-grpc-tls",
"echo-ws",
"echo-ws-pipeline"
"echo-ws-pipeline",
"gateway-64",
"gateway-h3"
],
"maintainers": ["jbr"]
}
37 changes: 37 additions & 0 deletions frameworks/trillium-tuned/proto/benchmark.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Canonical benchmark.proto used by ghz for gRPC streaming tests.
// Each gRPC framework must implement the RPCs it subscribes to in its own
// service; the shapes must match this file exactly.

syntax = "proto3";

package benchmark;

// One RPC per gRPC call shape: unary, server streaming, client streaming and
// bidirectional streaming. All of them reply with SumReply.
service BenchmarkService {
// Unary — 1 request, 1 reply. Used by unary-grpc (h2load).
rpc GetSum (SumRequest) returns (SumReply);

// Server streaming — 1 request, server emits `count` replies. Used by
// stream-grpc / stream-grpc-tls (ghz).
rpc StreamSum (StreamRequest) returns (stream SumReply);

// Client streaming — N requests streamed in, 1 reply with the total.
rpc CollectSum (stream SumRequest) returns (SumReply);

// Bidirectional streaming — echo: 1 reply per request over a persistent stream.
rpc EchoSum (stream SumRequest) returns (stream SumReply);
}

// A pair of int32 operands. Request type of GetSum, and the streamed request
// type of CollectSum and EchoSum.
message SumRequest {
int32 a = 1;
int32 b = 2;
}

// Request type of StreamSum: a pair of int32 operands plus the number of
// replies the server should emit.
message StreamRequest {
int32 a = 1;
int32 b = 2;
// Number of SumReply messages the server streams back.
int32 count = 3;
}

// Reply type shared by every RPC in BenchmarkService: a single int32 result.
message SumReply {
int32 result = 1;
}
46 changes: 46 additions & 0 deletions frameworks/trillium-tuned/proxy/Caddyfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Trillium-tuned gateway edge.
#
# Caddy terminates TLS + h1/h2/h3 on 8443, serves /static/* directly from disk
# with precompressed sidecars, and forwards every dynamic endpoint to the
# trillium-tuned server over a Unix domain socket (h1 + keepalive pool).
#
# Same neutral standard-proxy shape as the production entry; "tuned" here means
# the *backend* is the optimized trillium-tuned server (sonic-rs, mimalloc,
# tuned HttpConfig). Tuned rules also allow more aggressive proxy tuning, so the
# keepalive pool is sized generously for the multiplexed gateway load.

{
admin off
auto_https off
servers {
protocols h1 h2 h3
}
}

https://localhost:8443 {
tls /certs/server.crt /certs/server.key

# Static assets served by Caddy directly from /data/static. `precompressed`
# prefers foo.br / foo.gz sidecars per Accept-Encoding, falling back to the
# raw file. (root /data + request /static/foo -> /data/static/foo.)
handle /static/* {
root * /data
file_server {
precompressed br gzip
}
}

# Dynamic endpoints -> trillium-tuned server over the shared UDS. Plain h1
# with a large keepalive pool: many independent streams fan out across the
# backend's per-core workers without h2c's single-connection HoL blocking.
handle {
reverse_proxy unix//run/app/app.sock {
transport http {
versions 1.1
keepalive 5m
keepalive_idle_conns 4096
keepalive_idle_conns_per_host 4096
}
}
}
}
10 changes: 10 additions & 0 deletions frameworks/trillium-tuned/proxy/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Stock Caddy — ships h1/h2/h3 (QUIC) in the base image, no custom build.
# Covers both gateway-64 (h2 over TCP) and gateway-h3 (QUIC over UDP) from
# the same Caddyfile, which binds 8443 on both tcp and udp.
FROM caddy:2-alpine

COPY Caddyfile /etc/caddy/Caddyfile

EXPOSE 8443/tcp 8443/udp

CMD ["caddy", "run", "--config", "/etc/caddy/Caddyfile", "--adapter", "caddyfile"]
75 changes: 75 additions & 0 deletions frameworks/trillium-tuned/src/grpc.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
//! gRPC `benchmark.BenchmarkService`, served over trillium-grpc.
//!
//! Mounts into the same handler tree as the HTTP endpoints — the server handler
//! matches its own `/benchmark.BenchmarkService/*` path prefix and passes every
//! other request through. Exercised by the `unary-grpc` (`GetSum`) and
//! `stream-grpc` (`StreamSum`) profiles over h2c on 8080 and h2-over-TLS on 8443;
//! `CollectSum` and `EchoSum` round out the service the proto defines.

use futures_lite::stream;
use trillium_grpc::{BidiResponder, Channel, GrpcServerConn, Status, Stream};

// Bindings for `benchmark.BenchmarkService`, textually included from
// `grpc/benchmark.rs` (resolved relative to this source file). This module is
// where `BenchmarkService`, `BenchmarkServiceServer`, `StreamRequest`,
// `SumReply` and `SumRequest` come from.
// NOTE(review): the included file looks like generated output for
// `proto/benchmark.proto` — confirm how and when it is regenerated.
#[allow(dead_code)] // the generated client half is unused on the server
mod benchmark {
include!("grpc/benchmark.rs");
}

pub use benchmark::BenchmarkServiceServer;
use benchmark::{BenchmarkService, StreamRequest, SumReply, SumRequest};

/// Stateless implementation of `benchmark.BenchmarkService`.
///
/// Every RPC computes `a + b` over the `i32` operands supplied by the client.
/// All sums use wrapping arithmetic: the operands are untrusted network input,
/// and a plain `+` would panic on overflow in builds with overflow checks
/// enabled (e.g. debug) while silently wrapping in release. Wrapping explicitly
/// gives the same result in every build and keeps a single request from taking
/// the process down.
pub struct Benchmark;

impl BenchmarkService for Benchmark {
    /// Unary RPC: replies with the wrapping sum of `request.a` and `request.b`.
    async fn get_sum(
        &self,
        _conn: &mut GrpcServerConn,
        request: SumRequest,
    ) -> Result<SumReply, Status> {
        Ok(SumReply {
            result: request.a.wrapping_add(request.b),
        })
    }

    /// Server-streaming RPC: emits `request.count` identical replies, each
    /// carrying the wrapping sum of `request.a` and `request.b`.
    ///
    /// A negative `count` produces an empty stream.
    async fn stream_sum(
        &self,
        _conn: &mut GrpcServerConn,
        request: StreamRequest,
    ) -> Result<impl Stream<Item = Result<SumReply, Status>> + Send + use<>, Status> {
        let result = request.a.wrapping_add(request.b);
        // The conversion only fails for negative counts, which mean "no replies".
        let count = usize::try_from(request.count).unwrap_or(0);
        Ok(stream::iter(
            (0..count).map(move |_| Ok(SumReply { result })),
        ))
    }

    /// Client-streaming RPC: drains the request stream and replies once with
    /// the wrapping total of every `a + b` received.
    ///
    /// # Errors
    ///
    /// Propagates any error yielded while receiving a request.
    async fn collect_sum(&self, conn: &mut GrpcServerConn) -> Result<SumReply, Status> {
        let mut total: i32 = 0;
        let mut requests = conn.requests::<SumRequest>();
        while let Some(request) = requests.recv().await? {
            total = total.wrapping_add(request.a.wrapping_add(request.b));
        }
        Ok(SumReply { result: total })
    }

    /// Bidirectional-streaming RPC: hands the stream over to [`EchoSum`], which
    /// answers each request with its sum.
    async fn echo_sum(
        &self,
        _conn: &mut GrpcServerConn,
    ) -> Result<impl BidiResponder<SumRequest, SumReply> + use<>, Status> {
        Ok(EchoSum)
    }
}

/// Responder for the bidirectional `EchoSum` RPC: one reply per request.
struct EchoSum;

impl BidiResponder<SumRequest, SumReply> for EchoSum {
    /// Receives requests until the channel reports end of stream, answering
    /// each one with the sum of its operands.
    ///
    /// The sum wraps on `i32` overflow: the operands come from the client, and
    /// a plain `+` would panic in builds with overflow checks enabled while
    /// wrapping in release. Wrapping explicitly keeps every build consistent.
    ///
    /// # Errors
    ///
    /// Returns the first error produced by receiving a request or sending a
    /// reply.
    async fn respond(self, mut channel: Channel<'_, SumRequest, SumReply>) -> Result<(), Status> {
        while let Some(request) = channel.recv().await.transpose()? {
            let reply = SumReply {
                result: request.a.wrapping_add(request.b),
            };
            channel.send(reply).await?;
        }
        Ok(())
    }
}
Loading