-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathrolling-update.env.example
More file actions
160 lines (143 loc) · 7.6 KB
/
rolling-update.env.example
File metadata and controls
160 lines (143 loc) · 7.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# Copy this file outside the repo or export the same variables in your shell.
# Required: rollout order and advertised raft hosts.
NODES="n1=raft-1.internal.example,n2=raft-2.internal.example,n3=raft-3.internal.example"
# Optional: if SSH targets differ from advertised raft hosts.
# Values may be either hostnames or full user@host targets.
# SSH_TARGETS="n1=admin@ssh-1.internal.example,n2=ssh-2.internal.example,n3=ssh-3.internal.example"
# Optional: override rollout order without changing NODES.
# ROLLING_ORDER="n2,n3,n1"
IMAGE="ghcr.io/bootjp/elastickv:latest"
SSH_USER="deploy"
CONTAINER_NAME="elastickv"
DATA_DIR="/var/lib/elastickv"
SERVER_ENTRYPOINT="/app"
# Default rollout targets the etcd runtime. Keep this aligned with the engine
# that initialized the target data dir; switching an existing data dir between
# hashicorp and etcd is intentionally rejected.
RAFT_ENGINE="etcd"
RAFT_PORT="50051"
REDIS_PORT="6379"
DYNAMO_PORT="8000"
ENABLE_S3="true"
S3_PORT="9000"
S3_REGION="us-east-1"
S3_PATH_STYLE_ONLY="true"
# Optional: path to a JSON file containing static S3 credentials on the remote host.
# The file is bind-mounted read-only into the container.
# S3_CREDENTIALS_FILE="/etc/elastickv/s3-credentials.json"
# SQS-compatible adapter (opt-in). Required to mount the admin
# /admin/api/v1/sqs/* endpoints — those handlers are gated on a
# non-nil sqsServer, which the binary only constructs when
# --sqsAddress is non-empty.
ENABLE_SQS="false"
SQS_PORT="9324"
SQS_REGION="us-east-1"
# Optional: path to a JSON file containing static SQS credentials. When
# SQS_CREDENTIALS_FILE is empty the SQS endpoint is open (unauthenticated).
# Same JSON shape as S3_CREDENTIALS_FILE.
# SQS_CREDENTIALS_FILE="/etc/elastickv/sqs-credentials.json"
# Optional: HT-FIFO partition routing map. Empty disables coverage check.
# SQS_FIFO_PARTITION_MAP="orders.fifo:4=1,1,2,2"
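# Optional: override if SQS routing addresses differ from the advertised raft hosts.
# RAFT_TO_SQS_MAP="raft-1.internal.example:50051=sqs-1.internal.example:9324,raft-2.internal.example:50051=sqs-2.internal.example:9324,raft-3.internal.example:50051=sqs-3.internal.example:9324"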
# Optional: override if Redis routing addresses differ from the advertised raft hosts.
# RAFT_TO_REDIS_MAP="raft-1.internal.example:50051=redis-1.internal.example:6379,raft-2.internal.example:50051=redis-2.internal.example:6379,raft-3.internal.example:50051=redis-3.internal.example:6379"
# Optional: override if S3 routing addresses differ from the advertised raft hosts.
# RAFT_TO_S3_MAP="raft-1.internal.example:50051=s3-1.internal.example:9000,raft-2.internal.example:50051=s3-2.internal.example:9000,raft-3.internal.example:50051=s3-3.internal.example:9000"
HEALTH_TIMEOUT_SECONDS="60"
LEADERSHIP_TRANSFER_TIMEOUT_SECONDS="30"
LEADER_DISCOVERY_TIMEOUT_SECONDS="30"
ROLLING_DELAY_SECONDS="2"
SSH_CONNECT_TIMEOUT_SECONDS="10"
SSH_STRICT_HOST_KEY_CHECKING="accept-new"
# If set, this binary must already be executable on the local control host.
# RAFTADMIN_BIN="/absolute/path/to/linux/raftadmin"
RAFTADMIN_REMOTE_BIN="/tmp/elastickv-raftadmin"
# Bumped from 5 to 15 (2026-05-22) so leadership-transfer RPCs survive
# raft's transient pre-stable state right after a peer restart. The
# 2026-05-21 reproduction (Actions run 26198185540) needed ~10 s of
# headroom for the candidate's log to catch up before the transfer
# could commit.
RAFTADMIN_RPC_TIMEOUT_SECONDS="15"
RAFTADMIN_ALLOW_INSECURE="true"
# Retry the targeted leadership_transfer_to_server RPC up to N times
# before falling back to generic transfer. Each retry waits
# LEADERSHIP_TRANSFER_RETRY_BACKOFF_SECONDS to let the candidate's
# log catch up. The first attempt counts toward N, so set this to 1
# to disable retries.
LEADERSHIP_TRANSFER_RETRY_ATTEMPTS="3"
LEADERSHIP_TRANSFER_RETRY_BACKOFF_SECONDS="5"
# OOM defenses applied on 2026-04-24 after kernel OOM-SIGKILL cascades.
# GOMEMLIMIT makes Go GC before the container hits --memory; --memory keeps
# any kill scoped to the container, not host processes. Set either to "" to
# opt out. User EXTRA_ENV keys override matching keys in DEFAULT_EXTRA_ENV.
DEFAULT_EXTRA_ENV="GOMEMLIMIT=1800MiB"
CONTAINER_MEMORY_LIMIT="2500m"
# Admin dashboard. Disabled by default; flip ADMIN_ENABLED=true to turn the
# listener on. When enabled, ADMIN_SESSION_SIGNING_KEY_FILE plus at least one
# of ADMIN_FULL_ACCESS_KEYS / ADMIN_READ_ONLY_ACCESS_KEYS is required. The
# script bind-mounts the referenced files into the container read-only at
# the same path. Read docs/admin.md and docs/admin_deployment.md before
# enabling on a real deployment.
ADMIN_ENABLED="false"
# ADMIN_ADDRESS="0.0.0.0:8080"
# ADMIN_FULL_ACCESS_KEYS="AKIA_ADMIN"
# ADMIN_READ_ONLY_ACCESS_KEYS="AKIA_OBSERVER1,AKIA_OBSERVER2"
# ADMIN_SESSION_SIGNING_KEY_FILE="/etc/elastickv/admin-hs256.b64"
# ADMIN_SESSION_SIGNING_KEY_PREVIOUS_FILE="/etc/elastickv/admin-hs256.previous.b64"
# ADMIN_TLS_CERT_FILE="/etc/elastickv/admin-tls.crt"
# ADMIN_TLS_KEY_FILE="/etc/elastickv/admin-tls.key"
# ADMIN_ALLOW_PLAINTEXT_NON_LOOPBACK="false"
# ADMIN_ALLOW_INSECURE_DEV_COOKIE="false"
# KeyViz heatmap sampler. Disabled by default; flip KEYVIZ_ENABLED=true
# to feed the admin dashboard's /admin/api/v1/keyviz/matrix endpoint.
# The sampler is in-memory and read-only, so it is safe to enable
# regardless of whether ADMIN_ENABLED is on; it simply has no
# callers without --adminEnabled.
#
# KEYVIZ_FANOUT_NODES is an optional comma-separated host:port list of
# every admin listener in the cluster. When set, the admin handler
# merges matrices from each peer so the dashboard renders a cluster-
# wide heatmap regardless of which node served the request. The
# aggregator forwards the operator's session cookie to each peer
# (PR #692), so peers running with --adminEnabled accept the fan-out
# call as long as the cookie is valid on every node — i.e. the same
# admin signing key (ADMIN_SESSION_SIGNING_KEY_FILE) and matching
# role allow-lists must be configured cluster-wide. Peers without
# --adminEnabled expose an unauthenticated keyviz endpoint and
# respond unconditionally.
# See docs/design/2026_04_27_proposed_keyviz_cluster_fanout.md for the
# full design.
KEYVIZ_ENABLED="false"
# KEYVIZ_FANOUT_NODES="10.0.0.1:8080,10.0.0.2:8080,10.0.0.3:8080"
# KeyViz hot-key Top-K drill-down (Phase 2-A++). When
# KEYVIZ_HOT_KEYS_ENABLED=true (and KEYVIZ_ENABLED is also true), every
# tracked route grows a Space-Saving sketch that powers the
# /admin/api/v1/keyviz/hotkeys endpoint and the heatmap cell-click
# drill-down. Disabled-case overhead on the sampler hot path is one
# nil-check branch.
#
# Tuning (the sampler clamps all four to the limits documented in
# keyviz/sampler.go; an out-of-range value is pulled to the nearest limit):
# KEYVIZ_HOT_KEYS_PER_ROUTE — Space-Saving capacity m per route.
# Default 64, max 256. Each route holds at most m keys; with
# m=64 a key whose true frequency f > N/64 is guaranteed to
# survive eviction. Memory: m × (maxKeyLen + 16) per tracked
# route.
# KEYVIZ_HOT_KEYS_SAMPLE_RATE — R: the hot path enqueues 1 in R
# observes. Default 16, max 1024. Higher R = less CPU on the
# hot path + a coarser sketch (the response scales counts back
# up by R and surfaces `error_bound`).
# KEYVIZ_HOT_KEYS_QUEUE_SIZE — bounded channel between the hot
# path and the aggregator goroutine. Default 8192, max 65536.
# Overflows increment `dropped_samples` and set `degraded`
# on the next snapshot.
# KEYVIZ_HOT_KEYS_MAX_KEY_LEN — keys longer than this are skipped
# BEFORE the sample gate and increment `skipped_long_keys`.
# Default 1024, max 4096.
# See docs/design/2026_05_28_proposed_keyviz_hot_key_topk.md §8 for the
# full memory / overhead table.
KEYVIZ_HOT_KEYS_ENABLED="false"
# KEYVIZ_HOT_KEYS_PER_ROUTE="64"
# KEYVIZ_HOT_KEYS_SAMPLE_RATE="16"
# KEYVIZ_HOT_KEYS_QUEUE_SIZE="8192"
# KEYVIZ_HOT_KEYS_MAX_KEY_LEN="1024"