From e706f826df87e5475f35c2713fe4d76639161f13 Mon Sep 17 00:00:00 2001 From: Vincenzo Mauro Date: Wed, 27 May 2026 16:14:13 +0200 Subject: [PATCH 1/8] feat: initial taint addition setup --- .../files/hacluster-taint-sudoers.yaml | 5 ++ .../files/k8s-taint-alert.yaml | 14 ++++ .../files/taint-fenced-node.yaml | 73 +++++++++++++++++++ .../units/taint-node@.service.yaml | 10 +++ 4 files changed, 102 insertions(+) create mode 100644 templates/master/00-master/two-node-with-fencing/files/hacluster-taint-sudoers.yaml create mode 100644 templates/master/00-master/two-node-with-fencing/files/k8s-taint-alert.yaml create mode 100644 templates/master/00-master/two-node-with-fencing/files/taint-fenced-node.yaml create mode 100644 templates/master/00-master/two-node-with-fencing/units/taint-node@.service.yaml diff --git a/templates/master/00-master/two-node-with-fencing/files/hacluster-taint-sudoers.yaml b/templates/master/00-master/two-node-with-fencing/files/hacluster-taint-sudoers.yaml new file mode 100644 index 0000000000..9d833816c9 --- /dev/null +++ b/templates/master/00-master/two-node-with-fencing/files/hacluster-taint-sudoers.yaml @@ -0,0 +1,5 @@ +mode: 0440 +path: "/etc/sudoers.d/hacluster-taint" +contents: + inline: | + hacluster ALL=(root) NOPASSWD: /usr/bin/systemctl start taint-node@* diff --git a/templates/master/00-master/two-node-with-fencing/files/k8s-taint-alert.yaml b/templates/master/00-master/two-node-with-fencing/files/k8s-taint-alert.yaml new file mode 100644 index 0000000000..5ba191804d --- /dev/null +++ b/templates/master/00-master/two-node-with-fencing/files/k8s-taint-alert.yaml @@ -0,0 +1,14 @@ +mode: 0755 +path: "/var/lib/pacemaker/alerts/k8s-taint-alert.sh" +contents: + inline: | + #!/bin/bash + # Pacemaker alert agent for fencing events. + # CRM_alert_kind=fencing + CRM_alert_rc=0 identifies a successful fence + # completion. Device-add/remove/topology notifications never set + # CRM_alert_kind=fencing, so they are already excluded. 
+ if [ "$CRM_alert_kind" = "fencing" ] && [ "$CRM_alert_rc" = "0" ] && [ -n "$CRM_alert_node" ]; then + logger -t k8s-taint-alert "Fencing succeeded for ${CRM_alert_node}, triggering taint service" + sudo systemctl start --no-block "taint-node@${CRM_alert_node}.service" + fi + exit 0 diff --git a/templates/master/00-master/two-node-with-fencing/files/taint-fenced-node.yaml b/templates/master/00-master/two-node-with-fencing/files/taint-fenced-node.yaml new file mode 100644 index 0000000000..8895c58f78 --- /dev/null +++ b/templates/master/00-master/two-node-with-fencing/files/taint-fenced-node.yaml @@ -0,0 +1,73 @@ +mode: 0755 +path: "/usr/local/bin/taint-fenced-node.sh" +contents: + inline: | + #!/bin/bash + set -euo pipefail + FENCED_NODE="$1" + KUBECONFIG="/etc/kubernetes/static-pod-resources/kube-apiserver-certs/secrets/node-kubeconfigs/localhost.kubeconfig" + TAINT_KEY="node.kubernetes.io/out-of-service" + TAINT_VALUE="nodeshutdown" + ANNOTATION_KEY="node.kubernetes.io/out-of-service-applied-by" + ANNOTATION_VALUE="pacemaker" + OC="oc --kubeconfig=$KUBECONFIG" + + if [[ ! "$FENCED_NODE" =~ ^[a-z0-9]([a-z0-9.-]*[a-z0-9])?$ ]]; then + logger -t taint-fenced-node "ERROR: Invalid or empty node name: '${FENCED_NODE:-}'" + exit 1 + fi + + logger -t taint-fenced-node "Waiting for API server to taint fenced node ${FENCED_NODE}" + + # Phase 1: wait for API server to become writable (etcd quorum recovery) + # Read-only checks (oc get nodes) can pass from kube-apiserver watch cache + # before etcd is fully writable. Use a write probe to confirm writes work. + until $OC annotate node "$(hostname)" --overwrite __taint-probe-="" &>/dev/null; do + sleep 5 + done + + # Phase 2: verify the fenced node exists + if ! $OC get node "$FENCED_NODE" &>/dev/null; then + logger -t taint-fenced-node "ERROR: Node ${FENCED_NODE} not found in cluster — name mismatch?" 
+ exit 1 + fi + + # Phase 3: only taint if the node is still NotReady (see Open Issues — race condition) + NODE_READY=$($OC get node "$FENCED_NODE" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}') + if [ "$NODE_READY" = "True" ]; then + logger -t taint-fenced-node "Node ${FENCED_NODE} is already Ready — skipping taint" + exit 0 + fi + + logger -t taint-fenced-node "API server responsive, node ${FENCED_NODE} is NotReady — applying annotation and taint" + + # Apply annotation + taint atomically via a single strategic-merge patch. + # This eliminates the crash-safety concern of two separate API calls + # (annotation-then-taint ordering) — both land in one etcd write. + PATCH=$(cat </dev/null) || true + if [ "$NODE_READY" = "True" ]; then + logger -t taint-fenced-node "Node ${FENCED_NODE} became Ready before taint could be applied — aborting" + exit 0 + fi + + if $OC patch node "$FENCED_NODE" --type=strategic -p "$PATCH" 2>/dev/null; then + logger -t taint-fenced-node "Successfully tainted and annotated ${FENCED_NODE}" + exit 0 + fi + + logger -t taint-fenced-node "Attempt ${attempt}/${MAX_RETRIES} failed, retrying in ${RETRY_INTERVAL}s" + sleep $RETRY_INTERVAL + done + + logger -t taint-fenced-node "ERROR: Failed to taint ${FENCED_NODE} after ${MAX_RETRIES} attempts" + exit 1 diff --git a/templates/master/00-master/two-node-with-fencing/units/taint-node@.service.yaml b/templates/master/00-master/two-node-with-fencing/units/taint-node@.service.yaml new file mode 100644 index 0000000000..f762da2a29 --- /dev/null +++ b/templates/master/00-master/two-node-with-fencing/units/taint-node@.service.yaml @@ -0,0 +1,10 @@ +name: "taint-node@.service" +contents: | + [Unit] + Description=Taint fenced node %i in Kubernetes + After=network.target + + [Service] + Type=oneshot + ExecStart=/usr/local/bin/taint-fenced-node.sh %i + TimeoutStartSec=600 From 1f50cc1ea40e2778b9b4b1acc2ceb6b93a682b12 Mon Sep 17 00:00:00 2001 From: Vincenzo Mauro Date: Thu, 28 May 2026 13:54:08 
+0200 Subject: [PATCH 2/8] scripts fixes and CR comments --- .../files/hacluster-taint-sudoers.yaml | 2 +- .../files/k8s-taint-alert.yaml | 4 ++ .../files/taint-fenced-node.yaml | 45 ++++++++++++++----- 3 files changed, 38 insertions(+), 13 deletions(-) diff --git a/templates/master/00-master/two-node-with-fencing/files/hacluster-taint-sudoers.yaml b/templates/master/00-master/two-node-with-fencing/files/hacluster-taint-sudoers.yaml index 9d833816c9..30b9a32c50 100644 --- a/templates/master/00-master/two-node-with-fencing/files/hacluster-taint-sudoers.yaml +++ b/templates/master/00-master/two-node-with-fencing/files/hacluster-taint-sudoers.yaml @@ -2,4 +2,4 @@ mode: 0440 path: "/etc/sudoers.d/hacluster-taint" contents: inline: | - hacluster ALL=(root) NOPASSWD: /usr/bin/systemctl start taint-node@* + hacluster ALL=(root) NOPASSWD: /usr/bin/systemctl start --no-block taint-node@* diff --git a/templates/master/00-master/two-node-with-fencing/files/k8s-taint-alert.yaml b/templates/master/00-master/two-node-with-fencing/files/k8s-taint-alert.yaml index 5ba191804d..53c8a6c582 100644 --- a/templates/master/00-master/two-node-with-fencing/files/k8s-taint-alert.yaml +++ b/templates/master/00-master/two-node-with-fencing/files/k8s-taint-alert.yaml @@ -8,6 +8,10 @@ contents: # completion. Device-add/remove/topology notifications never set # CRM_alert_kind=fencing, so they are already excluded. if [ "$CRM_alert_kind" = "fencing" ] && [ "$CRM_alert_rc" = "0" ] && [ -n "$CRM_alert_node" ]; then + if ! 
[[ "$CRM_alert_node" =~ ^[a-zA-Z0-9._-]+$ ]]; then + logger -t k8s-taint-alert "ERROR: Invalid node name from pacemaker: '${CRM_alert_node}'" + exit 0 + fi logger -t k8s-taint-alert "Fencing succeeded for ${CRM_alert_node}, triggering taint service" sudo systemctl start --no-block "taint-node@${CRM_alert_node}.service" fi diff --git a/templates/master/00-master/two-node-with-fencing/files/taint-fenced-node.yaml b/templates/master/00-master/two-node-with-fencing/files/taint-fenced-node.yaml index 8895c58f78..eedc776ffc 100644 --- a/templates/master/00-master/two-node-with-fencing/files/taint-fenced-node.yaml +++ b/templates/master/00-master/two-node-with-fencing/files/taint-fenced-node.yaml @@ -22,28 +22,46 @@ contents: # Phase 1: wait for API server to become writable (etcd quorum recovery) # Read-only checks (oc get nodes) can pass from kube-apiserver watch cache # before etcd is fully writable. Use a write probe to confirm writes work. - until $OC annotate node "$(hostname)" --overwrite __taint-probe-="" &>/dev/null; do + until $OC annotate node "$(hostname)" --overwrite tnf-taint-probe="" &>/dev/null; do sleep 5 done # Phase 2: verify the fenced node exists if ! $OC get node "$FENCED_NODE" &>/dev/null; then - logger -t taint-fenced-node "ERROR: Node ${FENCED_NODE} not found in cluster — name mismatch?" + logger -t taint-fenced-node "ERROR: Node ${FENCED_NODE} not found in cluster - name mismatch?" exit 1 fi - # Phase 3: only taint if the node is still NotReady (see Open Issues — race condition) + # Phase 3: check if fenced node is still an active etcd voter. + # After fencing, CEO removes the dead member from etcd. If the node + # is no longer a voter, fencing is confirmed and the node's Ready + # status may be stale (node controller hasn't caught up yet). 
+ node_is_etcd_voter() { + local node="$1" + local members + members=$(podman exec etcd sh -lc 'etcdctl member list -w json' 2>/dev/null) || return 1 + echo "$members" | python3 -c " + import sys, json + data = json.load(sys.stdin) + for m in data.get('members', []): + if m.get('name') == '$node' and not m.get('isLearner', False): + sys.exit(0) + sys.exit(1) + " 2>/dev/null + } + NODE_READY=$($OC get node "$FENCED_NODE" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}') if [ "$NODE_READY" = "True" ]; then - logger -t taint-fenced-node "Node ${FENCED_NODE} is already Ready — skipping taint" - exit 0 + if node_is_etcd_voter "$FENCED_NODE"; then + logger -t taint-fenced-node "Node ${FENCED_NODE} is Ready and still an etcd voter - skipping taint" + exit 0 + fi + logger -t taint-fenced-node "Node ${FENCED_NODE} shows Ready but is no longer an etcd voter - status is stale, proceeding with taint" fi - logger -t taint-fenced-node "API server responsive, node ${FENCED_NODE} is NotReady — applying annotation and taint" + logger -t taint-fenced-node "API server responsive, applying annotation and taint to ${FENCED_NODE}" # Apply annotation + taint atomically via a single strategic-merge patch. - # This eliminates the crash-safety concern of two separate API calls - # (annotation-then-taint ordering) — both land in one etcd write. PATCH=$(cat </dev/null) || true - if [ "$NODE_READY" = "True" ]; then - logger -t taint-fenced-node "Node ${FENCED_NODE} became Ready before taint could be applied — aborting" + # Re-check node existence and status on every attempt + if ! 
NODE_READY=$($OC get node "$FENCED_NODE" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null); then + logger -t taint-fenced-node "ERROR: Node ${FENCED_NODE} no longer exists - aborting" + exit 1 + fi + if [ "$NODE_READY" = "True" ] && node_is_etcd_voter "$FENCED_NODE"; then + logger -t taint-fenced-node "Node ${FENCED_NODE} became Ready and rejoined etcd - aborting" exit 0 fi From ceb1f70528d17c003fb3f8e7677fad87739ae2ed Mon Sep 17 00:00:00 2001 From: Vincenzo Mauro Date: Thu, 28 May 2026 17:35:03 +0200 Subject: [PATCH 3/8] feat: add untaint-on-rejoin alert agent and remove etcd voter check from taint logic --- .../files/hacluster-taint-sudoers.yaml | 1 + .../files/k8s-untaint-alert.yaml | 17 ++++ .../files/taint-fenced-node.yaml | 37 --------- .../files/untaint-fenced-node.yaml | 79 +++++++++++++++++++ .../units/untaint-node@.service.yaml | 10 +++ 5 files changed, 107 insertions(+), 37 deletions(-) create mode 100644 templates/master/00-master/two-node-with-fencing/files/k8s-untaint-alert.yaml create mode 100644 templates/master/00-master/two-node-with-fencing/files/untaint-fenced-node.yaml create mode 100644 templates/master/00-master/two-node-with-fencing/units/untaint-node@.service.yaml diff --git a/templates/master/00-master/two-node-with-fencing/files/hacluster-taint-sudoers.yaml b/templates/master/00-master/two-node-with-fencing/files/hacluster-taint-sudoers.yaml index 30b9a32c50..cdca638b9d 100644 --- a/templates/master/00-master/two-node-with-fencing/files/hacluster-taint-sudoers.yaml +++ b/templates/master/00-master/two-node-with-fencing/files/hacluster-taint-sudoers.yaml @@ -3,3 +3,4 @@ path: "/etc/sudoers.d/hacluster-taint" contents: inline: | hacluster ALL=(root) NOPASSWD: /usr/bin/systemctl start --no-block taint-node@* + hacluster ALL=(root) NOPASSWD: /usr/bin/systemctl start --no-block untaint-node@* diff --git a/templates/master/00-master/two-node-with-fencing/files/k8s-untaint-alert.yaml
b/templates/master/00-master/two-node-with-fencing/files/k8s-untaint-alert.yaml new file mode 100644 index 0000000000..a58b142a06 --- /dev/null +++ b/templates/master/00-master/two-node-with-fencing/files/k8s-untaint-alert.yaml @@ -0,0 +1,17 @@ +mode: 0755 +path: "/var/lib/pacemaker/alerts/k8s-untaint-alert.sh" +contents: + inline: | + #!/bin/bash + # Pacemaker alert agent for node-rejoin events. + # CRM_alert_kind=node + CRM_alert_desc=member identifies a node + # that has rejoined the cluster after being fenced. + if [ "$CRM_alert_kind" = "node" ] && [ "$CRM_alert_desc" = "member" ] && [ -n "$CRM_alert_node" ]; then + if ! [[ "$CRM_alert_node" =~ ^[a-zA-Z0-9._-]+$ ]]; then + logger -t k8s-untaint-alert "ERROR: Invalid node name from pacemaker: '${CRM_alert_node}'" + exit 0 + fi + logger -t k8s-untaint-alert "Node ${CRM_alert_node} rejoined cluster, triggering untaint service" + sudo systemctl start --no-block "untaint-node@${CRM_alert_node}.service" + fi + exit 0 diff --git a/templates/master/00-master/two-node-with-fencing/files/taint-fenced-node.yaml b/templates/master/00-master/two-node-with-fencing/files/taint-fenced-node.yaml index eedc776ffc..5e34aef541 100644 --- a/templates/master/00-master/two-node-with-fencing/files/taint-fenced-node.yaml +++ b/templates/master/00-master/two-node-with-fencing/files/taint-fenced-node.yaml @@ -32,33 +32,6 @@ contents: exit 1 fi - # Phase 3: check if fenced node is still an active etcd voter. - # After fencing, CEO removes the dead member from etcd. If the node - # is no longer a voter, fencing is confirmed and the node's Ready - # status may be stale (node controller hasn't caught up yet). 
- node_is_etcd_voter() { - local node="$1" - local members - members=$(podman exec etcd sh -lc 'etcdctl member list -w json' 2>/dev/null) || return 1 - echo "$members" | python3 -c " - import sys, json - data = json.load(sys.stdin) - for m in data.get('members', []): - if m.get('name') == '$node' and not m.get('isLearner', False): - sys.exit(0) - sys.exit(1) - " 2>/dev/null - } - - NODE_READY=$($OC get node "$FENCED_NODE" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}') - if [ "$NODE_READY" = "True" ]; then - if node_is_etcd_voter "$FENCED_NODE"; then - logger -t taint-fenced-node "Node ${FENCED_NODE} is Ready and still an etcd voter - skipping taint" - exit 0 - fi - logger -t taint-fenced-node "Node ${FENCED_NODE} shows Ready but is no longer an etcd voter - status is stale, proceeding with taint" - fi - logger -t taint-fenced-node "API server responsive, applying annotation and taint to ${FENCED_NODE}" # Apply annotation + taint atomically via a single strategic-merge patch. @@ -71,16 +44,6 @@ contents: MAX_RETRIES=12 RETRY_INTERVAL=5 for attempt in $(seq 1 $MAX_RETRIES); do - # Re-check node existence and status on every attempt - if ! 
NODE_READY=$($OC get node "$FENCED_NODE" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null); then - logger -t taint-fenced-node "ERROR: Node ${FENCED_NODE} no longer exists - aborting" - exit 1 - fi - if [ "$NODE_READY" = "True" ] && node_is_etcd_voter "$FENCED_NODE"; then - logger -t taint-fenced-node "Node ${FENCED_NODE} became Ready and rejoined etcd - aborting" - exit 0 - fi - if $OC patch node "$FENCED_NODE" --type=strategic -p "$PATCH" 2>/dev/null; then logger -t taint-fenced-node "Successfully tainted and annotated ${FENCED_NODE}" exit 0 diff --git a/templates/master/00-master/two-node-with-fencing/files/untaint-fenced-node.yaml b/templates/master/00-master/two-node-with-fencing/files/untaint-fenced-node.yaml new file mode 100644 index 0000000000..03685b6c1e --- /dev/null +++ b/templates/master/00-master/two-node-with-fencing/files/untaint-fenced-node.yaml @@ -0,0 +1,79 @@ +mode: 0755 +path: "/usr/local/bin/untaint-fenced-node.sh" +contents: + inline: | + #!/bin/bash + set -euo pipefail + REJOINED_NODE="$1" + KUBECONFIG="/etc/kubernetes/static-pod-resources/kube-apiserver-certs/secrets/node-kubeconfigs/localhost.kubeconfig" + TAINT_KEY="node.kubernetes.io/out-of-service" + ANNOTATION_KEY="node.kubernetes.io/out-of-service-applied-by" + ANNOTATION_VALUE="pacemaker" + OC="oc --kubeconfig=$KUBECONFIG" + + if [[ ! "$REJOINED_NODE" =~ ^[a-z0-9]([a-z0-9.-]*[a-z0-9])?$ ]]; then + logger -t untaint-fenced-node "ERROR: Invalid or empty node name: '${REJOINED_NODE:-}'" + exit 1 + fi + + logger -t untaint-fenced-node "Node ${REJOINED_NODE} rejoined cluster, checking if untaint is needed" + + # Phase 1: wait for API server to become writable + until $OC annotate node "$(hostname)" --overwrite tnf-taint-probe="" &>/dev/null; do + sleep 5 + done + + # Phase 2: verify the node exists + if ! $OC get node "$REJOINED_NODE" &>/dev/null; then + logger -t untaint-fenced-node "ERROR: Node ${REJOINED_NODE} not found in cluster - name mismatch?" 
+ exit 1 + fi + + # Phase 3: check if we applied the taint (annotation guard) + APPLIED_BY=$($OC get node "$REJOINED_NODE" -o jsonpath="{.metadata.annotations['${ANNOTATION_KEY}']}" 2>/dev/null) || true + if [ "$APPLIED_BY" != "$ANNOTATION_VALUE" ]; then + logger -t untaint-fenced-node "Node ${REJOINED_NODE} has no pacemaker out-of-service annotation - nothing to do" + exit 0 + fi + + # Phase 4: wait for the node to become Ready (kubelet is running) + MAX_READY_WAIT=60 + READY_INTERVAL=5 + for i in $(seq 1 $((MAX_READY_WAIT / READY_INTERVAL))); do + NODE_READY=$($OC get node "$REJOINED_NODE" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null) || true + if [ "$NODE_READY" = "True" ]; then + break + fi + logger -t untaint-fenced-node "Waiting for node ${REJOINED_NODE} to become Ready (attempt ${i})" + sleep $READY_INTERVAL + done + + if [ "$NODE_READY" != "True" ]; then + logger -t untaint-fenced-node "WARNING: Node ${REJOINED_NODE} not Ready after ${MAX_READY_WAIT}s - proceeding with untaint anyway" + fi + + logger -t untaint-fenced-node "Removing out-of-service taint and annotation from ${REJOINED_NODE}" + + # Phase 5: remove taint and annotation with retries + MAX_RETRIES=12 + RETRY_INTERVAL=5 + for attempt in $(seq 1 $MAX_RETRIES); do + # Re-check annotation in case another process already cleaned up + APPLIED_BY=$($OC get node "$REJOINED_NODE" -o jsonpath="{.metadata.annotations['${ANNOTATION_KEY}']}" 2>/dev/null) || true + if [ "$APPLIED_BY" != "$ANNOTATION_VALUE" ]; then + logger -t untaint-fenced-node "Annotation already removed from ${REJOINED_NODE} - done" + exit 0 + fi + + if $OC adm taint node "$REJOINED_NODE" "${TAINT_KEY}-" 2>/dev/null && \ + $OC annotate node "$REJOINED_NODE" "${ANNOTATION_KEY}-" 2>/dev/null; then + logger -t untaint-fenced-node "Successfully untainted and removed annotation from ${REJOINED_NODE}" + exit 0 + fi + + logger -t untaint-fenced-node "Attempt ${attempt}/${MAX_RETRIES} failed, retrying in 
${RETRY_INTERVAL}s" + sleep $RETRY_INTERVAL + done + + logger -t untaint-fenced-node "ERROR: Failed to untaint ${REJOINED_NODE} after ${MAX_RETRIES} attempts" + exit 1 diff --git a/templates/master/00-master/two-node-with-fencing/units/untaint-node@.service.yaml b/templates/master/00-master/two-node-with-fencing/units/untaint-node@.service.yaml new file mode 100644 index 0000000000..91e148d951 --- /dev/null +++ b/templates/master/00-master/two-node-with-fencing/units/untaint-node@.service.yaml @@ -0,0 +1,10 @@ +name: "untaint-node@.service" +contents: | + [Unit] + Description=Untaint rejoined node %i in Kubernetes + After=network.target + + [Service] + Type=oneshot + ExecStart=/usr/local/bin/untaint-fenced-node.sh %i + TimeoutStartSec=600 From a355aa808b9423be39bffbb7b9b0b26fc2960101 Mon Sep 17 00:00:00 2001 From: Vincenzo Mauro Date: Fri, 29 May 2026 11:36:43 +0200 Subject: [PATCH 4/8] CR comments and bug fixes --- .../files/k8s-taint-alert.yaml | 2 +- .../files/k8s-untaint-alert.yaml | 7 ++- .../files/taint-fenced-node.yaml | 20 +-------- .../files/untaint-fenced-node.yaml | 44 ++++++++++++++----- .../units/untaint-node@.service.yaml | 2 +- 5 files changed, 41 insertions(+), 34 deletions(-) diff --git a/templates/master/00-master/two-node-with-fencing/files/k8s-taint-alert.yaml b/templates/master/00-master/two-node-with-fencing/files/k8s-taint-alert.yaml index 53c8a6c582..baf7714cfe 100644 --- a/templates/master/00-master/two-node-with-fencing/files/k8s-taint-alert.yaml +++ b/templates/master/00-master/two-node-with-fencing/files/k8s-taint-alert.yaml @@ -8,7 +8,7 @@ contents: # completion. Device-add/remove/topology notifications never set # CRM_alert_kind=fencing, so they are already excluded. if [ "$CRM_alert_kind" = "fencing" ] && [ "$CRM_alert_rc" = "0" ] && [ -n "$CRM_alert_node" ]; then - if ! [[ "$CRM_alert_node" =~ ^[a-zA-Z0-9._-]+$ ]]; then + if ! 
[[ "$CRM_alert_node" =~ ^[a-z0-9]([a-z0-9.-]*[a-z0-9])?$ ]]; then logger -t k8s-taint-alert "ERROR: Invalid node name from pacemaker: '${CRM_alert_node}'" exit 0 fi diff --git a/templates/master/00-master/two-node-with-fencing/files/k8s-untaint-alert.yaml b/templates/master/00-master/two-node-with-fencing/files/k8s-untaint-alert.yaml index a58b142a06..1d7440f67b 100644 --- a/templates/master/00-master/two-node-with-fencing/files/k8s-untaint-alert.yaml +++ b/templates/master/00-master/two-node-with-fencing/files/k8s-untaint-alert.yaml @@ -7,11 +7,14 @@ contents: # CRM_alert_kind=node + CRM_alert_desc=member identifies a node # that has rejoined the cluster after being fenced. if [ "$CRM_alert_kind" = "node" ] && [ "$CRM_alert_desc" = "member" ] && [ -n "$CRM_alert_node" ]; then - if ! [[ "$CRM_alert_node" =~ ^[a-zA-Z0-9._-]+$ ]]; then + if ! [[ "$CRM_alert_node" =~ ^[a-z0-9]([a-z0-9.-]*[a-z0-9])?$ ]]; then logger -t k8s-untaint-alert "ERROR: Invalid node name from pacemaker: '${CRM_alert_node}'" exit 0 fi logger -t k8s-untaint-alert "Node ${CRM_alert_node} rejoined cluster, triggering untaint service" - sudo systemctl start --no-block "untaint-node@${CRM_alert_node}.service" + if ! 
sudo systemctl start --no-block "untaint-node@${CRM_alert_node}.service"; then + logger -t k8s-untaint-alert "ERROR: Failed to start untaint-node@${CRM_alert_node}.service" + exit 1 + fi fi exit 0 diff --git a/templates/master/00-master/two-node-with-fencing/files/taint-fenced-node.yaml b/templates/master/00-master/two-node-with-fencing/files/taint-fenced-node.yaml index 5e34aef541..3c3ea01310 100644 --- a/templates/master/00-master/two-node-with-fencing/files/taint-fenced-node.yaml +++ b/templates/master/00-master/two-node-with-fencing/files/taint-fenced-node.yaml @@ -17,31 +17,15 @@ contents: exit 1 fi - logger -t taint-fenced-node "Waiting for API server to taint fenced node ${FENCED_NODE}" + logger -t taint-fenced-node "Applying out-of-service taint to fenced node ${FENCED_NODE}" - # Phase 1: wait for API server to become writable (etcd quorum recovery) - # Read-only checks (oc get nodes) can pass from kube-apiserver watch cache - # before etcd is fully writable. Use a write probe to confirm writes work. - until $OC annotate node "$(hostname)" --overwrite tnf-taint-probe="" &>/dev/null; do - sleep 5 - done - - # Phase 2: verify the fenced node exists - if ! $OC get node "$FENCED_NODE" &>/dev/null; then - logger -t taint-fenced-node "ERROR: Node ${FENCED_NODE} not found in cluster - name mismatch?" - exit 1 - fi - - logger -t taint-fenced-node "API server responsive, applying annotation and taint to ${FENCED_NODE}" - - # Apply annotation + taint atomically via a single strategic-merge patch. 
PATCH=$(cat </dev/null; then diff --git a/templates/master/00-master/two-node-with-fencing/files/untaint-fenced-node.yaml b/templates/master/00-master/two-node-with-fencing/files/untaint-fenced-node.yaml index 03685b6c1e..f1d8991db1 100644 --- a/templates/master/00-master/two-node-with-fencing/files/untaint-fenced-node.yaml +++ b/templates/master/00-master/two-node-with-fencing/files/untaint-fenced-node.yaml @@ -18,25 +18,38 @@ contents: logger -t untaint-fenced-node "Node ${REJOINED_NODE} rejoined cluster, checking if untaint is needed" - # Phase 1: wait for API server to become writable - until $OC annotate node "$(hostname)" --overwrite tnf-taint-probe="" &>/dev/null; do - sleep 5 + # Phase 1: wait for pacemaker to report the node as Online. + # The alert fires on corosync membership ("member"), but pacemaker Online + # confirms the full cluster join protocol succeeded — the node has probed + # its resources and registered with the DC. This prevents untainting a node + # that joined corosync but may still fail pacemaker startup and get re-fenced. + MAX_PCMK_WAIT=120 + PCMK_INTERVAL=5 + PCMK_ONLINE=false + for i in $(seq 1 $((MAX_PCMK_WAIT / PCMK_INTERVAL))); do + if crm_mon -1 2>/dev/null | grep "Online:" | grep -qw "$REJOINED_NODE"; then + PCMK_ONLINE=true + break + fi + logger -t untaint-fenced-node "Waiting for pacemaker to report ${REJOINED_NODE} as Online (attempt ${i})" + sleep $PCMK_INTERVAL done - # Phase 2: verify the node exists - if ! $OC get node "$REJOINED_NODE" &>/dev/null; then - logger -t untaint-fenced-node "ERROR: Node ${REJOINED_NODE} not found in cluster - name mismatch?" 
+ if [ "$PCMK_ONLINE" != "true" ]; then + logger -t untaint-fenced-node "ERROR: Pacemaker does not report ${REJOINED_NODE} as Online after ${MAX_PCMK_WAIT}s - aborting untaint" exit 1 fi - # Phase 3: check if we applied the taint (annotation guard) + logger -t untaint-fenced-node "Pacemaker reports ${REJOINED_NODE} as Online" + + # Phase 2: check if we applied the taint (annotation guard) APPLIED_BY=$($OC get node "$REJOINED_NODE" -o jsonpath="{.metadata.annotations['${ANNOTATION_KEY}']}" 2>/dev/null) || true if [ "$APPLIED_BY" != "$ANNOTATION_VALUE" ]; then logger -t untaint-fenced-node "Node ${REJOINED_NODE} has no pacemaker out-of-service annotation - nothing to do" exit 0 fi - # Phase 4: wait for the node to become Ready (kubelet is running) + # Phase 3: wait for the node to become Ready (kubelet is running) MAX_READY_WAIT=60 READY_INTERVAL=5 for i in $(seq 1 $((MAX_READY_WAIT / READY_INTERVAL))); do @@ -54,7 +67,9 @@ contents: logger -t untaint-fenced-node "Removing out-of-service taint and annotation from ${REJOINED_NODE}" - # Phase 5: remove taint and annotation with retries + # Phase 4: remove taint and annotation with retries. + # Operations are independent — if taint was already removed (partial prior run), + # we still proceed to remove the annotation. 
MAX_RETRIES=12 RETRY_INTERVAL=5 for attempt in $(seq 1 $MAX_RETRIES); do @@ -65,13 +80,18 @@ contents: exit 0 fi - if $OC adm taint node "$REJOINED_NODE" "${TAINT_KEY}-" 2>/dev/null && \ - $OC annotate node "$REJOINED_NODE" "${ANNOTATION_KEY}-" 2>/dev/null; then + $OC adm taint node "$REJOINED_NODE" "${TAINT_KEY}-" 2>/dev/null || true + $OC annotate node "$REJOINED_NODE" "${ANNOTATION_KEY}-" 2>/dev/null || true + + # Verify both are actually gone + REMAINING_TAINT=$($OC get node "$REJOINED_NODE" -o jsonpath="{.spec.taints[?(@.key=='${TAINT_KEY}')].key}" 2>/dev/null) || true + REMAINING_ANNOT=$($OC get node "$REJOINED_NODE" -o jsonpath="{.metadata.annotations['${ANNOTATION_KEY}']}" 2>/dev/null) || true + if [ -z "$REMAINING_TAINT" ] && [ -z "$REMAINING_ANNOT" ]; then logger -t untaint-fenced-node "Successfully untainted and removed annotation from ${REJOINED_NODE}" exit 0 fi - logger -t untaint-fenced-node "Attempt ${attempt}/${MAX_RETRIES} failed, retrying in ${RETRY_INTERVAL}s" + logger -t untaint-fenced-node "Attempt ${attempt}/${MAX_RETRIES}: taint='${REMAINING_TAINT}' annotation='${REMAINING_ANNOT}' - retrying in ${RETRY_INTERVAL}s" sleep $RETRY_INTERVAL done diff --git a/templates/master/00-master/two-node-with-fencing/units/untaint-node@.service.yaml b/templates/master/00-master/two-node-with-fencing/units/untaint-node@.service.yaml index 91e148d951..02d5470636 100644 --- a/templates/master/00-master/two-node-with-fencing/units/untaint-node@.service.yaml +++ b/templates/master/00-master/two-node-with-fencing/units/untaint-node@.service.yaml @@ -2,7 +2,7 @@ name: "untaint-node@.service" contents: | [Unit] Description=Untaint rejoined node %i in Kubernetes - After=network.target + After=network.target taint-node@%i.service [Service] Type=oneshot From dedcee4fc1454d8e2cecdd0f7d3f1823646e1119 Mon Sep 17 00:00:00 2001 From: Vincenzo Mauro Date: Fri, 29 May 2026 12:15:40 +0200 Subject: [PATCH 5/8] untaint logic fixes --- .../files/k8s-taint-alert.yaml | 18 
---------- .../files/taint-fenced-node.yaml | 4 +-- .../files/tnf-taint-alert.yaml | 14 ++++++++ ...aint-alert.yaml => tnf-untaint-alert.yaml} | 10 ++---- .../files/untaint-fenced-node.yaml | 36 +++++++++++-------- 5 files changed, 40 insertions(+), 42 deletions(-) delete mode 100644 templates/master/00-master/two-node-with-fencing/files/k8s-taint-alert.yaml create mode 100644 templates/master/00-master/two-node-with-fencing/files/tnf-taint-alert.yaml rename templates/master/00-master/two-node-with-fencing/files/{k8s-untaint-alert.yaml => tnf-untaint-alert.yaml} (58%) diff --git a/templates/master/00-master/two-node-with-fencing/files/k8s-taint-alert.yaml b/templates/master/00-master/two-node-with-fencing/files/k8s-taint-alert.yaml deleted file mode 100644 index baf7714cfe..0000000000 --- a/templates/master/00-master/two-node-with-fencing/files/k8s-taint-alert.yaml +++ /dev/null @@ -1,18 +0,0 @@ -mode: 0755 -path: "/var/lib/pacemaker/alerts/k8s-taint-alert.sh" -contents: - inline: | - #!/bin/bash - # Pacemaker alert agent for fencing events. - # CRM_alert_kind=fencing + CRM_alert_rc=0 identifies a successful fence - # completion. Device-add/remove/topology notifications never set - # CRM_alert_kind=fencing, so they are already excluded. - if [ "$CRM_alert_kind" = "fencing" ] && [ "$CRM_alert_rc" = "0" ] && [ -n "$CRM_alert_node" ]; then - if ! 
[[ "$CRM_alert_node" =~ ^[a-z0-9]([a-z0-9.-]*[a-z0-9])?$ ]]; then - logger -t k8s-taint-alert "ERROR: Invalid node name from pacemaker: '${CRM_alert_node}'" - exit 0 - fi - logger -t k8s-taint-alert "Fencing succeeded for ${CRM_alert_node}, triggering taint service" - sudo systemctl start --no-block "taint-node@${CRM_alert_node}.service" - fi - exit 0 diff --git a/templates/master/00-master/two-node-with-fencing/files/taint-fenced-node.yaml b/templates/master/00-master/two-node-with-fencing/files/taint-fenced-node.yaml index 3c3ea01310..15136bcc40 100644 --- a/templates/master/00-master/two-node-with-fencing/files/taint-fenced-node.yaml +++ b/templates/master/00-master/two-node-with-fencing/files/taint-fenced-node.yaml @@ -12,8 +12,8 @@ contents: ANNOTATION_VALUE="pacemaker" OC="oc --kubeconfig=$KUBECONFIG" - if [[ ! "$FENCED_NODE" =~ ^[a-z0-9]([a-z0-9.-]*[a-z0-9])?$ ]]; then - logger -t taint-fenced-node "ERROR: Invalid or empty node name: '${FENCED_NODE:-}'" + if [[ -z "${FENCED_NODE:-}" ]]; then + logger -t taint-fenced-node "ERROR: Node name is empty or not set" exit 1 fi diff --git a/templates/master/00-master/two-node-with-fencing/files/tnf-taint-alert.yaml b/templates/master/00-master/two-node-with-fencing/files/tnf-taint-alert.yaml new file mode 100644 index 0000000000..0e03d01162 --- /dev/null +++ b/templates/master/00-master/two-node-with-fencing/files/tnf-taint-alert.yaml @@ -0,0 +1,14 @@ +mode: 0755 +path: "/var/lib/pacemaker/alerts/tnf-taint-alert.sh" +contents: + inline: | + #!/bin/bash + # Pacemaker alert agent for fencing events. + # CRM_alert_kind=fencing, CRM_alert_rc=0 identifies a successful fence + # completion. Device-add/remove/topology notifications never set + # CRM_alert_kind=fencing, so they are already excluded.
+ if [ "$CRM_alert_kind" = "fencing" ] && [ "$CRM_alert_rc" = "0" ] && [ -n "$CRM_alert_node" ]; then + logger -t tnf-taint-alert "Fencing succeeded for ${CRM_alert_node}, triggering taint service" + sudo systemctl start --no-block "taint-node@${CRM_alert_node}.service" + fi + exit 0 diff --git a/templates/master/00-master/two-node-with-fencing/files/k8s-untaint-alert.yaml b/templates/master/00-master/two-node-with-fencing/files/tnf-untaint-alert.yaml similarity index 58% rename from templates/master/00-master/two-node-with-fencing/files/k8s-untaint-alert.yaml rename to templates/master/00-master/two-node-with-fencing/files/tnf-untaint-alert.yaml index 1d7440f67b..892ba29449 100644 --- a/templates/master/00-master/two-node-with-fencing/files/k8s-untaint-alert.yaml +++ b/templates/master/00-master/two-node-with-fencing/files/tnf-untaint-alert.yaml @@ -1,5 +1,5 @@ mode: 0755 -path: "/var/lib/pacemaker/alerts/k8s-untaint-alert.sh" +path: "/var/lib/pacemaker/alerts/tnf-untaint-alert.sh" contents: inline: | #!/bin/bash @@ -7,13 +7,9 @@ contents: # CRM_alert_kind=node + CRM_alert_desc=member identifies a node # that has rejoined the cluster after being fenced. if [ "$CRM_alert_kind" = "node" ] && [ "$CRM_alert_desc" = "member" ] && [ -n "$CRM_alert_node" ]; then - if ! [[ "$CRM_alert_node" =~ ^[a-z0-9]([a-z0-9.-]*[a-z0-9])?$ ]]; then - logger -t k8s-untaint-alert "ERROR: Invalid node name from pacemaker: '${CRM_alert_node}'" - exit 0 - fi - logger -t k8s-untaint-alert "Node ${CRM_alert_node} rejoined cluster, triggering untaint service" + logger -t tnf-untaint-alert "Node ${CRM_alert_node} rejoined cluster, triggering untaint service" if ! 
sudo systemctl start --no-block "untaint-node@${CRM_alert_node}.service"; then - logger -t k8s-untaint-alert "ERROR: Failed to start untaint-node@${CRM_alert_node}.service" + logger -t tnf-untaint-alert "ERROR: Failed to start untaint-node@${CRM_alert_node}.service" exit 1 fi fi diff --git a/templates/master/00-master/two-node-with-fencing/files/untaint-fenced-node.yaml b/templates/master/00-master/two-node-with-fencing/files/untaint-fenced-node.yaml index f1d8991db1..1cc19a7565 100644 --- a/templates/master/00-master/two-node-with-fencing/files/untaint-fenced-node.yaml +++ b/templates/master/00-master/two-node-with-fencing/files/untaint-fenced-node.yaml @@ -11,19 +11,18 @@ contents: ANNOTATION_VALUE="pacemaker" OC="oc --kubeconfig=$KUBECONFIG" - if [[ ! "$REJOINED_NODE" =~ ^[a-z0-9]([a-z0-9.-]*[a-z0-9])?$ ]]; then - logger -t untaint-fenced-node "ERROR: Invalid or empty node name: '${REJOINED_NODE:-}'" + if [[ -z "${REJOINED_NODE:-}" ]]; then + logger -t untaint-fenced-node "ERROR: Node name is empty or not set" exit 1 fi logger -t untaint-fenced-node "Node ${REJOINED_NODE} rejoined cluster, checking if untaint is needed" - # Phase 1: wait for pacemaker to report the node as Online. + # Wait for pacemaker to report the node as Online. # The alert fires on corosync membership ("member"), but pacemaker Online - # confirms the full cluster join protocol succeeded — the node has probed - # its resources and registered with the DC. This prevents untainting a node + # confirms the full cluster join succeeded. This prevents untainting a node # that joined corosync but may still fail pacemaker startup and get re-fenced. 
- MAX_PCMK_WAIT=120 + MAX_PCMK_WAIT=180 PCMK_INTERVAL=5 PCMK_ONLINE=false for i in $(seq 1 $((MAX_PCMK_WAIT / PCMK_INTERVAL))); do @@ -42,15 +41,15 @@ contents: logger -t untaint-fenced-node "Pacemaker reports ${REJOINED_NODE} as Online" - # Phase 2: check if we applied the taint (annotation guard) + # Check if the taint was applied by pacemaker APPLIED_BY=$($OC get node "$REJOINED_NODE" -o jsonpath="{.metadata.annotations['${ANNOTATION_KEY}']}" 2>/dev/null) || true if [ "$APPLIED_BY" != "$ANNOTATION_VALUE" ]; then logger -t untaint-fenced-node "Node ${REJOINED_NODE} has no pacemaker out-of-service annotation - nothing to do" exit 0 fi - # Phase 3: wait for the node to become Ready (kubelet is running) - MAX_READY_WAIT=60 + # Wait for the node to become Ready (kubelet is running) + MAX_READY_WAIT=120 READY_INTERVAL=5 for i in $(seq 1 $((MAX_READY_WAIT / READY_INTERVAL))); do NODE_READY=$($OC get node "$REJOINED_NODE" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null) || true @@ -67,9 +66,8 @@ contents: logger -t untaint-fenced-node "Removing out-of-service taint and annotation from ${REJOINED_NODE}" - # Phase 4: remove taint and annotation with retries. - # Operations are independent — if taint was already removed (partial prior run), - # we still proceed to remove the annotation. + # Remove taint and annotation with retries. + # If taint was already removed, we still proceed to remove the annotation. 
MAX_RETRIES=12 RETRY_INTERVAL=5 for attempt in $(seq 1 $MAX_RETRIES); do @@ -83,9 +81,17 @@ contents: $OC adm taint node "$REJOINED_NODE" "${TAINT_KEY}-" 2>/dev/null || true $OC annotate node "$REJOINED_NODE" "${ANNOTATION_KEY}-" 2>/dev/null || true - # Verify both are actually gone - REMAINING_TAINT=$($OC get node "$REJOINED_NODE" -o jsonpath="{.spec.taints[?(@.key=='${TAINT_KEY}')].key}" 2>/dev/null) || true - REMAINING_ANNOT=$($OC get node "$REJOINED_NODE" -o jsonpath="{.metadata.annotations['${ANNOTATION_KEY}']}" 2>/dev/null) || true + # Verify both are actually gone - treat oc failures as retryable + if ! REMAINING_TAINT=$($OC get node "$REJOINED_NODE" -o jsonpath="{.spec.taints[?(@.key=='${TAINT_KEY}')].key}"); then + logger -t untaint-fenced-node "Attempt ${attempt}/${MAX_RETRIES}: oc get failed during taint verification - retrying in ${RETRY_INTERVAL}s" + sleep $RETRY_INTERVAL + continue + fi + if ! REMAINING_ANNOT=$($OC get node "$REJOINED_NODE" -o jsonpath="{.metadata.annotations['${ANNOTATION_KEY}']}"); then + logger -t untaint-fenced-node "Attempt ${attempt}/${MAX_RETRIES}: oc get failed during annotation verification - retrying in ${RETRY_INTERVAL}s" + sleep $RETRY_INTERVAL + continue + fi if [ -z "$REMAINING_TAINT" ] && [ -z "$REMAINING_ANNOT" ]; then logger -t untaint-fenced-node "Successfully untainted and removed annotation from ${REJOINED_NODE}" exit 0 From f320d37e7ccdfbcc15091ac14e652d13854087be Mon Sep 17 00:00:00 2001 From: Vincenzo Mauro Date: Fri, 29 May 2026 16:42:11 +0200 Subject: [PATCH 6/8] fixed kubelet checks --- .../files/tnf-untaint-alert.yaml | 20 ++++++++-- .../files/untaint-fenced-node.yaml | 37 ++++++++++--------- 2 files changed, 36 insertions(+), 21 deletions(-) diff --git a/templates/master/00-master/two-node-with-fencing/files/tnf-untaint-alert.yaml b/templates/master/00-master/two-node-with-fencing/files/tnf-untaint-alert.yaml index 892ba29449..6169a1f8c4 100644 --- 
a/templates/master/00-master/two-node-with-fencing/files/tnf-untaint-alert.yaml +++ b/templates/master/00-master/two-node-with-fencing/files/tnf-untaint-alert.yaml @@ -4,10 +4,24 @@ contents: inline: | #!/bin/bash # Pacemaker alert agent for node-rejoin events. - # CRM_alert_kind=node + CRM_alert_desc=member identifies a node - # that has rejoined the cluster after being fenced. + # Registered with: select_nodes + select_attributes + # + # Triggers on two event types: + # 1. CRM_alert_kind=node + CRM_alert_desc=member → corosync membership (normal rejoin after fence) + # 2. CRM_alert_kind=attribute + standby=off → pacemaker standby→online transition + TRIGGER=false + if [ "$CRM_alert_kind" = "node" ] && [ "$CRM_alert_desc" = "member" ] && [ -n "$CRM_alert_node" ]; then - logger -t tnf-untaint-alert "Node ${CRM_alert_node} rejoined cluster, triggering untaint service" + logger -t tnf-untaint-alert "Node ${CRM_alert_node} rejoined cluster (membership), triggering untaint service" + TRIGGER=true + fi + + if [ "$CRM_alert_kind" = "attribute" ] && [ "$CRM_alert_attribute_name" = "standby" ] && [ "$CRM_alert_attribute_value" = "off" ] && [ -n "$CRM_alert_node" ]; then + logger -t tnf-untaint-alert "Node ${CRM_alert_node} left standby, triggering untaint service" + TRIGGER=true + fi + + if [ "$TRIGGER" = "true" ]; then if ! 
sudo systemctl start --no-block "untaint-node@${CRM_alert_node}.service"; then logger -t tnf-untaint-alert "ERROR: Failed to start untaint-node@${CRM_alert_node}.service" exit 1 diff --git a/templates/master/00-master/two-node-with-fencing/files/untaint-fenced-node.yaml b/templates/master/00-master/two-node-with-fencing/files/untaint-fenced-node.yaml index 1cc19a7565..e7676238e7 100644 --- a/templates/master/00-master/two-node-with-fencing/files/untaint-fenced-node.yaml +++ b/templates/master/00-master/two-node-with-fencing/files/untaint-fenced-node.yaml @@ -41,18 +41,15 @@ contents: logger -t untaint-fenced-node "Pacemaker reports ${REJOINED_NODE} as Online" - # Check if the taint was applied by pacemaker - APPLIED_BY=$($OC get node "$REJOINED_NODE" -o jsonpath="{.metadata.annotations['${ANNOTATION_KEY}']}" 2>/dev/null) || true - if [ "$APPLIED_BY" != "$ANNOTATION_VALUE" ]; then - logger -t untaint-fenced-node "Node ${REJOINED_NODE} has no pacemaker out-of-service annotation - nothing to do" - exit 0 - fi - - # Wait for the node to become Ready (kubelet is running) + # Wait for the rejoined node's kubelet to become Ready. + # This must happen before the annotation check: the taint script may still + # be retrying against the API while etcd recovers quorum, and kubelet Ready + # is a natural signal that the cluster is healthy enough + # for the taint write to have completed. 
MAX_READY_WAIT=120 READY_INTERVAL=5 for i in $(seq 1 $((MAX_READY_WAIT / READY_INTERVAL))); do - NODE_READY=$($OC get node "$REJOINED_NODE" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null) || true + NODE_READY=$($OC get node "$REJOINED_NODE" -o json 2>/dev/null | jq -r '.status.conditions[] | select(.type=="Ready") | .status // empty') || true if [ "$NODE_READY" = "True" ]; then break fi @@ -64,6 +61,13 @@ contents: logger -t untaint-fenced-node "WARNING: Node ${REJOINED_NODE} not Ready after ${MAX_READY_WAIT}s - proceeding with untaint anyway" fi + # Check if the taint was applied by pacemaker + APPLIED_BY=$($OC get node "$REJOINED_NODE" -o json 2>/dev/null | jq -r --arg key "$ANNOTATION_KEY" '.metadata.annotations[$key] // empty') || true + if [ "$APPLIED_BY" != "$ANNOTATION_VALUE" ]; then + logger -t untaint-fenced-node "Node ${REJOINED_NODE} has no pacemaker out-of-service annotation - nothing to do" + exit 0 + fi + logger -t untaint-fenced-node "Removing out-of-service taint and annotation from ${REJOINED_NODE}" # Remove taint and annotation with retries. @@ -72,7 +76,7 @@ contents: RETRY_INTERVAL=5 for attempt in $(seq 1 $MAX_RETRIES); do # Re-check annotation in case another process already cleaned up - APPLIED_BY=$($OC get node "$REJOINED_NODE" -o jsonpath="{.metadata.annotations['${ANNOTATION_KEY}']}" 2>/dev/null) || true + APPLIED_BY=$($OC get node "$REJOINED_NODE" -o json 2>/dev/null | jq -r --arg key "$ANNOTATION_KEY" '.metadata.annotations[$key] // empty') || true if [ "$APPLIED_BY" != "$ANNOTATION_VALUE" ]; then logger -t untaint-fenced-node "Annotation already removed from ${REJOINED_NODE} - done" exit 0 @@ -82,16 +86,13 @@ contents: $OC annotate node "$REJOINED_NODE" "${ANNOTATION_KEY}-" 2>/dev/null || true # Verify both are actually gone - treat oc failures as retryable - if ! 
REMAINING_TAINT=$($OC get node "$REJOINED_NODE" -o jsonpath="{.spec.taints[?(@.key=='${TAINT_KEY}')].key}"); then - logger -t untaint-fenced-node "Attempt ${attempt}/${MAX_RETRIES}: oc get failed during taint verification - retrying in ${RETRY_INTERVAL}s" + NODE_JSON=$($OC get node "$REJOINED_NODE" -o json 2>/dev/null) || { + logger -t untaint-fenced-node "Attempt ${attempt}/${MAX_RETRIES}: oc get failed during verification - retrying in ${RETRY_INTERVAL}s" sleep $RETRY_INTERVAL continue - fi - if ! REMAINING_ANNOT=$($OC get node "$REJOINED_NODE" -o jsonpath="{.metadata.annotations['${ANNOTATION_KEY}']}"); then - logger -t untaint-fenced-node "Attempt ${attempt}/${MAX_RETRIES}: oc get failed during annotation verification - retrying in ${RETRY_INTERVAL}s" - sleep $RETRY_INTERVAL - continue - fi + } + REMAINING_TAINT=$(echo "$NODE_JSON" | jq -r --arg key "$TAINT_KEY" '.spec.taints // [] | map(select(.key == $key)) | .[0].key // empty') + REMAINING_ANNOT=$(echo "$NODE_JSON" | jq -r --arg key "$ANNOTATION_KEY" '.metadata.annotations[$key] // empty') if [ -z "$REMAINING_TAINT" ] && [ -z "$REMAINING_ANNOT" ]; then logger -t untaint-fenced-node "Successfully untainted and removed annotation from ${REJOINED_NODE}" exit 0 From e2c34e9927e21fb0b190e4e64d2390a37e551445 Mon Sep 17 00:00:00 2001 From: Vincenzo Mauro Date: Thu, 4 Jun 2026 14:36:10 +0200 Subject: [PATCH 7/8] fix: added volumeattachments wait before untainting --- .../files/untaint-fenced-node.yaml | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/templates/master/00-master/two-node-with-fencing/files/untaint-fenced-node.yaml b/templates/master/00-master/two-node-with-fencing/files/untaint-fenced-node.yaml index e7676238e7..2c32f052d8 100644 --- a/templates/master/00-master/two-node-with-fencing/files/untaint-fenced-node.yaml +++ b/templates/master/00-master/two-node-with-fencing/files/untaint-fenced-node.yaml @@ -61,6 +61,38 @@ contents: logger -t untaint-fenced-node "WARNING: Node 
${REJOINED_NODE} not Ready after ${MAX_READY_WAIT}s - proceeding with untaint anyway" fi + # Wait for VolumeAttachments to drain from the fenced node. + # The out-of-service taint triggers immediate pod eviction and volume + # detachment. We must wait for this to complete before removing the + # taint, otherwise pods remain on the rebooted node and wait for the + # local storage stack to recover instead of failing over to the + # surviving node where volumes can attach immediately. + MAX_VA_WAIT=300 + VA_INTERVAL=5 + VA_COUNT=$($OC get volumeattachments -o json 2>/dev/null \ + | jq --arg node "$REJOINED_NODE" '[.items[] | select(.spec.nodeName == $node)] | length') || VA_COUNT="0" + + if [ "$VA_COUNT" -gt 0 ]; then + logger -t untaint-fenced-node "Waiting for ${VA_COUNT} VolumeAttachment(s) to detach from ${REJOINED_NODE}" + for i in $(seq 1 $((MAX_VA_WAIT / VA_INTERVAL))); do + VA_COUNT=$($OC get volumeattachments -o json 2>/dev/null \ + | jq --arg node "$REJOINED_NODE" '[.items[] | select(.spec.nodeName == $node)] | length') || VA_COUNT="-1" + if [ "$VA_COUNT" = "0" ]; then + logger -t untaint-fenced-node "All VolumeAttachments detached from ${REJOINED_NODE}" + break + fi + if [ "$VA_COUNT" = "-1" ]; then + logger -t untaint-fenced-node "API unavailable checking VolumeAttachments (attempt ${i})" + else + logger -t untaint-fenced-node "Waiting for ${VA_COUNT} VolumeAttachment(s) to detach from ${REJOINED_NODE} (attempt ${i})" + fi + sleep $VA_INTERVAL + done + if [ "$VA_COUNT" != "0" ]; then + logger -t untaint-fenced-node "WARNING: ${VA_COUNT} VolumeAttachment(s) still on ${REJOINED_NODE} after ${MAX_VA_WAIT}s - proceeding with untaint" + fi + fi + # Check if the taint was applied by pacemaker APPLIED_BY=$($OC get node "$REJOINED_NODE" -o json 2>/dev/null | jq -r --arg key "$ANNOTATION_KEY" '.metadata.annotations[$key] // empty') || true if [ "$APPLIED_BY" != "$ANNOTATION_VALUE" ]; then From aca50bababf0bdeec37bbe5947fee939fa149cc7 Mon Sep 17 00:00:00 2001 From: 
Vincenzo Mauro Date: Fri, 5 Jun 2026 16:10:15 +0200 Subject: [PATCH 8/8] fix: harden error handling in taint/untaint scripts from CR feedback Co-Authored-By: Claude Opus 4.6 --- .../files/taint-fenced-node.yaml | 6 +++--- .../files/tnf-taint-alert.yaml | 5 ++++- .../files/tnf-untaint-alert.yaml | 17 ++--------------- .../files/untaint-fenced-node.yaml | 15 ++++++++++++--- 4 files changed, 21 insertions(+), 22 deletions(-) diff --git a/templates/master/00-master/two-node-with-fencing/files/taint-fenced-node.yaml b/templates/master/00-master/two-node-with-fencing/files/taint-fenced-node.yaml index 15136bcc40..94189783f1 100644 --- a/templates/master/00-master/two-node-with-fencing/files/taint-fenced-node.yaml +++ b/templates/master/00-master/two-node-with-fencing/files/taint-fenced-node.yaml @@ -28,12 +28,12 @@ contents: MAX_RETRIES=120 RETRY_INTERVAL=5 for attempt in $(seq 1 $MAX_RETRIES); do - if $OC patch node "$FENCED_NODE" --type=strategic -p "$PATCH" 2>/dev/null; then + OC_ERR=$($OC patch node "$FENCED_NODE" --type=strategic -p "$PATCH" 2>&1) && { logger -t taint-fenced-node "Successfully tainted and annotated ${FENCED_NODE}" exit 0 - fi + } - logger -t taint-fenced-node "Attempt ${attempt}/${MAX_RETRIES} failed, retrying in ${RETRY_INTERVAL}s" + logger -t taint-fenced-node "Attempt ${attempt}/${MAX_RETRIES} failed: ${OC_ERR}, retrying in ${RETRY_INTERVAL}s" sleep $RETRY_INTERVAL done diff --git a/templates/master/00-master/two-node-with-fencing/files/tnf-taint-alert.yaml b/templates/master/00-master/two-node-with-fencing/files/tnf-taint-alert.yaml index 0e03d01162..1285e24c8d 100644 --- a/templates/master/00-master/two-node-with-fencing/files/tnf-taint-alert.yaml +++ b/templates/master/00-master/two-node-with-fencing/files/tnf-taint-alert.yaml @@ -9,6 +9,9 @@ contents: # CRM_alert_kind=fencing, so they are already excluded. 
if [ "$CRM_alert_kind" = "fencing" ] && [ "$CRM_alert_rc" = "0" ] && [ -n "$CRM_alert_node" ]; then logger -t tnf-taint-alert "Fencing succeeded for ${CRM_alert_node}, triggering taint service" - sudo systemctl start --no-block "taint-node@${CRM_alert_node}.service" + if ! sudo systemctl start --no-block "taint-node@${CRM_alert_node}.service"; then + logger -t tnf-taint-alert "ERROR: Failed to start taint-node@${CRM_alert_node}.service" + exit 1 + fi fi exit 0 diff --git a/templates/master/00-master/two-node-with-fencing/files/tnf-untaint-alert.yaml b/templates/master/00-master/two-node-with-fencing/files/tnf-untaint-alert.yaml index 6169a1f8c4..4f10b10be0 100644 --- a/templates/master/00-master/two-node-with-fencing/files/tnf-untaint-alert.yaml +++ b/templates/master/00-master/two-node-with-fencing/files/tnf-untaint-alert.yaml @@ -4,24 +4,11 @@ contents: inline: | #!/bin/bash # Pacemaker alert agent for node-rejoin events. - # Registered with: select_nodes + select_attributes + # Registered with: select_nodes # - # Triggers on two event types: - # 1. CRM_alert_kind=node + CRM_alert_desc=member → corosync membership (normal rejoin after fence) - # 2. CRM_alert_kind=attribute + standby=off → pacemaker standby→online transition - TRIGGER=false - + # Triggers on: CRM_alert_kind=node + CRM_alert_desc=member → corosync membership (rejoin after fence) if [ "$CRM_alert_kind" = "node" ] && [ "$CRM_alert_desc" = "member" ] && [ -n "$CRM_alert_node" ]; then logger -t tnf-untaint-alert "Node ${CRM_alert_node} rejoined cluster (membership), triggering untaint service" - TRIGGER=true - fi - - if [ "$CRM_alert_kind" = "attribute" ] && [ "$CRM_alert_attribute_name" = "standby" ] && [ "$CRM_alert_attribute_value" = "off" ] && [ -n "$CRM_alert_node" ]; then - logger -t tnf-untaint-alert "Node ${CRM_alert_node} left standby, triggering untaint service" - TRIGGER=true - fi - - if [ "$TRIGGER" = "true" ]; then if ! 
sudo systemctl start --no-block "untaint-node@${CRM_alert_node}.service"; then logger -t tnf-untaint-alert "ERROR: Failed to start untaint-node@${CRM_alert_node}.service" exit 1 diff --git a/templates/master/00-master/two-node-with-fencing/files/untaint-fenced-node.yaml b/templates/master/00-master/two-node-with-fencing/files/untaint-fenced-node.yaml index 2c32f052d8..528f5d7e7e 100644 --- a/templates/master/00-master/two-node-with-fencing/files/untaint-fenced-node.yaml +++ b/templates/master/00-master/two-node-with-fencing/files/untaint-fenced-node.yaml @@ -26,7 +26,7 @@ contents: PCMK_INTERVAL=5 PCMK_ONLINE=false for i in $(seq 1 $((MAX_PCMK_WAIT / PCMK_INTERVAL))); do - if crm_mon -1 2>/dev/null | grep "Online:" | grep -qw "$REJOINED_NODE"; then + if crm_mon -1 2>/dev/null | grep "Online:" | grep -cw "$REJOINED_NODE" >/dev/null; then PCMK_ONLINE=true break fi @@ -94,7 +94,11 @@ contents: fi # Check if the taint was applied by pacemaker - APPLIED_BY=$($OC get node "$REJOINED_NODE" -o json 2>/dev/null | jq -r --arg key "$ANNOTATION_KEY" '.metadata.annotations[$key] // empty') || true + NODE_JSON=$($OC get node "$REJOINED_NODE" -o json 2>/dev/null) || { + logger -t untaint-fenced-node "ERROR: Cannot reach API to check annotation - aborting" + exit 1 + } + APPLIED_BY=$(echo "$NODE_JSON" | jq -r --arg key "$ANNOTATION_KEY" '.metadata.annotations[$key] // empty') if [ "$APPLIED_BY" != "$ANNOTATION_VALUE" ]; then logger -t untaint-fenced-node "Node ${REJOINED_NODE} has no pacemaker out-of-service annotation - nothing to do" exit 0 @@ -108,7 +112,12 @@ contents: RETRY_INTERVAL=5 for attempt in $(seq 1 $MAX_RETRIES); do # Re-check annotation in case another process already cleaned up - APPLIED_BY=$($OC get node "$REJOINED_NODE" -o json 2>/dev/null | jq -r --arg key "$ANNOTATION_KEY" '.metadata.annotations[$key] // empty') || true + NODE_JSON=$($OC get node "$REJOINED_NODE" -o json 2>/dev/null) || { + logger -t untaint-fenced-node "Attempt ${attempt}/${MAX_RETRIES}: oc 
get failed during annotation re-check - retrying in ${RETRY_INTERVAL}s" + sleep $RETRY_INTERVAL + continue + } + APPLIED_BY=$(echo "$NODE_JSON" | jq -r --arg key "$ANNOTATION_KEY" '.metadata.annotations[$key] // empty') if [ "$APPLIED_BY" != "$ANNOTATION_VALUE" ]; then logger -t untaint-fenced-node "Annotation already removed from ${REJOINED_NODE} - done" exit 0