Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,7 @@ def primary_endpoint(self) -> str | None:
logger.debug("primary endpoint early exit: Peer relation not joined yet.")
return None
try:
self._patroni.log_raft_status()
primary = self._patroni.get_primary() or self._patroni.get_standby_leader()
primary_endpoint = self._patroni.get_member_ip(primary) if primary else None
# Force a retry if there is no primary or the member that was
Expand Down
13 changes: 13 additions & 0 deletions src/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -840,6 +840,7 @@ def has_raft_quorum(self) -> bool:
raft_host = "127.0.0.1:2222"
try:
raft_status = syncobj_util.executeCommand(raft_host, ["status"])
logger.debug("Local raft status: %s", raft_status)
except UtilityException:
logger.warning("Has raft quorum: Cannot connect to raft cluster")
return False
Expand Down Expand Up @@ -909,6 +910,16 @@ def get_running_cluster_members(self) -> list[str]:
except Exception:
return []

def log_raft_status(self) -> None:
    """Log the local raft node's status at debug level (best effort).

    Sends the ``status`` command to the raft utility endpoint on
    ``127.0.0.1:2222`` and logs the reply. A ``UtilityException`` raised by
    the query is swallowed and reported as "unavailable", so the caller is
    never failed for that reason.
    """
    # Function-scope import keeps this block self-contained; after the first
    # call it is only a sys.modules lookup.
    import logging

    # The query is a blocking TCP round trip (bounded by the timeout passed
    # to TcpUtility below). Skip it entirely when DEBUG records would be
    # dropped, since logging is the only purpose of this method.
    if not logger.isEnabledFor(logging.DEBUG):
        return

    raft_host = "127.0.0.1:2222"
    try:
        syncobj_util = TcpUtility(password=self.raft_password, timeout=3)
        raft_status = syncobj_util.executeCommand(raft_host, ["status"])
    except UtilityException:
        logger.debug("Local raft status unavailable")
    else:
        # Logged outside the try so only the raft query is guarded.
        logger.debug("Local raft status: %s", raft_status)

def remove_raft_member(self, member_ip: str) -> None:
"""Remove a member from the raft cluster.

Expand All @@ -930,12 +941,14 @@ def remove_raft_member(self, member_ip: str) -> None:
raft_host = "127.0.0.1:2222"
try:
raft_status = syncobj_util.executeCommand(raft_host, ["status"])
logger.debug("Local raft status: %s", raft_status)
except UtilityException:
logger.warning("Remove raft member: Cannot connect to raft cluster")
raise RemoveRaftMemberFailedError() from None

# Check whether the member is still part of the raft cluster.
if not member_ip or f"partner_node_status_server_{member_ip}:2222" not in raft_status:
logger.debug(f"Local raft member %s is missing in cluster", member_ip)
return

# If there's no quorum and the leader left raft cluster is stuck
Expand Down
Loading