From 1ace763ba8397b70869fc80c0c6699de6455d338 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 15 Jul 2024 11:40:30 +0200 Subject: [PATCH 1/4] public_inbox: Fix type hints Some type hints were set to return a given type, while the function was actually returning that type, or a None object. Fix the type hints when relevant. Signed-off-by: Maxime Ripard --- did/plugins/public_inbox.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/did/plugins/public_inbox.py b/did/plugins/public_inbox.py index 063cbf46..f575d0fa 100644 --- a/did/plugins/public_inbox.py +++ b/did/plugins/public_inbox.py @@ -41,17 +41,17 @@ class Message(): def __init__(self, msg: mailbox.mboxMessage) -> None: self.msg = msg - def __msg_id(self, keyid: str) -> str: + def __msg_id(self, keyid: str) -> typing.Optional[str]: msgid = self.msg[keyid] if msgid is None: return None return msgid.lstrip("<").rstrip(">") - def id(self) -> str: + def id(self) -> typing.Optional[str]: return self.__msg_id("Message-Id") - def parent_id(self) -> str: + def parent_id(self) -> typing.Optional[str]: return self.__msg_id("In-Reply-To") def subject(self) -> str: From 7136edb7a818ca9ae4ac0681252e92b3073eb709 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 15 Jul 2024 11:46:29 +0200 Subject: [PATCH 2/4] public_inbox: Improve log on poorly constructed messages Some messages will not be perfect, and will have inconsistent or missing headers. We were mostly handling them fine, but didn't log anything when that was happening, making debugging issues fairly difficult. Let's improve the logging a bit. Signed-off-by: Maxime Ripard --- did/plugins/public_inbox.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/did/plugins/public_inbox.py b/did/plugins/public_inbox.py index f575d0fa..1878c280 100644 --- a/did/plugins/public_inbox.py +++ b/did/plugins/public_inbox.py @@ -44,6 +44,7 @@ def __init__(self, msg: mailbox.mboxMessage) -> None: def __msg_id(self, keyid: str) -> typing.Optional[str]: msgid = self.msg[keyid] if msgid is None: + log.debug("Missing header %s", keyid) return None return msgid.lstrip("<").rstrip(">") @@ -176,7 +177,9 @@ def __fetch_thread_root(self, initial_msg: Message) -> Message: if msg.is_thread_root(): log.debug("Found message %s thread root: %s.", msg_id, msg.id()) return msg + # if root is not found, return initial message as root. + log.warning("Couldn't find message root") return initial_msg def __get_thread_root(self, msg: Message) -> Message: From a7d8d19ec1ef37a89fee77b0e3d2c5d8eee23b5b Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 15 Jul 2024 11:49:27 +0200 Subject: [PATCH 3/4] public_inbox: Make __fetch_thread_root() return optional value Commit af2b8ac97122 ("Fix pylint R1710") made __fetch_thread_root() return the original message argument if no thread root could be found. However, and even though it wasn't really handled yet, the intent was always to return None in such a case. Restore the previous behaviour, but in an explicit way this time, and we'll handle it in the callers in the next commit. Signed-off-by: Maxime Ripard --- did/plugins/public_inbox.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/did/plugins/public_inbox.py b/did/plugins/public_inbox.py index 1878c280..8d96a58b 100644 --- a/did/plugins/public_inbox.py +++ b/did/plugins/public_inbox.py @@ -146,7 +146,7 @@ def __get_msgs_from_mbox(self, mbox: mailbox.mbox) -> list[Message]: return msgs - def __fetch_thread_root(self, initial_msg: Message) -> Message: + def __fetch_thread_root(self, initial_msg: Message) -> typing.Optional[Message]: msg_id = initial_msg.id() url = self.__get_url(f"/all/{msg_id}/t.mbox.gz") @@ -178,11 +178,10 @@ def __fetch_thread_root(self, initial_msg: Message) -> Message: log.debug("Found message %s thread root: %s.", msg_id, msg.id()) return msg - # if root is not found, return initial message as root. log.warning("Couldn't find message root") - return initial_msg + return None - def __get_thread_root(self, msg: Message) -> Message: + def __get_thread_root(self, msg: Message) -> typing.Optional[Message]: log.debug("Looking for thread root of message %s", msg.id()) if msg.is_thread_root(): log.debug("Message is thread root already. Returning.") From a30cda7a67ff8d097804e24c3608ab1b9c55f87d Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 15 Jul 2024 12:08:32 +0200 Subject: [PATCH 4/4] public_inbox: Handle missing parent is __get_thread_root() A message can have an In-Reply-To header but the message it points to might not be available for some reason. In such a case, the Message.parent_id() method will return a proper value, but __fetch_thread_root() on that message might not and would return None. Let's handle that case by returning the latest message we found as the root message. Signed-off-by: Maxime Ripard --- did/plugins/public_inbox.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/did/plugins/public_inbox.py b/did/plugins/public_inbox.py index 8d96a58b..f1749979 100644 --- a/did/plugins/public_inbox.py +++ b/did/plugins/public_inbox.py @@ -181,7 +181,7 @@ def __fetch_thread_root(self, initial_msg: Message) -> typing.Optional[Message]: log.warning("Couldn't find message root") return None - def __get_thread_root(self, msg: Message) -> typing.Optional[Message]: + def __get_thread_root(self, msg: Message) -> Message: log.debug("Looking for thread root of message %s", msg.id()) if msg.is_thread_root(): log.debug("Message is thread root already. Returning.") @@ -190,6 +190,10 @@ def __get_thread_root(self, msg: Message) -> typing.Optional[Message]: parent_id = msg.parent_id() if parent_id not in self.messages_cache: root = self.__fetch_thread_root(msg) + if root is None: + log.debug("Can't retrieve the thread root, returning.") + return msg + log.debug("Found root message %s for message %s", root.id(), msg.id()) return root @@ -203,7 +207,11 @@ def __get_thread_root(self, msg: Message) -> typing.Optional[Message]: parent_id = parent.parent_id() if parent_id not in self.messages_cache: - root = self.__fetch_thread_root(msg) + root = self.__fetch_thread_root(parent) + if root is None: + log.debug("Can't retrieve the message parent, returning.") + return parent + log.debug("Found root message %s for message %s", root.id(), msg.id()) return root