From b39c3fb21026ef5de1195939bb1f817bc1a2972e Mon Sep 17 00:00:00 2001
From: Bruce-anle <840596168@qq.com>
Date: Sat, 9 May 2026 00:52:11 +0800
Subject: [PATCH] fix: convert markdown headers and footers

Background: convert_to_markdown reads word/header*.xml and word/footer*.xml and passes their w:hdr/w:ftr roots to parse_body_to_markdown. That parser previously looked only for w:body, so header/footer content was skipped.

Changes: allow parse_body_to_markdown to traverse w:hdr and w:ftr roots directly while preserving the normal w:document/w:body behavior.

Verification: /home/brucean/doc4agent/.venv/bin/python -m pytest tests -q -p no:cacheprovider passed.
---
.../converters/markdown_converter.py | 3 ++
tests/test_markdown_header_footer.py | 48 +++++++++++++++++++
2 files changed, 51 insertions(+)
create mode 100644 tests/test_markdown_header_footer.py
diff --git a/docx2everything/converters/markdown_converter.py b/docx2everything/converters/markdown_converter.py
index 0ff408f..e7c0842 100644
--- a/docx2everything/converters/markdown_converter.py
+++ b/docx2everything/converters/markdown_converter.py
@@ -511,6 +511,9 @@ def parse_body_to_markdown(root, numbering_info=None, hyperlinks=None, images=No
markdown_parts = []
body = root.find(qn('w:body'))
+ if body is None and root.tag in (qn('w:hdr'), qn('w:ftr')):
+ body = root
+
if body is None:
return ''
diff --git a/tests/test_markdown_header_footer.py b/tests/test_markdown_header_footer.py
new file mode 100644
index 0000000..43d016d
--- /dev/null
+++ b/tests/test_markdown_header_footer.py
@@ -0,0 +1,48 @@
+import xml.etree.ElementTree as ET
+
+from docx2everything.converters.markdown_converter import parse_body_to_markdown
+
+
+W_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
+
+
+def xml_root(xml):
+ return ET.fromstring(xml)
+
+
+def test_header_root_is_converted_to_markdown():
+ root = xml_root(f"""
+ <w:hdr xmlns:w="{W_NS}">
+ <w:p><w:r><w:t>Header text</w:t></w:r></w:p>
+ </w:hdr>
+ """)
+
+ markdown = parse_body_to_markdown(root)
+
+ assert markdown == "Header text"
+
+
+def test_footer_root_is_converted_to_markdown():
+ root = xml_root(f"""
+ <w:ftr xmlns:w="{W_NS}">
+ <w:p><w:r><w:t>Footer text</w:t></w:r></w:p>
+ </w:ftr>
+ """)
+
+ markdown = parse_body_to_markdown(root)
+
+ assert markdown == "Footer text"
+
+
+def test_document_body_root_still_converts_to_markdown():
+ root = xml_root(f"""
+ <w:document xmlns:w="{W_NS}">
+ <w:body>
+ <w:p><w:r><w:t>Body text</w:t></w:r></w:p>
+ </w:body>
+ </w:document>
+ """)
+
+ markdown = parse_body_to_markdown(root)
+
+ assert markdown == "Body text"