From b39c3fb21026ef5de1195939bb1f817bc1a2972e Mon Sep 17 00:00:00 2001 From: Bruce-anle <840596168@qq.com> Date: Sat, 9 May 2026 00:52:11 +0800 Subject: [PATCH] fix: convert markdown headers and footers Background: convert_to_markdown reads word/header*.xml and word/footer*.xml, but passed w:hdr/w:ftr roots to parse_body_to_markdown. That parser only looked for w:body, so header/footer content was skipped. Changes: allow parse_body_to_markdown to traverse w:hdr and w:ftr roots directly while preserving normal w:document/w:body behavior. Verification: /home/brucean/doc4agent/.venv/bin/python -m pytest tests -q -p no:cacheprovider passed. --- .../converters/markdown_converter.py | 3 ++ tests/test_markdown_header_footer.py | 48 +++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 tests/test_markdown_header_footer.py diff --git a/docx2everything/converters/markdown_converter.py b/docx2everything/converters/markdown_converter.py index 0ff408f..e7c0842 100644 --- a/docx2everything/converters/markdown_converter.py +++ b/docx2everything/converters/markdown_converter.py @@ -511,6 +511,9 @@ def parse_body_to_markdown(root, numbering_info=None, hyperlinks=None, images=No markdown_parts = [] body = root.find(qn('w:body')) + if body is None and root.tag in (qn('w:hdr'), qn('w:ftr')): + body = root + if body is None: return '' diff --git a/tests/test_markdown_header_footer.py b/tests/test_markdown_header_footer.py new file mode 100644 index 0000000..43d016d --- /dev/null +++ b/tests/test_markdown_header_footer.py @@ -0,0 +1,48 @@ +import xml.etree.ElementTree as ET + +from docx2everything.converters.markdown_converter import parse_body_to_markdown + + +W_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main" + + +def xml_root(xml): + return ET.fromstring(xml) + + +def test_header_root_is_converted_to_markdown(): + root = xml_root(f""" + + Header text + + """) + + markdown = parse_body_to_markdown(root) + + assert markdown == "Header text" + 
+ +def test_footer_root_is_converted_to_markdown(): + root = xml_root(f""" + + Footer text + + """) + + markdown = parse_body_to_markdown(root) + + assert markdown == "Footer text" + + +def test_document_body_root_still_converts_to_markdown(): + root = xml_root(f""" + + + Body text + + + """) + + markdown = parse_body_to_markdown(root) + + assert markdown == "Body text"