From b8e26859997ca68f59cbc9d85c494e4864b45fa4 Mon Sep 17 00:00:00 2001
From: Bruce-anle <840596168@qq.com>
Date: Sat, 9 May 2026 00:30:14 +0800
Subject: [PATCH] fix: detect paragraph property section breaks
Background: DOCX section properties are normally stored under w:pPr/w:sectPr. The markdown converter only checked for direct w:p/w:sectPr children, so normal section breaks were missed.

Changes: check both direct and paragraph-property section breaks, with tests for both forms.

Verification: /home/brucean/doc4agent/.venv/bin/python -m pytest tests -q -p no:cacheprovider passed.
---
.../converters/markdown_converter.py | 3 +-
tests/test_markdown_section_break.py | 46 +++++++++++++++++++
2 files changed, 48 insertions(+), 1 deletion(-)
create mode 100644 tests/test_markdown_section_break.py
diff --git a/docx2everything/converters/markdown_converter.py b/docx2everything/converters/markdown_converter.py
index 0ff408f..475f714 100644
--- a/docx2everything/converters/markdown_converter.py
+++ b/docx2everything/converters/markdown_converter.py
@@ -167,7 +167,8 @@ def parse_paragraph_to_markdown(p_elem, numbering_info=None, hyperlinks=None, im
# Check for section break
has_section_break = False
sectPr = p_elem.find(qn('w:sectPr'))
- if sectPr is not None:
+ nested_sectPr = pPr.find(qn('w:sectPr')) if pPr is not None else None
+ if sectPr is not None or nested_sectPr is not None:
has_section_break = True
# Check for heading
diff --git a/tests/test_markdown_section_break.py b/tests/test_markdown_section_break.py
new file mode 100644
index 0000000..ce71a6e
--- /dev/null
+++ b/tests/test_markdown_section_break.py
@@ -0,0 +1,46 @@
+import xml.etree.ElementTree as ET
+
+from docx2everything.converters.markdown_converter import parse_paragraph_to_markdown
+
+
+W_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
+
+
+def paragraph_with_nested_section_break():
+ xml = f"""
+    <w:p xmlns:w="{W_NS}">
+      <w:pPr>
+        <w:sectPr/>
+      </w:pPr>
+      <w:r><w:t>Body text</w:t></w:r>
+    </w:p>
+ """
+ return ET.fromstring(xml)
+
+
+def paragraph_with_direct_section_break():
+ xml = f"""
+    <w:p xmlns:w="{W_NS}">
+      <w:sectPr/>
+      <w:r><w:t>Body text</w:t></w:r>
+    </w:p>
+ """
+ return ET.fromstring(xml)
+
+
+def test_nested_section_break_outputs_section_break_marker():
+ paragraph = paragraph_with_nested_section_break()
+
+ markdown = parse_paragraph_to_markdown(paragraph)
+
+ assert markdown.startswith("")
+ assert markdown.endswith("Body text")
+
+
+def test_direct_section_break_still_outputs_section_break_marker():
+ paragraph = paragraph_with_direct_section_break()
+
+ markdown = parse_paragraph_to_markdown(paragraph)
+
+ assert markdown.startswith("")
+ assert markdown.endswith("Body text")