Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions docx2everything/converters/markdown_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,22 @@ def get_heading_level(pStyle_val, styles_info=None):
return None


def is_on_off_enabled(elem):
    """
    Resolve an OpenXML CT_OnOff toggle element to a boolean.

    Args:
        elem: The toggle element, or None when the element is absent.

    Returns:
        False when ``elem`` is None. True when the element is present but
        carries no ``w:val`` attribute. Otherwise True unless the attribute,
        after trimming whitespace and lowercasing, is ``0``, ``false`` or
        ``off``.
    """
    disabled_markers = ('0', 'false', 'off')

    # An absent element means the toggle was never set.
    if elem is None:
        return False

    raw = elem.get(qn('w:val'))
    # A bare element (no w:val) switches the property on.
    return True if raw is None else raw.strip().lower() not in disabled_markers


def parse_run_to_markdown(r_elem, hyperlinks=None, images=None, img_dir=None, zipf=None, link_url=None, footnotes=None, endnotes=None):
"""
Converts a text run (<w:r>) to markdown with formatting.
Expand Down Expand Up @@ -161,8 +177,7 @@ def parse_paragraph_to_markdown(p_elem, numbering_info=None, hyperlinks=None, im
has_page_break = False
if pPr is not None:
page_break_before = pPr.find(qn('w:pageBreakBefore'))
if page_break_before is not None:
has_page_break = True
has_page_break = is_on_off_enabled(page_break_before)

# Check for section break
has_section_break = False
Expand Down
57 changes: 57 additions & 0 deletions tests/test_markdown_pagebreak.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import xml.etree.ElementTree as ET

from docx2everything.converters.markdown_converter import parse_paragraph_to_markdown


# WordprocessingML main namespace URI; bound to the ``w`` prefix in the XML
# fixture built by paragraph_with_page_break_before.
W_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"


def paragraph_with_page_break_before(value_marker):
    """
    Build a ``<w:p>`` fixture whose properties contain ``<w:pageBreakBefore>``.

    Args:
        value_marker: Text for the ``w:val`` attribute on the
            ``<w:pageBreakBefore>`` element, or None to omit the attribute.

    Returns:
        The parsed paragraph element, holding one run with the text
        ``Body text``.
    """
    if value_marker is None:
        value_attr = ""
    else:
        value_attr = f' w:val="{value_marker}"'

    fragment = f"""
<w:p xmlns:w="{W_NS}">
<w:pPr>
<w:pageBreakBefore{value_attr}/>
</w:pPr>
<w:r><w:t>Body text</w:t></w:r>
</w:p>
"""
    return ET.fromstring(fragment)


def test_page_break_before_val_zero_is_disabled():
    """An explicit ``w:val="0"`` must not produce a page-break marker."""
    rendered = parse_paragraph_to_markdown(
        paragraph_with_page_break_before("0")
    )

    assert "<!-- Page Break -->" not in rendered
    assert rendered == "Body text"


def test_page_break_before_without_val_is_enabled():
    """A bare ``<w:pageBreakBefore/>`` (no ``w:val``) must emit the marker."""
    rendered = parse_paragraph_to_markdown(
        paragraph_with_page_break_before(None)
    )

    assert rendered.startswith("<!-- Page Break -->")
    assert rendered.endswith("Body text")


def test_page_break_before_true_values_are_enabled():
    """Each enabled spelling of ``w:val`` must emit a leading page-break marker."""
    for value in ("1", "true", "on"):
        paragraph = paragraph_with_page_break_before(value)

        markdown = parse_paragraph_to_markdown(paragraph)

        # Several values share one test body, so name the value in the
        # failure message; otherwise a failure does not say which one broke.
        assert markdown.startswith("<!-- Page Break -->"), f"w:val={value!r}"
        assert markdown.endswith("Body text"), f"w:val={value!r}"


def test_page_break_before_false_values_are_disabled():
    """Each disabled spelling of ``w:val`` must suppress the page-break marker."""
    for value in ("false", "off"):
        paragraph = paragraph_with_page_break_before(value)

        markdown = parse_paragraph_to_markdown(paragraph)

        # Several values share one test body, so name the value in the
        # failure message; otherwise a failure does not say which one broke.
        assert "<!-- Page Break -->" not in markdown, f"w:val={value!r}"
        assert markdown == "Body text", f"w:val={value!r}"