def is_on_off_enabled(elem):
    """
    Evaluates an OpenXML CT_OnOff toggle element.

    Args:
        elem: The toggle element (e.g. the result of a ``find`` call), or
            None when the element is absent.

    Returns:
        False when ``elem`` is None, or when its ``w:val`` attribute is
        '0', 'false' or 'off' (compared after stripping surrounding
        whitespace and lower-casing). True in every other case, including
        when ``w:val`` is not set at all.
    """
    # An absent element means the toggle was never switched on.
    if elem is None:
        return False

    raw_value = elem.get(qn('w:val'))

    # A bare toggle element without w:val counts as enabled.
    if raw_value is None:
        return True

    disabled_tokens = ('0', 'false', 'off')
    normalized = raw_value.strip().lower()
    return normalized not in disabled_tokens
"" not in markdown + assert markdown == "Body text" + + +def test_page_break_before_without_val_is_enabled(): + paragraph = paragraph_with_page_break_before(None) + + markdown = parse_paragraph_to_markdown(paragraph) + + assert markdown.startswith("") + assert markdown.endswith("Body text") + + +def test_page_break_before_true_values_are_enabled(): + for value in ("1", "true", "on"): + paragraph = paragraph_with_page_break_before(value) + + markdown = parse_paragraph_to_markdown(paragraph) + + assert markdown.startswith("") + assert markdown.endswith("Body text") + + +def test_page_break_before_false_values_are_disabled(): + for value in ("false", "off"): + paragraph = paragraph_with_page_break_before(value) + + markdown = parse_paragraph_to_markdown(paragraph) + + assert "" not in markdown + assert markdown == "Body text"