diff --git a/docx2everything/converters/markdown_converter.py b/docx2everything/converters/markdown_converter.py
index 0ff408f..d6d1f8b 100644
--- a/docx2everything/converters/markdown_converter.py
+++ b/docx2everything/converters/markdown_converter.py
@@ -65,6 +65,22 @@ def get_heading_level(pStyle_val, styles_info=None):
     return None
 
 
+def is_on_off_enabled(elem):
+    """
+    Returns the effective value of an OpenXML CT_OnOff element.
+
+    Missing w:val defaults to true; explicit 0/false/off disables it.
+    """
+    if elem is None:
+        return False
+
+    val = elem.get(qn('w:val'))
+    if val is None:
+        return True
+
+    return val.strip().lower() not in ('0', 'false', 'off')
+
+
 def parse_run_to_markdown(r_elem, hyperlinks=None, images=None, img_dir=None, zipf=None, link_url=None, footnotes=None, endnotes=None):
     """
     Converts a text run (<w:r>) to markdown with formatting.
@@ -115,10 +131,10 @@ def parse_run_to_markdown(r_elem, hyperlinks=None, images=None, img_dir=None, zi
 
     # Apply formatting (check all formatting first, then apply appropriately)
     if rPr is not None:
-        is_bold = rPr.find(qn('w:b')) is not None
-        is_italic = rPr.find(qn('w:i')) is not None
-        is_strike = (rPr.find(qn('w:strike')) is not None or
-                     rPr.find(qn('w:delText')) is not None)
+        is_bold = is_on_off_enabled(rPr.find(qn('w:b')))
+        is_italic = is_on_off_enabled(rPr.find(qn('w:i')))
+        is_strike = (is_on_off_enabled(rPr.find(qn('w:strike'))) or
+                     is_on_off_enabled(rPr.find(qn('w:delText'))))
 
         # Apply formatting in correct order (strikethrough, then bold/italic)
         if is_strike:
diff --git a/tests/test_markdown_run_onoff.py b/tests/test_markdown_run_onoff.py
new file mode 100644
index 0000000..47e7c23
--- /dev/null
+++ b/tests/test_markdown_run_onoff.py
@@ -0,0 +1,73 @@
+import xml.etree.ElementTree as ET
+
+from docx2everything.converters.markdown_converter import parse_run_to_markdown
+
+
+W_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
+
+
+def run_with_property(property_name, value_marker):
+    value_attr = "" if value_marker is None else f' w:val="{value_marker}"'
+    xml = f"""
+    <w:r xmlns:w="{W_NS}">
+        <w:rPr>
+            <w:{property_name}{value_attr}/>
+        </w:rPr>
+        <w:t>Body text</w:t>
+    </w:r>
+    """
+    return ET.fromstring(xml)
+
+
+def test_disabled_bold_values_are_not_rendered_as_bold():
+    for value in ("0", "false", "off"):
+        run = run_with_property("b", value)
+
+        markdown = parse_run_to_markdown(run)
+
+        assert markdown == "Body text"
+
+
+def test_enabled_bold_values_are_rendered_as_bold():
+    for value in (None, "1", "true", "on"):
+        run = run_with_property("b", value)
+
+        markdown = parse_run_to_markdown(run)
+
+        assert markdown == "**Body text**"
+
+
+def test_disabled_italic_values_are_not_rendered_as_italic():
+    for value in ("0", "false", "off"):
+        run = run_with_property("i", value)
+
+        markdown = parse_run_to_markdown(run)
+
+        assert markdown == "Body text"
+
+
+def test_enabled_italic_values_are_rendered_as_italic():
+    for value in (None, "1", "true", "on"):
+        run = run_with_property("i", value)
+
+        markdown = parse_run_to_markdown(run)
+
+        assert markdown == "*Body text*"
+
+
+def test_disabled_strike_values_are_not_rendered_as_strikethrough():
+    for value in ("0", "false", "off"):
+        run = run_with_property("strike", value)
+
+        markdown = parse_run_to_markdown(run)
+
+        assert markdown == "Body text"
+
+
+def test_enabled_strike_values_are_rendered_as_strikethrough():
+    for value in (None, "1", "true", "on"):
+        run = run_with_property("strike", value)
+
+        markdown = parse_run_to_markdown(run)
+
+        assert markdown == "~~Body text~~"