Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 20 additions & 4 deletions docx2everything/converters/markdown_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,22 @@ def get_heading_level(pStyle_val, styles_info=None):
return None


def is_on_off_enabled(elem):
    """
    Evaluate an OpenXML CT_OnOff toggle element.

    A missing element (None) is treated as off. A present element is on
    when it carries no w:val attribute, or when its w:val, trimmed and
    lower-cased, is anything other than '0', 'false' or 'off'.
    """
    if elem is None:
        return False

    raw_val = elem.get(qn('w:val'))

    # A bare toggle with no w:val attribute counts as enabled.
    if raw_val is None:
        return True

    disabled_values = ('0', 'false', 'off')
    normalized = raw_val.strip().lower()
    return normalized not in disabled_values


def parse_run_to_markdown(r_elem, hyperlinks=None, images=None, img_dir=None, zipf=None, link_url=None, footnotes=None, endnotes=None):
"""
Converts a text run (<w:r>) to markdown with formatting.
Expand Down Expand Up @@ -115,10 +131,10 @@ def parse_run_to_markdown(r_elem, hyperlinks=None, images=None, img_dir=None, zi

# Apply formatting (check all formatting first, then apply appropriately)
if rPr is not None:
is_bold = rPr.find(qn('w:b')) is not None
is_italic = rPr.find(qn('w:i')) is not None
is_strike = (rPr.find(qn('w:strike')) is not None or
rPr.find(qn('w:delText')) is not None)
is_bold = is_on_off_enabled(rPr.find(qn('w:b')))
is_italic = is_on_off_enabled(rPr.find(qn('w:i')))
is_strike = (is_on_off_enabled(rPr.find(qn('w:strike'))) or
is_on_off_enabled(rPr.find(qn('w:delText'))))

# Apply formatting in correct order (strikethrough, then bold/italic)
if is_strike:
Expand Down
73 changes: 73 additions & 0 deletions tests/test_markdown_run_onoff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import xml.etree.ElementTree as ET

from docx2everything.converters.markdown_converter import parse_run_to_markdown


# WordprocessingML main namespace URI; bound to the ``w:`` prefix in the XML
# snippets this test module builds.
W_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"


def run_with_property(property_name, value_marker):
    """Build a ``<w:r>`` element with a single run property and fixed text.

    ``property_name`` is the local name of the element placed inside
    ``<w:rPr>`` (for example ``"b"``). ``value_marker`` becomes that
    element's ``w:val`` attribute; pass ``None`` to omit the attribute
    entirely. The run's text is always ``Body text``.
    """
    if value_marker is None:
        value_attr = ""
    else:
        value_attr = f' w:val="{value_marker}"'

    run_xml = f"""
<w:r xmlns:w="{W_NS}">
<w:rPr>
<w:{property_name}{value_attr}/>
</w:rPr>
<w:t>Body text</w:t>
</w:r>
"""
    return ET.fromstring(run_xml)


def test_disabled_bold_values_are_not_rendered_as_bold():
    """A ``<w:b>`` with w:val of 0/false/off must leave the run text plain."""
    for value in ("0", "false", "off"):
        run = run_with_property("b", value)

        markdown = parse_run_to_markdown(run)

        # Name the w:val under test so a failure inside the loop is attributable.
        assert markdown == "Body text", f"w:b w:val={value!r} gave {markdown!r}"


def test_enabled_bold_values_are_rendered_as_bold():
    """A ``<w:b>`` with no w:val, or w:val of 1/true/on, must render as bold."""
    for value in (None, "1", "true", "on"):
        run = run_with_property("b", value)

        markdown = parse_run_to_markdown(run)

        # Name the w:val under test so a failure inside the loop is attributable.
        assert markdown == "**Body text**", f"w:b w:val={value!r} gave {markdown!r}"


def test_disabled_italic_values_are_not_rendered_as_italic():
    """A ``<w:i>`` with w:val of 0/false/off must leave the run text plain."""
    for value in ("0", "false", "off"):
        run = run_with_property("i", value)

        markdown = parse_run_to_markdown(run)

        # Name the w:val under test so a failure inside the loop is attributable.
        assert markdown == "Body text", f"w:i w:val={value!r} gave {markdown!r}"


def test_enabled_italic_values_are_rendered_as_italic():
    """A ``<w:i>`` with no w:val, or w:val of 1/true/on, must render as italic."""
    for value in (None, "1", "true", "on"):
        run = run_with_property("i", value)

        markdown = parse_run_to_markdown(run)

        # Name the w:val under test so a failure inside the loop is attributable.
        assert markdown == "*Body text*", f"w:i w:val={value!r} gave {markdown!r}"


def test_disabled_strike_values_are_not_rendered_as_strikethrough():
    """A ``<w:strike>`` with w:val of 0/false/off must leave the run text plain."""
    for value in ("0", "false", "off"):
        run = run_with_property("strike", value)

        markdown = parse_run_to_markdown(run)

        # Name the w:val under test so a failure inside the loop is attributable.
        assert markdown == "Body text", (
            f"w:strike w:val={value!r} gave {markdown!r}"
        )


def test_enabled_strike_values_are_rendered_as_strikethrough():
    """A ``<w:strike>`` with no w:val, or w:val of 1/true/on, must render struck."""
    for value in (None, "1", "true", "on"):
        run = run_with_property("strike", value)

        markdown = parse_run_to_markdown(run)

        # Name the w:val under test so a failure inside the loop is attributable.
        assert markdown == "~~Body text~~", (
            f"w:strike w:val={value!r} gave {markdown!r}"
        )