Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions docx2everything/converters/markdown_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,7 @@ def parse_table_to_markdown(tbl_elem, hyperlinks=None, images=None, img_dir=None

markdown_rows = []
num_cols = 0
vertical_merge_values = {}

# First pass: determine number of columns and extract all rows
col_alignments = [] # Track column alignments
Expand All @@ -408,12 +409,17 @@ def parse_table_to_markdown(tbl_elem, hyperlinks=None, images=None, img_dir=None
tcPr = cell.find(qn('w:tcPr'))
grid_span = 1
cell_alignment = 'left' # Default alignment
v_merge = None

if tcPr is not None:
gridSpan_elem = tcPr.find(qn('w:gridSpan'))
if gridSpan_elem is not None:
grid_span = int(gridSpan_elem.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}val', 1))

vMerge_elem = tcPr.find(qn('w:vMerge'))
if vMerge_elem is not None:
v_merge = vMerge_elem.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}val', 'continue')

# Check for cell alignment
jc_elem = tcPr.find(qn('w:jc'))
if jc_elem is not None:
Expand All @@ -433,6 +439,14 @@ def parse_table_to_markdown(tbl_elem, hyperlinks=None, images=None, img_dir=None
cell_text += p_text + ' '

cell_text = cell_text.strip().replace('\n', ' ').replace('|', '\\|')
col_idx = len(row_data)

if v_merge == 'restart':
vertical_merge_values[col_idx] = cell_text
elif v_merge == 'continue':
cell_text = vertical_merge_values.get(col_idx, cell_text)
else:
vertical_merge_values.pop(col_idx, None)

# Add merged cells
row_data.append(cell_text)
Expand Down
61 changes: 61 additions & 0 deletions tests/test_markdown_table_vmerge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import xml.etree.ElementTree as ET

from docx2everything.converters.markdown_converter import parse_table_to_markdown


W_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"


def test_vertical_merge_continuation_repeats_restart_cell_text():
    """Check that a bare ``<w:vMerge/>`` cell repeats the ``restart`` cell's text.

    The fixture is a 2x2 table whose first column starts a vertical merge in
    row one and continues it (empty paragraph, no ``w:val``) in row two.
    """
    xml_source = f"""
<w:tbl xmlns:w="{W_NS}">
<w:tr>
<w:tc>
<w:tcPr><w:vMerge w:val="restart"/></w:tcPr>
<w:p><w:r><w:t>Merged</w:t></w:r></w:p>
</w:tc>
<w:tc><w:p><w:r><w:t>Header</w:t></w:r></w:p></w:tc>
</w:tr>
<w:tr>
<w:tc>
<w:tcPr><w:vMerge/></w:tcPr>
<w:p/>
</w:tc>
<w:tc><w:p><w:r><w:t>Value</w:t></w:r></w:p></w:tc>
</w:tr>
</w:tbl>
"""
    tbl_elem = ET.fromstring(xml_source)

    rendered = parse_table_to_markdown(tbl_elem)

    # The continuation cell in row two must carry "Merged" down from row one.
    expected_lines = [
        "| Merged | Header |",
        "| --- | --- |",
        "| Merged | Value |",
    ]
    assert rendered == "\n".join(expected_lines)


def test_vertical_merge_does_not_affect_later_normal_cells():
    """Check that a cell without ``w:vMerge`` keeps its own text.

    The fixture is a 2x2 table whose first column starts a vertical merge in
    row one, followed by an ordinary (unmerged) cell in the same column.
    """
    xml_source = f"""
<w:tbl xmlns:w="{W_NS}">
<w:tr>
<w:tc>
<w:tcPr><w:vMerge w:val="restart"/></w:tcPr>
<w:p><w:r><w:t>Merged</w:t></w:r></w:p>
</w:tc>
<w:tc><w:p><w:r><w:t>Header</w:t></w:r></w:p></w:tc>
</w:tr>
<w:tr>
<w:tc><w:p><w:r><w:t>Normal</w:t></w:r></w:p></w:tc>
<w:tc><w:p><w:r><w:t>Value</w:t></w:r></w:p></w:tc>
</w:tr>
</w:tbl>
"""
    tbl_elem = ET.fromstring(xml_source)

    rendered = parse_table_to_markdown(tbl_elem)

    # Row two's first cell is not part of the merge, so "Normal" must survive.
    expected_lines = [
        "| Merged | Header |",
        "| --- | --- |",
        "| Normal | Value |",
    ]
    assert rendered == "\n".join(expected_lines)