Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions docx2everything/converters/markdown_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,9 @@ def parse_body_to_markdown(root, numbering_info=None, hyperlinks=None, images=No
markdown_parts = []
body = root.find(qn('w:body'))

if body is None and root.tag in (qn('w:hdr'), qn('w:ftr')):
body = root

if body is None:
return ''

Expand Down Expand Up @@ -616,7 +619,9 @@ def convert_to_markdown(zipf, filelist, img_dir=None):
try:
header_xml = zipf.read(fname)
header_root = ET.fromstring(header_xml)
header_md = parse_body_to_markdown(header_root, numbering_info, hyperlinks, images, img_dir, zipf, footnotes=footnotes, endnotes=endnotes, comments=comments, styles_info=styles_info, charts=charts)
header_rels = 'word/_rels/' + os.path.basename(fname) + '.rels'
header_hyperlinks, header_images = parse_relationships(zipf, header_rels)
header_md = parse_body_to_markdown(header_root, numbering_info, header_hyperlinks, header_images, img_dir, zipf, footnotes=footnotes, endnotes=endnotes, comments=comments, styles_info=styles_info, charts=charts)
if header_md:
markdown_parts.append(header_md)
except Exception:
Expand All @@ -641,7 +646,9 @@ def convert_to_markdown(zipf, filelist, img_dir=None):
try:
footer_xml = zipf.read(fname)
footer_root = ET.fromstring(footer_xml)
footer_md = parse_body_to_markdown(footer_root, numbering_info, hyperlinks, images, img_dir, zipf, footnotes=footnotes, endnotes=endnotes, comments=comments, styles_info=styles_info, charts=charts)
footer_rels = 'word/_rels/' + os.path.basename(fname) + '.rels'
footer_hyperlinks, footer_images = parse_relationships(zipf, footer_rels)
footer_md = parse_body_to_markdown(footer_root, numbering_info, footer_hyperlinks, footer_images, img_dir, zipf, footnotes=footnotes, endnotes=endnotes, comments=comments, styles_info=styles_info, charts=charts)
if footer_md:
markdown_parts.append(footer_md)
except Exception:
Expand Down
48 changes: 48 additions & 0 deletions tests/test_markdown_header_footer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import xml.etree.ElementTree as ET

from docx2everything.converters.markdown_converter import parse_body_to_markdown


# WordprocessingML main namespace URI; the tests below bind it to the "w" prefix
# in their XML fixtures.
W_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"


def xml_root(xml):
    """Parse the XML text *xml* and return the root element of the parsed tree."""
    root_element = ET.fromstring(xml)
    return root_element


def test_header_root_is_converted_to_markdown():
    """A root element that is itself ``w:hdr`` (no ``w:body``) yields its paragraph text."""
    header_xml = f"""
<w:hdr xmlns:w="{W_NS}">
<w:p><w:r><w:t>Header text</w:t></w:r></w:p>
</w:hdr>
"""
    header_root = xml_root(header_xml)

    assert parse_body_to_markdown(header_root) == "Header text"


def test_footer_root_is_converted_to_markdown():
    """A root element that is itself ``w:ftr`` (no ``w:body``) yields its paragraph text."""
    footer_xml = f"""
<w:ftr xmlns:w="{W_NS}">
<w:p><w:r><w:t>Footer text</w:t></w:r></w:p>
</w:ftr>
"""
    footer_root = xml_root(footer_xml)

    assert parse_body_to_markdown(footer_root) == "Footer text"


def test_document_body_root_still_converts_to_markdown():
    """A regular ``w:document`` root with a ``w:body`` child still yields its paragraph text."""
    document_xml = f"""
<w:document xmlns:w="{W_NS}">
<w:body>
<w:p><w:r><w:t>Body text</w:t></w:r></w:p>
</w:body>
</w:document>
"""
    document_root = xml_root(document_xml)

    assert parse_body_to_markdown(document_root) == "Body text"
80 changes: 80 additions & 0 deletions tests/test_markdown_header_footer_relationships.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import io
import zipfile

from docx2everything.converters.markdown_converter import convert_to_markdown


def make_docx(parts):
    """Build an in-memory zip archive from *parts* and return it opened for reading.

    *parts* maps archive member names to their contents; members are written
    in the mapping's iteration order. The caller owns the returned
    ``zipfile.ZipFile``.
    """
    stream = io.BytesIO()
    with zipfile.ZipFile(stream, mode="w") as archive:
        for member_name, payload in parts.items():
            archive.writestr(member_name, payload)
    # Rewind so the reader starts from the beginning of the same buffer.
    stream.seek(0)
    return zipfile.ZipFile(stream, mode="r")


def test_header_hyperlink_uses_header_relationships():
    """A hyperlink in ``word/header1.xml`` resolves through ``word/_rels/header1.xml.rels``.

    The archive holds no document-level relationships part, so the expected
    link target can only come from the header's own relationships file.
    """
    document_xml = """
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:body/>
</w:document>
"""
    header_xml = """
<w:hdr
xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">
<w:p>
<w:hyperlink r:id="rIdHeaderLink">
<w:r><w:t>Header link</w:t></w:r>
</w:hyperlink>
</w:p>
</w:hdr>
"""
    header_rels_xml = """
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
<Relationship Id="rIdHeaderLink"
Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"
Target="https://example.com/header"/>
</Relationships>
"""
    archive = make_docx({
        "word/document.xml": document_xml,
        "word/header1.xml": header_xml,
        "word/_rels/header1.xml.rels": header_rels_xml,
    })

    rendered = convert_to_markdown(archive, archive.namelist())

    assert rendered == "[Header link](https://example.com/header)"


def test_footer_image_uses_footer_relationships():
    """An image in ``word/footer1.xml`` resolves through ``word/_rels/footer1.xml.rels``.

    The archive holds no document-level relationships part, so the expected
    image target can only come from the footer's own relationships file.
    """
    document_xml = """
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:body/>
</w:document>
"""
    footer_xml = """
<w:ftr
xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main">
<w:p>
<w:r>
<w:drawing>
<a:blip r:embed="rIdFooterImage"/>
</w:drawing>
</w:r>
</w:p>
</w:ftr>
"""
    footer_rels_xml = """
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
<Relationship Id="rIdFooterImage"
Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"
Target="media/footer-image.png"/>
</Relationships>
"""
    archive = make_docx({
        "word/document.xml": document_xml,
        "word/footer1.xml": footer_xml,
        "word/_rels/footer1.xml.rels": footer_rels_xml,
    })

    rendered = convert_to_markdown(archive, archive.namelist())

    assert rendered == "![footer-image.png](media/footer-image.png)"