-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathhtml2image.py
More file actions
200 lines (176 loc) · 8.57 KB
/
html2image.py
File metadata and controls
200 lines (176 loc) · 8.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
import io
from typing import Literal
from PIL import Image, ImageDraw, ImageFont
from bs4 import BeautifulSoup
def get_markup(path="./example/text.html"):
    """Return the full text of the HTML file at *path*, decoded as UTF-8.

    :param path: location of the HTML file to read
    :return: the file contents as a single string
    :raises OSError: if the file cannot be opened
    :raises UnicodeDecodeError: if the file is not valid UTF-8
    """
    # The context manager closes the handle even when read() raises.
    with open(path, encoding="utf-8") as html_file:
        return html_file.read()
class HtmlParser:
    """Extract the parts of a task page (title, condition, tables, images) from HTML.

    The markup is parsed once in the constructor with BeautifulSoup's ``lxml``
    backend; every ``get_*`` method queries the resulting tree.
    """

    # Seconds to wait for each image download before giving up, so that an
    # unresponsive host cannot block get_images() forever.
    request_timeout = 10

    def __init__(self, html_markup):
        """
        :param html_markup: HTML document as a string
        """
        self.soup = BeautifulSoup(html_markup, "lxml")

    def get_tables(self):
        """Return every ``<table>`` as a list of rows, each a list of cell texts.

        Only ``<td>`` cells are read. Cell text is stripped and cells that end
        up empty are dropped, so a row may come back as an empty list.

        :return: list of tables; each table is a list of rows of strings
        """
        parsed_tables = []
        for tag_table in self.soup.find_all("table"):
            # <tbody> may be absent from the markup; search the whole table
            # in that case instead of failing on None.
            rows_root = tag_table.find("tbody")
            if rows_root is None:
                rows_root = tag_table
            parsed_table = []
            for row in rows_root.find_all("tr"):
                cells = [cell.text.strip() for cell in row.find_all("td")]
                parsed_table.append([cell for cell in cells if cell])
            parsed_tables.append(parsed_table)
        return parsed_tables

    def get_images(self):
        """Download every ``<img>`` that has a ``src`` and return the raw bytes.

        ``<img>`` tags without a ``src`` attribute (or with an empty one) are
        skipped. Errors raised by ``requests`` (bad URL, timeout, connection
        failure) propagate to the caller.

        :return: list of ``io.BytesIO`` objects, one per downloaded image
        """
        # Imported here rather than at module level, so ``requests`` is only
        # required when this method actually runs.
        from requests import get

        binary_images = []
        for tag_img in self.soup.find_all("img"):
            url = tag_img.get("src")
            if not url:
                continue
            response = get(url, timeout=self.request_timeout)
            binary_images.append(io.BytesIO(response.content))
        return binary_images

    def get_title(self):
        """Return the stripped text of the first ``<h1>``.

        :raises AttributeError: if the document has no ``<h1>``
        """
        tag_h1 = self.soup.find("h1")
        return tag_h1.text.strip()

    def get_condition(self, divider="\n"):
        """Return the stripped texts of all ``<h3>`` tags joined by *divider*.

        :param divider: string placed between consecutive ``<h3>`` texts
        :return: the joined text; an empty string when there is no ``<h3>``
        """
        return divider.join(tag_h3.text.strip() for tag_h3 in self.soup.find_all("h3"))

    def get_content(self, order=None):
        """Collect title, condition, tables and images into one dict.

        :param order: key names that fix the insertion order of the result;
            defaults to ``["title", "condition", "tables", "images"]``.
            All four parts are always filled in regardless of *order*; a name
            in *order* that is not one of the four stays mapped to ``None``.
        :return: dict with at least the keys "title", "condition", "tables"
            and "images"
        """
        if order is None:
            order = ["title", "condition", "tables", "images"]
        content = {element: None for element in order}
        content["title"] = self.get_title()
        content["condition"] = self.get_condition()
        content["tables"] = self.get_tables()
        content["images"] = self.get_images()
        return content
class Content2Image:
font_path = "./static/fonts/Cousine-Regular.ttf"
text_color = (0, 0, 0, 255)
table_color = (0, 0, 0, 255)
background = (0, 0, 0, 0)
def __init__(self, content: dict, fixed_width=1000, indent_coef=0.01, spacing_coef=0.003):
"""
:param content:
:param fixed_width: ширина изображения, высота вычисляется в зависимости от объема контента и fixed_width
:param indent_coef: коэффициент расчета расстояния между частями задания (заголовком, условием, таблицами и т.п.)
:param spacing_coef: коэффициент расчета расстояние между строчками текста
"""
self.content = content
self.width = fixed_width
self.heading_fontsize = fixed_width // 40
self.content_fontsize = int(fixed_width / 62.5)
self.indent = int(fixed_width * indent_coef)
self.spacing = int(fixed_width * spacing_coef)
def draw_title(self, xy: tuple[int, int] | None = None,
anchor="lm",
align: Literal["left", "right", "center"] = "left"):
if not self.content["title"]:
return Image.new("RGBA", (self.width, 0), (0, 0, 0, 0))
line_length = int((1600 / self.heading_fontsize) * (self.width / 1000))
text_container = self.content["title"].split("\n")
for i in range(len(text_container)):
line = text_container[i]
transfers = len(line) // line_length
j = 0
for transfer in range(transfers):
line = line[:line_length * (j + 1) + j] + "\n" + line[line_length * (j + 1) + j:]
j += 1
text_container[i] = line
text = "\n".join(text_container)
height = self.indent + (self.heading_fontsize + self.spacing) * (text.count("\n") + 1)
if xy is None and anchor != "lm":
xy = (0, 0)
elif xy is None and anchor == "lm":
xy = (0, height // 2)
font = ImageFont.truetype(self.font_path, self.heading_fontsize, encoding="UTF-8")
image = Image.new("RGBA", (self.width, height), self.background)
drawer = ImageDraw.Draw(image)
drawer.text(xy, text, self.text_color, font, anchor, self.spacing, align)
return image
def draw_condition(self, xy: tuple[int, int] | None = None,
anchor="lm",
align: Literal["left", "right", "center"] = "left"):
if not self.content["condition"]:
return Image.new("RGBA", (self.width, 0), (0, 0, 0, 0))
line_length = int((1600 / self.content_fontsize) * (self.width / 1000))
text_container = self.content["condition"].split("\n")
for i in range(len(text_container)):
line = text_container[i]
transfers = len(line) // line_length
j = 0
for transfer in range(transfers):
line = line[:line_length * (j + 1) + j] + "\n" + line[line_length * (j + 1) + j:]
j += 1
text_container[i] = line
text = "\n".join(text_container)
height = self.indent + (self.content_fontsize + self.spacing) * (text.count("\n") + 1)
if xy is None and anchor != "lm":
xy = (0, 0)
elif xy is None and anchor == "lm":
xy = (0, height // 2)
font = ImageFont.truetype(self.font_path, self.content_fontsize, encoding="UTF-8")
image = Image.new("RGBA", (self.width, height), self.background)
drawer = ImageDraw.Draw(image)
drawer.text(xy, text, self.text_color, font, anchor, self.spacing, align)
return image
def draw_images(self, image_scale=0.5):
if not self.content["images"]:
return Image.new("RGBA", (self.width, 0), (0, 0, 0, 0))
images = [Image.open(binary) for binary in self.content["images"]]
for image in images:
image.thumbnail((self.width, image.size[1]))
height = 2 * self.indent + self.spacing * (len(images) - 1) + sum(image.size[1] for image in images)
result = Image.new("RGBA", (self.width, height), self.background)
indent = Image.new("RGBA", (self.width, self.indent), self.background)
spacing = Image.new("RGBA", (self.width, self.spacing), self.background)
bottom = 0
result.paste(indent, (0, 0))
bottom += indent.size[1]
result.paste(images[0], (0, bottom))
bottom += images[0].size[1]
result.paste(spacing, (0, bottom))
bottom += spacing.size[1]
for i, image in enumerate(images[1:], 1):
result.paste(image, (0, bottom))
bottom += image.size[1]
result.paste(spacing, (0, bottom))
bottom += spacing.size[1]
result.paste(indent, (0, bottom))
w, h = result.size
w, h = int(w * image_scale), int(h * image_scale)
result.thumbnail((w, h))
return result
def draw_content(self, image_scale=0.5): # TODO: order
title = self.draw_title()
condition = self.draw_condition()
images = self.draw_images(image_scale)
height = sum(image.size[1] for image in [title, condition, images])
content = Image.new("RGBA", (self.width, height), self.background)
bottom = 0
content.paste(title, (0, bottom))
bottom += title.size[1]
content.paste(condition, (0, bottom))
bottom += condition.size[1]
content.paste(images, ((self.width - images.size[0]) // 2, bottom))
return content
def auto(path="./example/empty_or_incorrect.html"):
    """Render the HTML file at *path* into an image with the default settings.

    If reading, parsing or setting up the renderer raises, a 100x100 solid
    red RGB image is returned instead. The final drawing step is not covered
    by that fallback.

    :param path: location of the HTML file to render
    :return: the rendered image, or the red placeholder on failure
    """
    try:
        parsed = HtmlParser(get_markup(path)).get_content()
        # All calculations were made at this value, so it is better to keep fixed_width=1000.
        # If another size is needed, either change fixed_width or use resize/thumbnail from Pillow.
        renderer = Content2Image(parsed, fixed_width=1000, indent_coef=0.01, spacing_coef=0.003)
        renderer.font_path = "./static/fonts/Cousine-Regular.ttf"  # default
        renderer.text_color = (0, 0, 0, 255)  # default
        renderer.table_color = (0, 0, 0, 255)  # default
        renderer.background = (0, 0, 0, 0)  # default
    except Exception:
        return Image.new("RGB", (100, 100), (255, 0, 0))
    else:
        return renderer.draw_content(image_scale=0.3)
if __name__ == "__main__":
    # Manual run: render the default example file and open it in the system image viewer.
    preview = auto()
    preview.show()