-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathgenerate_ocr_custom.py
More file actions
127 lines (101 loc) · 4.53 KB
/
generate_ocr_custom.py
File metadata and controls
127 lines (101 loc) · 4.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
"""
Script to generate OCR custom questions for math-eval dataset.
Usage:
python generate_ocr_custom.py --equations_file <equations_file> --output_dir <output_dir>
"""
import argparse
import os
from PIL import Image, ImageDraw, ImageFont
from tqdm import tqdm
import csv
def create_visual_equation(equation, font):
    """Render a whitespace-separated equation as one horizontal strip image.

    Each whitespace-delimited token of ``equation`` is drawn in black, centred
    on its own transparent tile; the tiles are then pasted left to right onto
    a white RGB canvas with a 5-pixel gap between neighbours.

    Tiles are at least 200x200 pixels. A tile grows wider when its token does
    not fit, and all tiles share one height so tokens stay on a common line.

    Args:
        equation: Equation text whose tokens are separated by whitespace,
            e.g. ``"x + 2 = 5"``.
        font: A Pillow font object usable with ``ImageDraw.text`` and
            ``ImageDraw.textbbox``.

    Returns:
        A ``(combined_image, metadata)`` tuple: the white-background RGB
        image holding every token, and a dict mapping each token to itself.

    Raises:
        ValueError: If ``equation`` contains no tokens.
    """
    tile_min = 200     # minimum tile edge, in pixels
    tile_margin = 10   # blank border kept around a token that outgrows tile_min
    gap = 5            # horizontal spacing between neighbouring tiles

    elements = equation.split()
    if not elements:
        # Without this guard the size unpacking below fails with an opaque
        # "not enough values to unpack" ValueError.
        raise ValueError("equation must contain at least one token")

    # Measure every token first so that all tiles can share a single height.
    scratch = ImageDraw.Draw(Image.new('RGBA', (1, 1)))
    boxes = [scratch.textbbox((0, 0), element, font=font) for element in elements]
    tallest = max(box[3] - box[1] for box in boxes)
    tile_height = max(tile_min, int(tallest) + 2 * tile_margin)

    images = []
    for element, (left, top, right, bottom) in zip(elements, boxes):
        text_w, text_h = right - left, bottom - top
        tile_width = max(tile_min, int(text_w) + 2 * tile_margin)
        tile = Image.new('RGBA', (tile_width, tile_height), (255, 255, 255, 0))
        # The bounding box does not start at the drawing origin, so subtract
        # its left/top offset; otherwise the glyphs sit off-centre and can be
        # clipped at the tile edge.
        position = (
            (tile_width - text_w) / 2 - left,
            (tile_height - text_h) / 2 - top,
        )
        ImageDraw.Draw(tile).text(position, element, font=font, fill=(0, 0, 0))
        images.append(tile)

    metadata = {element: element for element in elements}

    # Combine the tiles side by side on a white background.
    total_width = sum(img.width for img in images) + gap * (len(images) - 1)
    max_height = max(img.height for img in images)
    combined_image = Image.new('RGB', (total_width, max_height), color=(255, 255, 255))
    x_offset = 0
    for img in images:
        # Every tile is RGBA, so it doubles as its own transparency mask.
        combined_image.paste(img, (x_offset, 0), img)
        x_offset += img.width + gap
    return combined_image, metadata
def _load_font(size=120):
    """Return the first available TrueType font, else Pillow's default font."""
    for font_name in ("Arial.ttf", "DejaVuSans.ttf"):
        try:
            return ImageFont.truetype(font_name, size)
        except IOError:
            continue
    print("Warning: Using default font, might not render properly!")
    return ImageFont.load_default()


def _parse_variables(assignments):
    """Parse ``"x=1, y=2"`` into ``{"x": "1", "y": "2"}``, ignoring items without ``=``."""
    variables = {}
    for item in assignments.split(','):
        # Split on the first '=' only, so a value that itself contains '='
        # no longer raises "too many values to unpack".
        name, separator, value = item.partition('=')
        if separator:
            variables[name.strip()] = value.strip()
    return variables


def _stack_vertically(images, gap=10):
    """Stack images top to bottom on a white canvas, ``gap`` pixels apart."""
    canvas_width = max(img.width for img in images)
    canvas_height = sum(img.height for img in images) + gap * (len(images) - 1)
    canvas = Image.new('RGB', (canvas_width, canvas_height), color=(255, 255, 255))
    y_offset = 0
    for img in images:
        canvas.paste(img, (0, y_offset))
        y_offset += img.height + gap
    return canvas


def main(equations_file, output_folder, metadata_file):
    """Render every equation set in a text file to a PNG and write a metadata CSV.

    Each input line has the form ``<equations><sep><assignments>``, where
    ``<sep>`` is the literal separator text, ``<equations>`` is a
    comma-separated list of equations and ``<assignments>`` is a
    comma-separated list of ``name=value`` pairs. The equations of one line
    are rendered one below the other and saved as ``equation_set_<n>.png``,
    ``n`` being the 1-based line number.

    Lines without the separator, or without any non-empty equation, are
    reported on stdout and skipped.

    Args:
        equations_file: Path of the text file to read.
        output_folder: Directory that receives the PNG files; created if
            it does not exist.
        metadata_file: Path of the CSV to write. It has a ``filename``
            column followed by one column per variable name seen in the
            input; cells are empty where a line did not define a variable.
    """
    os.makedirs(output_folder, exist_ok=True)
    font = _load_font()

    with open(equations_file, 'r') as file:
        lines = file.readlines()

    metadata_list = []
    # Union of all column names in first-seen order. Taking them from the
    # last line only breaks when lines differ or when no line is valid.
    fieldnames = ['filename']
    # Wrap the list (not the enumerate object) so tqdm knows the total.
    for i, line in enumerate(tqdm(lines)):
        parts = line.strip().split('<sep>')
        if len(parts) < 2:
            print(f"Skipping malformed line: {line}")
            continue

        # Drop empty entries (e.g. from a trailing comma); they cannot be rendered.
        equations = [eq.strip() for eq in parts[0].split(',') if eq.strip()]
        if not equations:
            print(f"Skipping line with no equations: {line}")
            continue
        variables = _parse_variables(parts[1].strip())

        equation_images = [create_visual_equation(eq, font)[0] for eq in equations]
        combined_image = _stack_vertically(equation_images)
        output_path = os.path.join(output_folder, f"equation_set_{i + 1}.png")
        combined_image.save(output_path, dpi=(300, 300))

        # Store filename and all variables in metadata
        metadata_entry = {'filename': os.path.basename(output_path)}
        metadata_entry.update(variables)
        metadata_list.append(metadata_entry)
        for name in variables:
            if name not in fieldnames:
                fieldnames.append(name)

    # Write metadata to CSV file
    with open(metadata_file, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames, restval='')
        writer.writeheader()
        writer.writerows(metadata_list)
def cli_main():
    """Parse the command-line flags and forward them to ``main``.

    Both ``--equations_file`` and ``--output_dir`` are required. The metadata
    CSV is written as ``metadata.csv`` inside the output directory.
    """
    arg_parser = argparse.ArgumentParser(
        description="Generate OCR custom questions for math-eval dataset."
    )
    required_flags = (
        ('--equations_file', 'Path to the equations file'),
        ('--output_dir', 'Output directory for generated images and metadata'),
    )
    for flag, help_text in required_flags:
        arg_parser.add_argument(flag, type=str, required=True, help=help_text)

    options = arg_parser.parse_args()
    main(
        options.equations_file,
        options.output_dir,
        os.path.join(options.output_dir, "metadata.csv"),
    )
# Run the command-line entry point only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    cli_main()