-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpdf_docs.py
More file actions
44 lines (30 loc) · 1.26 KB
/
pdf_docs.py
File metadata and controls
44 lines (30 loc) · 1.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
from unstructured.partition.pdf import partition_pdf
import os
import time
import documents
def update_pdf(id: int):
    """Update the stored PDF document identified by ``id``.

    Looks the document up with ``get_document_by_id`` and, when the lookup
    returns something truthy, forwards it to ``documents.update_document``
    tagged with the PDF document type. Returns the result of that call, or
    ``None`` when no document was found.
    """
    # NOTE(review): `get_document_by_id`, `elements`, `title` and `url` are not
    # defined or imported in the visible part of this module, so this function
    # looks like an unfinished stub that raises NameError when called. Confirm
    # where these values are meant to come from before relying on it.
    existing = get_document_by_id(id)
    if not existing:
        return None
    return documents.update_document(
        existing,
        elements,
        title,
        documents.DocumentTypes.PDF.value,
        url,
    )
def add_pdf(file: bytes, filename: str, title: str, url: str = None, metadata: dict | None = None):
    """Store an uploaded PDF on disk, partition it, and register it as a document.

    The raw bytes are written to disk through ``_save_file``; the resulting
    path is handed to ``partition_pdf`` and the partitioned elements are passed
    to ``documents.add_document`` together with ``title``, the PDF document
    type, ``url`` and ``metadata``.

    Returns whatever ``documents.add_document`` returns.
    """
    saved_path = _save_file(file, filename)
    # _save_file already returns a str, so it can be passed through unchanged.
    parsed_elements = partition_pdf(filename=saved_path)
    return documents.add_document(
        parsed_elements,
        title,
        documents.DocumentTypes.PDF.value,
        url,
        metadata=metadata,
    )
def _save_file(file: bytes, filename: str) -> str:
"""Helper to save an uploaded file to disk and return the file path."""
# for whatever reason, makedirs refused to create the subfolder,
# so two steps it is.
data_path = f"{os.environ.get("DATA_DIR", "./data")}"
try:
os.mkdir(data_path)
except Exception as e:
print("data folder exists, skipping creation.")
pdf_path = os.path.join(data_path, "pdfs")
try:
os.mkdir(pdf_path)
except Exception as e:
print("pdfs folder exists, skipping creation.")
file_path = f"{pdf_path}/{time.time()}-{filename}"
with open(file_path, "wb") as f:
f.write(file)
return file_path