Skip to content

Commit edd8df2

Browse files
committed
more timeout for large files; bytes pbar for large files
1 parent 8257aab commit edd8df2

2 files changed

Lines changed: 44 additions & 3 deletions

File tree

datamint/api/endpoints/resources_api.py

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,24 @@
3939
"""
4040

4141

42+
_LARGE_FILE_THRESHOLD = 300 * 1024 * 1024 # 300 MB
43+
44+
4245
def _infinite_gen(x):
    """Yield the same object ``x`` forever.

    The generator never terminates on its own; the consumer decides when to
    stop (for example by pairing it with a finite iterable in ``zip``).

    Args:
        x: The object to yield on every iteration. It is not copied, so every
            yielded item is the very same object.

    Yields:
        ``x``, indefinitely.
    """
    # Function-scope import keeps this block self-contained; delegating to
    # itertools.repeat replaces the hand-written ``while True`` loop while
    # still exposing a regular generator to callers.
    from itertools import repeat

    yield from repeat(x)
4548

4649

50+
async def _tracked_file_gen(file_obj: IO, pbar: tqdm, chunk_size: int = 65536):
    """Stream a file in chunks while advancing a bytes progress bar.

    Each read is run in a worker thread so that a slow disk or network
    filesystem does not block the event loop (and with it every other
    upload running concurrently) while a chunk is being fetched.

    Args:
        file_obj: Open file-like object. Reading starts at its current
            position; the object is not closed by this generator.
        pbar: Progress bar whose ``update`` method is called with the length
            of each chunk just before that chunk is yielded.
        chunk_size: Maximum number of bytes requested per read. Must be
            positive.

    Yields:
        Successive non-empty chunks returned by ``file_obj.read``.

    Raises:
        ValueError: On first iteration, if ``chunk_size`` is not positive.
            ``read(0)`` would otherwise end the stream immediately and
            silently produce an empty upload.
    """
    # Function-scope import keeps this block self-contained.
    import asyncio

    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size}")

    # An empty chunk signals end of file and terminates the loop.
    while chunk := await asyncio.to_thread(file_obj.read, chunk_size):
        pbar.update(len(chunk))
        yield chunk
58+
59+
4760
def _open_io(file_path: str | Path | IO, mode: str = 'rb') -> IO:
4861
if isinstance(file_path, str) or isinstance(file_path, Path):
4962
return open(file_path, 'rb')
@@ -305,6 +318,29 @@ async def _upload_single_resource_async(self,
305318
else:
306319
f = _open_io(file_path)
307320

321+
# Determine file size for potential large-file upload progress bar
322+
try:
323+
file_size = os.fstat(f.fileno()).st_size
324+
except (AttributeError, OSError):
325+
try:
326+
pos = f.tell()
327+
f.seek(0, 2)
328+
file_size = f.tell()
329+
f.seek(pos)
330+
except (AttributeError, OSError):
331+
file_size = 0
332+
333+
upload_pbar: tqdm | None = None
334+
if file_size > _LARGE_FILE_THRESHOLD:
335+
upload_pbar = tqdm(
336+
total=file_size,
337+
unit='B',
338+
unit_scale=True,
339+
unit_divisor=1024,
340+
desc=f"Uploading {filename}",
341+
leave=False,
342+
)
343+
308344
try:
309345
metadata_content = None
310346
metadata_dict = None
@@ -337,7 +373,8 @@ async def _upload_single_resource_async(self,
337373
file_key = 'resource'
338374
form.add_field('source', 'api')
339375

340-
form.add_field(file_key, f, filename=filename, content_type=mimetype)
376+
file_payload = _tracked_file_gen(f, upload_pbar) if upload_pbar is not None else f
377+
form.add_field(file_key, file_payload, filename=filename, content_type=mimetype)
341378
form.add_field('source_filepath', source_filepath) # full path to the file
342379
if mimetype is not None:
343380
form.add_field('mimetype', mimetype)
@@ -358,7 +395,9 @@ async def _upload_single_resource_async(self,
358395
except Exception as e:
359396
_LOGGER.warning(f"Failed to add metadata to form: {e}")
360397

361-
timeout = aiohttp.ClientTimeout(total=300, connect=60, sock_read=300)
398+
# Scale total/read timeout proportionally for large files (min 300 s, ≥2 s/MB)
399+
_upload_secs = max(300, file_size // (512 * 1024)) if file_size else 300
400+
timeout = aiohttp.ClientTimeout(total=_upload_secs, connect=60, sock_read=_upload_secs)
362401
resp_data = await self._make_request_async_json('POST',
363402
endpoint=self.endpoint_base,
364403
data=form,
@@ -375,6 +414,8 @@ async def _upload_single_resource_async(self,
375414
_LOGGER.error(f"Error uploading {file_path}: {e}")
376415
raise
377416
finally:
417+
if upload_pbar is not None:
418+
upload_pbar.close()
378419
f.close()
379420

380421
async def _upload_resources_async(self,

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[project]
22
name = "datamint"
33
description = "A library for interacting with the Datamint API, designed for efficient data management, processing and Deep Learning workflows."
4-
version = "2.11.5"
4+
version = "2.11.6"
55
dynamic = ["dependencies"]
66
requires-python = ">=3.10"
77
readme = "README.md"

0 commit comments

Comments
 (0)