Skip to content

Commit 40a4b15

Browse files
new: add map_threads option for ThreadPool size in map pipe
Allow configuring the ThreadPool size used in the map pipe. Passing the default value None makes ThreadPool use os.process_cpu_count(), keeping the existing behaviour. Setting the value to 1 provides a workaround for #315 (it avoids parallelism). Given that most processing happens within Python without releasing the GIL, running with a single thread does not actually incur any significant penalty.
1 parent b619d50 commit 40a4b15

2 files changed

Lines changed: 6 additions & 2 deletions

File tree

src/pyff/builtins.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from lxml.etree import DocumentInvalid
2323
from str2bool import str2bool
2424

25-
from pyff.constants import NS
25+
from pyff.constants import NS, config
2626
from pyff.decorators import deprecated
2727
from pyff.exceptions import MetadataException
2828
from pyff.logs import get_log
@@ -111,7 +111,7 @@ def _p(e):
111111

112112
from multiprocessing.pool import ThreadPool
113113

114-
pool = ThreadPool()
114+
pool = ThreadPool(config.map_threads)
115115
result = pool.map(_p, iter_entities(req.t), chunksize=10)
116116
log.info(f"processed {len(result)} entities")
117117

src/pyff/constants.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,10 @@ class Config:
344344
"info_buffer_size", default=10, typeconv=as_int, info="how much history to keep about each metadata URL"
345345
)
346346

347+
map_threads = S(
348+
"map_threads", typeconv=as_int, info="how many threads to create in map pipe [default: os.process_cpu_count() ]"
349+
)
350+
347351
worker_pool_size = S(
348352
"worker_pool_size", default=1, cmdline=['pyffd'], typeconv=as_int, info="how many gunicorn workers to run"
349353
)

0 commit comments

Comments
 (0)