From d5e463198380973fd92b03df0c3884b6aa6a8343 Mon Sep 17 00:00:00 2001 From: femalves Date: Tue, 17 Mar 2026 12:25:47 -0400 Subject: [PATCH] Cap pageCount to agg bucket limit and close response before retry --- scan_explorer_service/utils/search_utils.py | 8 ++++---- scan_explorer_service/views/image_proxy.py | 1 + scan_explorer_service/views/metadata.py | 6 ++++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/scan_explorer_service/utils/search_utils.py b/scan_explorer_service/utils/search_utils.py index 3049657..d46acb0 100644 --- a/scan_explorer_service/utils/search_utils.py +++ b/scan_explorer_service/utils/search_utils.py @@ -183,9 +183,9 @@ def serialize_os_agg_collection_bucket(bucket: dict): volume = id[5:9] return {'id': id, 'journal': journal, 'volume': volume, 'pages': bucket['doc_count']} -def serialize_os_collection_result(result: dict, page: int, limit: int, contentQuery): +def serialize_os_collection_result(result: dict, page: int, limit: int, contentQuery, agg_bucket_limit: int = 10000): total_count = result['aggregations']['total_count']['value'] - page_count = int(math.ceil(total_count / limit)) + page_count = int(math.ceil(min(total_count, agg_bucket_limit) / limit)) es_buckets = result['aggregations']['ids']['buckets'] return {'page': page, 'pageCount': page_count, 'limit': limit, 'total': total_count, 'query': contentQuery, @@ -195,9 +195,9 @@ def serialize_os_agg_article_bucket(bucket: dict): id = bucket['key'] return {'id': id, 'bibcode': id, 'pages': bucket['doc_count']} -def serialize_os_article_result(result: dict, page: int, limit: int, contentQuery = '', extra_col_count = 0, extra_page_count = 0): +def serialize_os_article_result(result: dict, page: int, limit: int, contentQuery = '', extra_col_count = 0, extra_page_count = 0, agg_bucket_limit: int = 10000): total_count = result['aggregations']['total_count']['value'] - page_count = int(math.ceil(total_count / limit)) + page_count = int(math.ceil(min(total_count, agg_bucket_limit) / limit)) es_buckets = result['aggregations']['ids']['buckets'] return {'page': page, 'pageCount': page_count, 'limit': limit, 'total': total_count, 'query': contentQuery, diff --git a/scan_explorer_service/views/image_proxy.py b/scan_explorer_service/views/image_proxy.py index a3bbe0e..658450d 100644 --- a/scan_explorer_service/views/image_proxy.py +++ b/scan_explorer_service/views/image_proxy.py @@ -42,6 +42,7 @@ def image_proxy(path): f"Upstream image request failed (status {r.status_code}), " f"retrying in {retry_delay}s (attempt {attempt + 1}/{retries})") time.sleep(retry_delay) + r.close() r = requests.request(request.method, encoded_url, params=request.args, stream=True, headers=req_headers, allow_redirects=False, data=request.form) diff --git a/scan_explorer_service/views/metadata.py b/scan_explorer_service/views/metadata.py index bfe9dc7..abcf065 100644 --- a/scan_explorer_service/views/metadata.py +++ b/scan_explorer_service/views/metadata.py @@ -179,7 +179,8 @@ def article_search(): if article_count == 0: collection_count = aggregate_search(qs, EsFields.volume_id, page, limit, sort)['aggregations']['total_count']['value'] page_count = page_os_search(qs, page, limit, sort)['hits']['total']['value'] - return jsonify(serialize_os_article_result(result, page, limit, text_query, collection_count, page_count)) + agg_limit = current_app.config.get("OPEN_SEARCH_AGG_BUCKET_LIMIT", 10000) + return jsonify(serialize_os_article_result(result, page, limit, text_query, collection_count, page_count, agg_limit)) except Exception as e: current_app.logger.exception(f"An exception has occurred: {e}") return jsonify(message=str(e), type=ApiErrors.SearchError.value), 400 @@ -195,7 +196,8 @@ def collection_search(): text_query = '' if SearchOptions.FullText.value in qs_dict.keys(): text_query = qs_dict[SearchOptions.FullText.value] - return jsonify(serialize_os_collection_result(result, page, limit, text_query)) + agg_limit = current_app.config.get("OPEN_SEARCH_AGG_BUCKET_LIMIT", 10000) + return jsonify(serialize_os_collection_result(result, page, limit, text_query, agg_limit)) except Exception as e: return jsonify(message=str(e), type=ApiErrors.SearchError.value), 400