Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

OPEN_SEARCH_URL = 'http://opensearch-node1:9200'
OPEN_SEARCH_INDEX = 'scan-explorer'
OPEN_SEARCH_AGG_BUCKET_LIMIT = 10000

ADS_SEARCH_SERVICE_URL = 'https://api.adsabs.harvard.edu/v1/search/query'
ADS_SEARCH_SERVICE_TOKEN = '<CHANGE ME>'
Expand Down
4 changes: 2 additions & 2 deletions scan_explorer_service/open_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def append_aggregate(query: dict, agg_field: EsFields, page: int, size: int, sor
}
},
"ids": {
"terms": {"field": agg_field.value, "size": 10000},
"terms": {"field": agg_field.value, "size": current_app.config.get("OPEN_SEARCH_AGG_BUCKET_LIMIT", 10000)},
"aggs": {
"bucket_sort": {
"bucket_sort": {
Expand Down Expand Up @@ -97,7 +97,7 @@ def text_search_highlight(text: str, filter_field: EsFields, filter_value: str):
for hit in es_search(query)['hits']['hits']:
yield {
"page_id": hit['_source']['page_id'],
"highlight": hit['highlight']['text']
"highlight": hit.get('highlight', {}).get('text', [])
}

def set_page_ocr_fields(query: dict) -> dict:
Expand Down
35 changes: 35 additions & 0 deletions scan_explorer_service/tests/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,5 +356,40 @@ def test_get_page_ocr_article_no_pages(self, OpenSearch):
self.assertIn('no pages', data['message'].lower())


class TestOpenSearchHighlight(TestCaseDatabase):
    """Regression tests for text_search_highlight when a hit carries no
    'highlight' key (OpenSearch omits it for non-matching stored fields)."""

    def create_app(self):
        from scan_explorer_service.app import create_app
        # Keep the app pointed at throwaway endpoints; es_search is mocked
        # in the tests, so no real OpenSearch instance is contacted.
        config = {
            'SQLALCHEMY_DATABASE_URI': self.postgresql_url,
            'OPEN_SEARCH_URL': 'http://localhost:1234',
            'OPEN_SEARCH_INDEX': 'test',
            'SQLALCHEMY_ECHO': False,
            'TESTING': True,
            'PROPAGATE_EXCEPTIONS': True,
            'TRAP_BAD_REQUEST_ERRORS': True,
            'PRESERVE_CONTEXT_ON_EXCEPTION': False,
        }
        return create_app(**config)

    def setUp(self):
        # Rebuild the schema so every test starts from an empty database.
        engine = self.app.db.engine
        Base.metadata.drop_all(bind=engine)
        Base.metadata.create_all(bind=engine)

    @patch('scan_explorer_service.open_search.es_search')
    def test_text_search_highlight_missing_highlight_field(self, mock_es_search):
        # A hit with only _source and no 'highlight' key must still be
        # yielded, with an empty highlight list rather than a KeyError.
        hit = {'_source': {'page_id': 'page1'}}
        mock_es_search.return_value = {'hits': {'hits': [hit]}}

        from scan_explorer_service.open_search import text_search_highlight, EsFields

        results = list(text_search_highlight('test query', EsFields.volume_id, 'vol1'))

        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]['page_id'], 'page1')
        self.assertEqual(results[0]['highlight'], [])


# Allow running this test module directly (e.g. `python test_metadata.py`).
if __name__ == '__main__':
    unittest.main()
19 changes: 19 additions & 0 deletions scan_explorer_service/tests/test_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ def __init__(self, data, status_code, headers):
def json(self):
return self.json_data

def close(self):
pass

if 'notfound' in args[1]:
return MockResponse({}, 401, {})
elif 'badrequest' in args[1]:
Expand All @@ -111,6 +114,20 @@ def test_get_image(self, mock_request):
response = image_proxy('badrequest-~image-~path')
assert(response.status_code == 400)

@patch('scan_explorer_service.views.image_proxy.requests.request')
def test_image_proxy_closes_upstream_response(self, mock_request):
mock_response = MagicMock()
mock_response.status_code = 200
mock_response.headers = {}
mock_response.raw.stream.return_value = [b'chunk1', b'chunk2']
mock_request.return_value = mock_response

url = url_for('proxy.image_proxy', path='some-~image-~path')
response = self.client.get(url)
self.assertEqual(response.status_code, 200)
response.close()
mock_response.close.assert_called()

@patch('requests.request', side_effect=mocked_request)
def test_get_thumbnail(self, mock_request):

Expand Down Expand Up @@ -233,6 +250,8 @@ def __init__(self, d, sc, h):
self.raw = Raw(d)
self.status_code = sc
self.headers = h or {}
def close(self):
pass
return MockResponse(data, status_code, headers or {})

@patch('requests.request')
Expand Down
5 changes: 4 additions & 1 deletion scan_explorer_service/views/image_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,10 @@ def image_proxy(path):
def generate():
for chunk in r.raw.stream(decode_content=False):
yield chunk
return Response(generate(), status=r.status_code, headers=headers)

resp = Response(generate(), status=r.status_code, headers=headers)
resp.call_on_close(r.close)
return resp


@advertise(scopes=['api'], rate_limit=[5000, 3600*24])
Expand Down
8 changes: 4 additions & 4 deletions scan_explorer_service/views/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def article_extra(bibcode: str):
try:
params = {'q': f'bibcode:{bibcode}', 'fl':'title,author'}
headers = {'Authorization': f'Bearer {auth_token}'}
response = requests.get(ads_search_service, params, headers=headers).json()
response = requests.get(ads_search_service, params, headers=headers, timeout=5).json()
docs = response.get('response').get('docs')
if docs:
return docs[0]
Expand Down Expand Up @@ -61,7 +61,7 @@ def put_article():
article = Article(**json)
article_overwrite(session, article)
return jsonify({'id': article.bibcode}), 200
except:
except Exception:
session.rollback()
return jsonify(message='Failed to create article'), 500
else:
Expand Down Expand Up @@ -130,7 +130,7 @@ def put_collection():
session.commit()

return jsonify({'id': collection.id}), 200
except:
except Exception:
session.rollback()
return jsonify(message='Failed to create collection'), 500
else:
Expand All @@ -156,7 +156,7 @@ def put_page():
session.commit()
session.refresh(page)
return jsonify({'id': page.id}), 200
except:
except Exception:
session.rollback()
return jsonify(message='Failed to create page'), 500
else:
Expand Down
Loading