From aa2ed18fdde76beff4de6afa6ada62b4c24737f3 Mon Sep 17 00:00:00 2001 From: Aneesh Karve Date: Tue, 14 May 2019 19:04:05 -0700 Subject: [PATCH 1/2] handle s3:TestEvent, make indexer core more robust --- lambdas/es/indexer/index.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/lambdas/es/indexer/index.py b/lambdas/es/indexer/index.py index 4e499a97..e62e84e7 100644 --- a/lambdas/es/indexer/index.py +++ b/lambdas/es/indexer/index.py @@ -168,8 +168,17 @@ def handler(event, _): dispatch post_to_es """ try: - for msg in event['Records']: - for record in json.loads(json.loads(msg['body'])['Message'])['Records']: + # existence of event['Records'] is guaranteed by + # https://docs.aws.amazon.com/AmazonS3/latest/dev/notification-content-structure.html + # see above for details on event structure, s3:TestEvent + for outer in event['Records']: + # these should all be SNS messages (indexer consumes from SNS topic, by design) + raw_message = json.loads(outer['body'])['Message'] + message = json.loads(raw_message) + if 'Records' not in message: + # consume event (we don't want to index it) + return + for record in message['Records']: try: eventname = record['eventName'] bucket = unquote(record['s3']['bucket']['name']) @@ -185,7 +194,8 @@ def handler(event, _): elif eventname == 'ObjectCreated:Put': event_type = 'Create' else: - event_type = eventname + # we should only send either Create or Delete events + return try: # Retry with back-off for eventual consistency reasons @tenacity.retry(wait=tenacity.wait_exponential(multiplier=2, min=4, max=30)) @@ -255,7 +265,7 @@ def get_obj_from_s3(bucket, key, version_id=None, etag=None): print(e) import traceback traceback.print_tb(e.__traceback__) - print(msg) + print(outer) except Exception as e: # do our best to process each result print("Exception encountered for whole Event") @@ -264,4 +274,4 @@ def get_obj_from_s3(bucket, key, version_id=None, etag=None): traceback.print_tb(e.__traceback__) print(event) # Fail the lambda so the message is not dequeued. - raise e \ No newline at end of file + raise e From c4bd76f27b23d4404f4838d19d3ba32779a82b83 Mon Sep 17 00:00:00 2001 From: Aneesh Karve Date: Thu, 16 May 2019 22:48:21 -0700 Subject: [PATCH 2/2] only drop s3:TestEvent --- lambdas/es/indexer/index.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lambdas/es/indexer/index.py b/lambdas/es/indexer/index.py index e62e84e7..55ca9239 100644 --- a/lambdas/es/indexer/index.py +++ b/lambdas/es/indexer/index.py @@ -170,13 +170,14 @@ def handler(event, _): try: # existence of event['Records'] is guaranteed by # https://docs.aws.amazon.com/AmazonS3/latest/dev/notification-content-structure.html - # see above for details on event structure, s3:TestEvent for outer in event['Records']: # these should all be SNS messages (indexer consumes from SNS topic, by design) raw_message = json.loads(outer['body'])['Message'] message = json.loads(raw_message) - if 'Records' not in message: - # consume event (we don't want to index it) + # When you configure an event notification on a bucket, + # Amazon S3 sends an s3:TestEvent (refer to link above) + if message['Event'] == "s3:TestEvent": + # consume event without indexing it return for record in message['Records']: try: @@ -194,8 +195,7 @@ def handler(event, _): elif eventname == 'ObjectCreated:Put': event_type = 'Create' else: - # we should only send either Create or Delete events - return + event_type = eventname try: # Retry with back-off for eventual consistency reasons @tenacity.retry(wait=tenacity.wait_exponential(multiplier=2, min=4, max=30))