From 1eaa6457d991feffe80e751415b35e5e531bdb23 Mon Sep 17 00:00:00 2001 From: Scott Peshak Date: Tue, 13 Jun 2017 15:19:15 -0500 Subject: [PATCH 1/2] Set S3 redirect metadata when processing redirect files --- lib/staticizer/crawler.rb | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/staticizer/crawler.rb b/lib/staticizer/crawler.rb index 75b1f62..28c2f1d 100644 --- a/lib/staticizer/crawler.rb +++ b/lib/staticizer/crawler.rb @@ -164,6 +164,12 @@ def save_page_to_aws(response, uri) # Upload this file directly to AWS::S3 opts = {:acl => "public-read"} opts[:content_type] = response['content-type'] rescue "text/html" + + # Detect a meta-redirect and set an S3 hosting redirect metadata item + if response =~ /META http-equiv='refresh' content='0;URL="(.*)"/ + opts[:website_redirect_location] = $1 + end + @log.info "Uploading #{key} to s3 with content type #{opts[:content_type]}" if response.respond_to?(:read_body) body = process_body(response.read_body, uri, opts) @@ -197,7 +203,6 @@ def process_success(response, parsed_uri) end # If we hit a redirect we save the redirect as a meta refresh page - # TODO: for AWS S3 hosting we could instead create a redirect? def process_redirect(url, destination_url) body = "You are being redirected to #{destination_url}." save_page(body, url) From 7d44f66b1c2a1128270a235b3bab7f84ee28f475 Mon Sep 17 00:00:00 2001 From: David Michael Date: Thu, 20 Sep 2018 12:58:10 -0500 Subject: [PATCH 2/2] Modify redirect location header if no leading / on S3 upload --- lib/staticizer/crawler.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/staticizer/crawler.rb b/lib/staticizer/crawler.rb index 28c2f1d..e45e411 100644 --- a/lib/staticizer/crawler.rb +++ b/lib/staticizer/crawler.rb @@ -167,7 +167,11 @@ def save_page_to_aws(response, uri) # Detect a meta-redirect and set an S3 hosting redirect metadata item if response =~ /META http-equiv='refresh' content='0;URL="(.*)"/ - opts[:website_redirect_location] = $1 + location = $1 + if location =~ /^(?:[^\/]|http:\/\/|https\:\/\/).*/ + location.prepend('/') + end + opts[:website_redirect_location] = location end @log.info "Uploading #{key} to s3 with content type #{opts[:content_type]}"