diff --git a/lib/staticizer/crawler.rb b/lib/staticizer/crawler.rb
index 75b1f62..e45e411 100644
--- a/lib/staticizer/crawler.rb
+++ b/lib/staticizer/crawler.rb
@@ -164,6 +164,19 @@ def save_page_to_aws(response, uri)
       # Upload this file directly to AWS::S3
       opts = {:acl => "public-read"}
       opts[:content_type] = response['content-type'] rescue "text/html"
+
+      # Detect a meta-refresh redirect page and mirror it as an S3 website
+      # redirect. Only String responses are inspected: response may instead be
+      # an object answering read_body (see below), which must not be sent =~.
+      if response.is_a?(String) &&
+         response =~ /META http-equiv='refresh' content='0;URL="([^"]*)"/
+        location = Regexp.last_match(1)
+        # S3 expects the target to start with "/", "http://" or "https://",
+        # so only a bare relative path gets a leading slash.
+        location = "/#{location}" unless location.start_with?('/', 'http://', 'https://')
+        opts[:website_redirect_location] = location
+      end
+
       @log.info "Uploading #{key} to s3 with content type #{opts[:content_type]}"
       if response.respond_to?(:read_body)
         body = process_body(response.read_body, uri, opts)
@@ -197,7 +210,6 @@ def process_success(response, parsed_uri)
     end

     # If we hit a redirect we save the redirect as a meta refresh page
-    # TODO: for AWS S3 hosting we could instead create a redirect?
     def process_redirect(url, destination_url)
       body = "You are being redirected to #{destination_url}."
       save_page(body, url)