Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/htsparse.c
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,14 @@ static HTS_INLINE char html_prevc(const char *html, const char *start) {
return html > start ? html[-1] : ' ';
}

/* Truncate `url` in place at its first '#', if there is one.
 * Applied to redirect Location values: the fragment is a user-agent anchor
 * and is never part of the resource that gets fetched (#204).
 * A string without '#' is left untouched. */
static void url_drop_fragment(char *const url) {
  char *cursor;

  for (cursor = url; *cursor != '\0'; cursor++) {
    if (*cursor == '#') {
      /* Cut here: everything from the first '#' onward is the fragment. */
      *cursor = '\0';
      break;
    }
  }
}

/* True if [s, s+len) is exactly an HTTP method token (XHR.open's first
argument is a method, not a URL: #218). Case-insensitive. */
static int is_http_method(const char *s, size_t len) {
Expand Down Expand Up @@ -3596,6 +3604,7 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
//

strcpybuff(mov_url, r->location);
url_drop_fragment(mov_url);

// url qque -> adresse+fichier
if ((reponse =
Expand Down Expand Up @@ -4803,6 +4812,7 @@ int hts_wait_delayed(htsmoduleStruct * str, lien_adrfilsave *afs,

mov_url[0] = '\0';
strcpybuff(mov_url, back[b].r.location); // copier URL
url_drop_fragment(mov_url);

/* Remove (temporarily created) file if it was created */
UNLINK(fconv(OPT_GET_BUFF(opt), OPT_GET_BUFF_SIZE(opt), back[b].url_sav));
Expand Down
11 changes: 11 additions & 0 deletions tests/29_local-redirect-fragment.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash
# Regression test for issue #204.
# A 302 whose Location ends in a #fragment must have that fragment removed
# before the redirect target is requested. The local test server is strict and
# answers 400 when a '#' shows up in the request-target, so a leaked fragment
# is logged as an error and the target file never gets saved.
set -e

# Fall back to the parent directory when the harness did not set top_srcdir.
top_srcdir="${top_srcdir:-..}"

# Expect zero errors and the redirect target present in the mirror.
crawl_args=(
  --errors 0
  --found 'redir/target.html'
  httrack 'BASEURL/redir/index.html'
)

bash "$top_srcdir/tests/local-crawl.sh" "${crawl_args[@]}"
3 changes: 2 additions & 1 deletion tests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ TESTS = \
25_local-mime-exclude.test \
26_local-strip-query.test \
27_local-cookies-file.test \
28_local-pause.test
28_local-pause.test \
29_local-redirect-fragment.test

CLEANFILES = check-network_sh.cache
32 changes: 32 additions & 0 deletions tests/local-server.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,21 @@ def route_size_oversize(self):
if self.command != "HEAD":
self.wfile.write(body)

# 302 whose Location carries a #fragment (#204): the fragment is a UA anchor
# that must be dropped before the target is fetched. A leaked '#' reaches the
# strict-server guard below and 400s.
def route_redir_index(self):
    """Entry page of the redirect scenario: a single link to go.php."""
    link_markup = '\t<a href="go.php">go</a>'
    self.send_html(link_markup)

def route_redir_go(self):
    """Answer with an empty-bodied 302 whose Location carries a #fragment."""
    redirect_headers = (
        ("Location", "target.html#section"),
        ("Content-Length", "0"),
    )
    self.send_response(302, "Found")
    # Headers are emitted in the order listed above.
    for header_name, header_value in redirect_headers:
        self.send_header(header_name, header_value)
    self.end_headers()

def route_redir_target(self):
    """Serve the page the redirect points at, as text/html."""
    page_bytes = b"<html><body>redirect target</body></html>\n"
    content_type = "text/html"
    self.send_raw(page_bytes, content_type)

ROUTES = {
"/cookies/entrance.php": route_entrance,
"/cookies/second.php": route_second,
Expand Down Expand Up @@ -391,10 +406,23 @@ def route_size_oversize(self):
"/mimex/index.html": route_mimex_index,
"/mimex/blob.pdf": route_mimex_blob,
"/mimex/real.html": route_mimex_real,
"/redir/index.html": route_redir_index,
"/redir/go.php": route_redir_go,
"/redir/target.html": route_redir_target,
}

# --- dispatch ----------------------------------------------------------

def reject_fragment(self):
    """Reply 400 when the request-target contains a '#'.

    Strict-server guard for #204: a '#' on the wire means the client did
    not drop a fragment, which RFC 3986 does not allow in a request-target.

    Returns True when the 400 response was sent (the caller must stop),
    False when the request is clean and nothing was written.
    """
    if "#" not in self.path:
        return False

    self.send_response(400, "Bad Request")
    self.send_header("Content-Length", "0")
    self.end_headers()
    return True

def dispatch(self):
self._set_cookies = []
path = urlsplit(self.path).path
Expand All @@ -406,10 +434,14 @@ def dispatch(self):
return False

def do_GET(self):
    """Handle GET: fragment guard first, then custom routes, then the base handler."""
    # Short-circuit keeps the original order: dispatch() is not consulted
    # once reject_fragment() has already answered the request.
    if not self.reject_fragment() and not self.dispatch():
        super().do_GET()

def do_HEAD(self):
    """Handle HEAD: fragment guard first, then custom routes, then the base handler."""
    # Short-circuit keeps the original order: dispatch() is not consulted
    # once reject_fragment() has already answered the request.
    if not self.reject_fragment() and not self.dispatch():
        super().do_HEAD()

Expand Down
Loading