diff --git a/man/httrack.1 b/man/httrack.1
index 0e193aeb..9615ddb0 100644
--- a/man/httrack.1
+++ b/man/httrack.1
@@ -228,6 +228,8 @@ tolerant requests (accept bogus responses on some servers, but not standard!) (\
update hacks: various hacks to limit re\-transfers when updating (identical size, bogus response..) (\-\-updatehack)
.IP \-%u
url hacks: various hacks to limit duplicate URLs (strip //, www.foo.com==foo.com..) (\-\-urlhack)
+.br
+opt out of one url\-hack part: \-\-keep\-www\-prefix (www.foo.com<>foo.com), \-\-keep\-double\-slashes (//), \-\-keep\-query\-order (?b&a)
.IP \-%A
assume that a type (cgi,asp..) is always linked with a mime type (\-%A php3,cgi=text/html;dat,bin=application/x\-zip) (\-\-assume )
.br
diff --git a/src/htsalias.c b/src/htsalias.c
index 69400089..f5cdc306 100644
--- a/src/htsalias.c
+++ b/src/htsalias.c
@@ -128,6 +128,9 @@ const char *hts_optalias[][4] = {
{"tolerant", "-%B", "single", ""},
{"updatehack", "-%s", "single", ""}, {"sizehack", "-%s", "single", ""},
{"urlhack", "-%u", "single", ""},
+ {"keep-www-prefix", "-%j", "single", ""},
+ {"keep-double-slashes", "-%o", "single", ""},
+ {"keep-query-order", "-%y", "single", ""},
{"user-agent", "-F", "param1", "user-agent identity"},
{"referer", "-%R", "param1", "default referer URL"},
{"from", "-%E", "param1", "from email address"},
diff --git a/src/htscore.h b/src/htscore.h
index 31600727..2d59f49f 100644
--- a/src/htscore.h
+++ b/src/htscore.h
@@ -234,8 +234,10 @@ struct hash_struct {
coucal adrfil;
/* former address+path -> link index (renamed/moved entries) */
coucal former_adrfil;
- /* scratch buffers reused across lookups (not reentrant) */
- int normalized;
+ /* effective urlhack sub-flags: www.==host / // collapse / query-arg sort */
+ hts_boolean norm_host;
+ hts_boolean norm_slash;
+ hts_boolean norm_query;
/* query-strip keys (not owned); set from opt->strip_query at hash_init */
const char *strip_query;
char normfil[HTS_URLMAXSIZE * 2];
@@ -371,6 +373,11 @@ char *next_token(char *p, int flag);
char *fil_normalized_filtered(const char *source, char *dest,
const char *strip);
+/* As fil_normalized_filtered(), but DO_SLASH/DO_QUERY gate the // collapse and
+ the query-argument sort independently (the urlhack sub-flags). */
+char *fil_normalized_filtered_ex(const char *source, char *dest,
+ const char *strip, int do_slash, int do_query);
+
/* For URL ADR/FIL, return (in DEST) the comma keylist to strip from the
'\n'-separated "[pattern=]keys" RULES (patterns matched on host/path via
strjoker, last wins); NULL if none match. Feeds fil_normalized_filtered(). */
diff --git a/src/htscoremain.c b/src/htscoremain.c
index 62153238..eb1c3fe1 100644
--- a/src/htscoremain.c
+++ b/src/htscoremain.c
@@ -1570,6 +1570,30 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
com++;
}
break; // url hack
+ case 'j':
+ opt->no_www_dedup =
+ HTS_TRUE; // --keep-www-prefix: keep www.X != X
+ if (*(com + 1) == '0') {
+ opt->no_www_dedup = HTS_FALSE;
+ com++;
+ }
+ break;
+ case 'o':
+ opt->no_slash_dedup =
+ HTS_TRUE; // --keep-double-slashes: keep //
+ if (*(com + 1) == '0') {
+ opt->no_slash_dedup = HTS_FALSE;
+ com++;
+ }
+ break;
+ case 'y':
+ opt->no_query_dedup =
+ HTS_TRUE; // --keep-query-order: keep ?b&a order
+ if (*(com + 1) == '0') {
+ opt->no_query_dedup = HTS_FALSE;
+ com++;
+ }
+ break;
case 'v':
opt->verbosedisplay = HTS_VERBOSE_FULL;
if (isdigit((unsigned char) *(com + 1))) {
diff --git a/src/htshash.c b/src/htshash.c
index 96506c91..a9917161 100644
--- a/src/htshash.c
+++ b/src/htshash.c
@@ -106,10 +106,10 @@ static coucal_hashkeys key_adrfil_hashes_generic(void *arg,
const lien_url*const lien = (const lien_url*) value;
const char *const adr = !former ? lien->adr : lien->former_adr;
const char *const fil = !former ? lien->fil : lien->former_fil;
- const char *const adr_norm = adr != NULL ?
- ( hash->normalized ? jump_normalized_const(adr)
- : jump_identification_const(adr) )
- : NULL;
+ const char *const adr_norm =
+ adr != NULL ? (hash->norm_host ? jump_normalized_const(adr)
+ : jump_identification_const(adr))
+ : NULL;
// copy address
assertf(adr_norm != NULL);
@@ -123,8 +123,9 @@ static coucal_hashkeys key_adrfil_hashes_generic(void *arg,
const char *const keys = hts_query_strip_keys(hash->strip_query, adr, fil,
keybuf, sizeof(keybuf));
- if (hash->normalized || keys != NULL) {
- fil_normalized_filtered(fil, &hash->normfil[strlen(hash->normfil)], keys);
+ if (hash->norm_slash || hash->norm_query || keys != NULL) {
+ fil_normalized_filtered_ex(fil, &hash->normfil[strlen(hash->normfil)],
+ keys, hash->norm_slash, hash->norm_query);
} else {
strcpy(&hash->normfil[strlen(hash->normfil)], fil);
}
@@ -139,8 +140,7 @@ static int key_adrfil_equals_generic(void *arg,
coucal_key_const a_,
coucal_key_const b_,
const int former) {
- hash_struct *const hash = (hash_struct*) arg;
- const int normalized = hash->normalized;
+ hash_struct *const hash = (hash_struct *) arg;
const lien_url*const a = (const lien_url*) a_;
const lien_url*const b = (const lien_url*) b_;
const char *const a_adr = !former ? a->adr : a->former_adr;
@@ -157,10 +157,10 @@ static int key_adrfil_equals_generic(void *arg,
assertf(b_fil != NULL);
// skip scheme and authentication to the domain (possibly without www.)
- ja = normalized
- ? jump_normalized_const(a_adr) : jump_identification_const(a_adr);
- jb = normalized
- ? jump_normalized_const(b_adr) : jump_identification_const(b_adr);
+ ja = hash->norm_host ? jump_normalized_const(a_adr)
+ : jump_identification_const(a_adr);
+ jb = hash->norm_host ? jump_normalized_const(b_adr)
+ : jump_identification_const(b_adr);
assertf(ja != NULL);
assertf(jb != NULL);
if (strcasecmp(ja, jb) != 0) {
@@ -175,9 +175,12 @@ static int key_adrfil_equals_generic(void *arg,
const char *const keysb =
hts_query_strip_keys(hash->strip_query, b_adr, b_fil, kb, sizeof(kb));
- if (normalized || keysa != NULL || keysb != NULL) {
- fil_normalized_filtered(a_fil, hash->normfil, keysa);
- fil_normalized_filtered(b_fil, hash->normfil2, keysb);
+ if (hash->norm_slash || hash->norm_query || keysa != NULL ||
+ keysb != NULL) {
+ fil_normalized_filtered_ex(a_fil, hash->normfil, keysa, hash->norm_slash,
+ hash->norm_query);
+ fil_normalized_filtered_ex(b_fil, hash->normfil2, keysb, hash->norm_slash,
+ hash->norm_query);
return strcmp(hash->normfil, hash->normfil2) == 0;
} else {
return strcmp(a_fil, b_fil) == 0;
@@ -237,11 +240,14 @@ static int key_former_adrfil_equals(void *arg,
return key_adrfil_equals_generic(arg, a, b, 1);
}
-void hash_init(httrackp *opt, hash_struct * hash, int normalized) {
+void hash_init(httrackp *opt, hash_struct *hash, hts_boolean normalized) {
hash->sav = coucal_new(0);
hash->adrfil = coucal_new(0);
hash->former_adrfil = coucal_new(0);
- hash->normalized = normalized;
+ /* urlhack is the umbrella; per-feature negatives opt out of each part */
+ hash->norm_host = normalized && !opt->no_www_dedup;
+ hash->norm_slash = normalized && !opt->no_slash_dedup;
+ hash->norm_query = normalized && !opt->no_query_dedup;
/* snapshot the query-strip list (not owned; valid for the hash lifetime) */
hash->strip_query =
StringNotEmpty(opt->strip_query) ? StringBuff(opt->strip_query) : NULL;
@@ -300,6 +306,26 @@ void hash_free(hash_struct *hash) {
}
}
+/* Test helper: report whether URL A (ADRA + FILA) and URL B (ADRB + FILB)
+   compare equal once OPT's urlhack flags have been resolved by hash_init().
+   A throwaway hash is built only to carry the norm_host/norm_slash/norm_query
+   settings into the regular address+path compare callback; nothing is ever
+   inserted into it. */
+hts_boolean hash_url_equals(httrackp *opt, const char *adra, const char *fila,
+                            const char *adrb, const char *filb) {
+  hash_struct scratch;
+  lien_url first, second;
+  hts_boolean same;
+
+  /* resolve the effective sub-flags from opt (urlhack is the umbrella) */
+  hash_init(opt, &scratch, opt->urlhack);
+
+  /* only adr/fil are filled in; every other link field stays zeroed.
+     NOTE(review): key_duphandler() is used to obtain non-const pointers and
+     its results are never released here -- presumably it does not allocate;
+     confirm against its definition. */
+  memset(&first, 0, sizeof(first));
+  first.adr = key_duphandler(NULL, adra);
+  first.fil = key_duphandler(NULL, fila);
+  memset(&second, 0, sizeof(second));
+  second.adr = key_duphandler(NULL, adrb);
+  second.fil = key_duphandler(NULL, filb);
+
+  same = key_adrfil_equals(&scratch, &first, &second);
+  hash_free(&scratch);
+  return same;
+}
+
// retour: position ou -1 si non trouvé
int hash_read(const hash_struct * hash, const char *nom1, const char *nom2,
hash_struct_type type) {
diff --git a/src/htshash.h b/src/htshash.h
index e6cf0ac5..ffac47f8 100644
--- a/src/htshash.h
+++ b/src/htshash.h
@@ -51,8 +51,12 @@ typedef enum hash_struct_type {
} hash_struct_type;
// tables de hachage
-void hash_init(httrackp *opt, hash_struct *hash, int normalized);
+void hash_init(httrackp *opt, hash_struct *hash, hts_boolean normalized);
void hash_free(hash_struct *hash);
+/* Test helper: HTS_TRUE if the two URLs dedupe together under opt's urlhack
+ flags. */
+hts_boolean hash_url_equals(httrackp *opt, const char *adra, const char *fila,
+ const char *adrb, const char *filb);
int hash_read(const hash_struct * hash, const char *nom1, const char *nom2,
hash_struct_type type);
void hash_write(hash_struct * hash, size_t lpos);
diff --git a/src/htshelp.c b/src/htshelp.c
index e0532c4d..103b53c8 100644
--- a/src/htshelp.c
+++ b/src/htshelp.c
@@ -588,6 +588,9 @@ void help(const char *app, int more) {
(" %s update hacks: various hacks to limit re-transfers when updating (identical size, bogus response..)");
infomsg
(" %u url hacks: various hacks to limit duplicate URLs (strip //, www.foo.com==foo.com..)");
+ infomsg(" opt out of one url-hack part: --keep-www-prefix "
+ "(www.foo.com<>foo.com), --keep-double-slashes (//), "
+ "--keep-query-order (?b&a)");
infomsg
(" %A assume that a type (cgi,asp..) is always linked with a mime type (-%A php3,cgi=text/html;dat,bin=application/x-zip)");
infomsg(" shortcut: '--assume standard' is equivalent to -%A "
diff --git a/src/htslib.c b/src/htslib.c
index 1a88b62f..d7663a0f 100644
--- a/src/htslib.c
+++ b/src/htslib.c
@@ -3610,7 +3610,10 @@ static int sortNormFnc(const void *a_, const void *b_) {
return strcmp(*a + 1, *b + 1);
}
-HTSEXT_API char *fil_normalized(const char *source, char *dest) {
+/* Path normalizer core: optionally collapse redundant '//' (DO_SLASH) and/or
+ sort query arguments (DO_QUERY) so equivalent URLs dedupe. */
+static char *fil_normalized_ex(const char *source, char *dest, int do_slash,
+ int do_query) {
char lastc = 0;
int gotquery = 0;
int ampargs = 0;
@@ -3620,8 +3623,8 @@ HTSEXT_API char *fil_normalized(const char *source, char *dest) {
for(i = j = 0; source[i] != '\0'; i++) {
if (!gotquery && source[i] == '?')
gotquery = ampargs = 1;
- if ((!gotquery && lastc == '/' && source[i] == '/') // foo//bar -> foo/bar
- ) {
+ if (do_slash && !gotquery && lastc == '/' && source[i] == '/') {
+ // foo//bar -> foo/bar
} else {
if (gotquery && source[i] == '&') {
ampargs++;
@@ -3633,7 +3636,7 @@ HTSEXT_API char *fil_normalized(const char *source, char *dest) {
dest[j++] = '\0';
/* Sort arguments (&foo=1&bar=2 == &bar=2&foo=1) */
- if (ampargs > 1) {
+ if (do_query && ampargs > 1) {
char **amps = malloct(ampargs * sizeof(char *));
char *copyBuff = NULL;
size_t qLen = 0;
@@ -3681,6 +3684,10 @@ HTSEXT_API char *fil_normalized(const char *source, char *dest) {
return dest;
}
+/* Full path normalization: fil_normalized_ex() with both the '//' collapse
+   and the query-argument sort switched on. Returns what that call returns. */
+HTSEXT_API char *fil_normalized(const char *source, char *dest) {
+  const int collapse_slashes = 1;
+  const int sort_query_args = 1;
+
+  return fil_normalized_ex(source, dest, collapse_slashes, sort_query_args);
+}
+
/* Is query key ARG[0..keylen) in the comma-separated STRIP list? "*" = all;
case-sensitive, space-trimmed tokens. */
static int hts_query_key_stripped(const char *arg, size_t keylen,
@@ -3711,8 +3718,9 @@ static int hts_query_key_stripped(const char *arg, size_t keylen,
}
/* see htscore.h */
-char *fil_normalized_filtered(const char *source, char *dest,
- const char *strip) {
+char *fil_normalized_filtered_ex(const char *source, char *dest,
+ const char *strip, int do_slash,
+ int do_query) {
const char *query;
char BIGSTK tmp[HTS_URLMAXSIZE * 2];
htsbuff cb;
@@ -3721,7 +3729,7 @@ char *fil_normalized_filtered(const char *source, char *dest,
/* No strip list, or no query: plain normalization. */
if (strip == NULL || *strip == '\0' ||
(query = strchr(source, '?')) == NULL) {
- return fil_normalized(source, dest);
+ return fil_normalized_ex(source, dest, do_slash, do_query);
}
/* Copy the path, re-emit kept query args, let fil_normalized() sort. Walk
@@ -3750,7 +3758,13 @@ char *fil_normalized_filtered(const char *source, char *dest,
break;
query++;
}
- return fil_normalized(tmp, dest);
+ return fil_normalized_ex(tmp, dest, do_slash, do_query);
+}
+
+/* see htscore.h -- convenience wrapper: fil_normalized_filtered_ex() with
+   both the '//' collapse and the query-argument sort switched on */
+char *fil_normalized_filtered(const char *source, char *dest,
+                              const char *strip) {
+  const int collapse_slashes = 1;
+  const int sort_query_args = 1;
+
+  return fil_normalized_filtered_ex(source, dest, strip, collapse_slashes,
+                                    sort_query_args);
+}
/* see htscore.h */
@@ -6026,6 +6040,9 @@ HTSEXT_API httrackp *hts_create_opt(void) {
opt->verbosedisplay = HTS_VERBOSE_NONE; // no text animation
opt->sizehack = HTS_FALSE;
opt->urlhack = HTS_TRUE;
+ opt->no_www_dedup = HTS_FALSE;
+ opt->no_slash_dedup = HTS_FALSE;
+ opt->no_query_dedup = HTS_FALSE;
StringCopy(opt->footer, HTS_DEFAULT_FOOTER);
StringCopy(opt->strip_query, "");
opt->ftp_proxy = HTS_TRUE;
diff --git a/src/htsname.c b/src/htsname.c
index c6ee007e..c2e2bb5a 100644
--- a/src/htsname.c
+++ b/src/htsname.c
@@ -237,9 +237,13 @@ int url_savename(lien_adrfilsave *const afs,
// www-42.foo.com -> foo.com
// foo.com/bar//foobar -> foo.com/bar/foobar
if (opt->urlhack) {
- // copy of adr (without protocol), used for lookups (see urlhack)
- normadr = adr_normalized_sized(adr, normadr_, sizeof(normadr_));
- normfil = fil_normalized_filtered(fil_complete, normfil_, strip);
+ // dedup-lookup key; honor the per-feature negatives like htshash.c so
+ // distinct URLs keep distinct savenames (else keep normadr = adr)
+ if (!opt->no_www_dedup)
+ normadr = adr_normalized_sized(adr, normadr_, sizeof(normadr_));
+ normfil =
+ fil_normalized_filtered_ex(fil_complete, normfil_, strip,
+ !opt->no_slash_dedup, !opt->no_query_dedup);
} else {
if (link_has_authority(adr_complete)) { // https or other protocols : in "http/" subfolder
char *pos = strchr(adr_complete, ':');
@@ -252,9 +256,11 @@ int url_savename(lien_adrfilsave *const afs,
normadr = normadr_;
}
}
- // strip still applies with urlhack off (host left untouched)
+ // strip still applies with urlhack off (host left untouched); no // or
+ // query-sort here, to match the hash key (norm_slash/norm_query are 0 when
+ // urlhack is off) so a URL is looked up under the key it was stored with
if (strip != NULL)
- normfil = fil_normalized_filtered(fil_complete, normfil_, strip);
+ normfil = fil_normalized_filtered_ex(fil_complete, normfil_, strip, 0, 0);
}
// à afficher sans ftp://
diff --git a/src/htsopt.h b/src/htsopt.h
index eabd070a..4dccd17f 100644
--- a/src/htsopt.h
+++ b/src/htsopt.h
@@ -531,6 +531,10 @@ struct httrackp {
htsoptstate state; /**< embedded live engine state */
String strip_query; /**< query keys to drop when deduping URLs (-strip-query);
appended at the tail to keep field offsets stable */
+ hts_boolean
+ no_www_dedup; /**< with urlhack, keep www.host distinct from host */
+ hts_boolean no_slash_dedup; /**< with urlhack, keep redundant // in paths */
+ hts_boolean no_query_dedup; /**< with urlhack, keep query-argument order */
};
/* Running statistics for a mirror. */
diff --git a/src/htsparse.c b/src/htsparse.c
index 36decc13..4ec35245 100644
--- a/src/htsparse.c
+++ b/src/htsparse.c
@@ -3602,16 +3602,28 @@ int hts_mirror_check_moved(htsmoduleStruct * str,
ident_url_relatif(mov_url, urladr(), urlfil(), moved)) >= 0) {
int set_prio_to = 0; // pas de priotité fixéd par wizard
- // check whether URLHack is harmless or not
- if (opt->urlhack) {
+ // check whether URLHack is harmless or not (per the effective
+ // sub-flags)
+ if (opt->urlhack && (!opt->no_www_dedup || !opt->no_slash_dedup ||
+ !opt->no_query_dedup)) {
+ const int norm_host = !opt->no_www_dedup;
+ const int norm_slash = !opt->no_slash_dedup;
+ const int norm_query = !opt->no_query_dedup;
char BIGSTK n_adr[HTS_URLMAXSIZE * 2], n_fil[HTS_URLMAXSIZE * 2];
char BIGSTK pn_adr[HTS_URLMAXSIZE * 2], pn_fil[HTS_URLMAXSIZE * 2];
- n_adr[0] = n_fil[0] = '\0';
- (void) adr_normalized_sized(moved->adr, n_adr, sizeof(n_adr));
- (void) fil_normalized(moved->fil, n_fil);
- (void) adr_normalized_sized(urladr(), pn_adr, sizeof(pn_adr));
- (void) fil_normalized(urlfil(), pn_fil);
+ strlcpybuff(n_adr,
+ norm_host ? jump_normalized_const(moved->adr)
+ : jump_identification_const(moved->adr),
+ sizeof(n_adr));
+ strlcpybuff(pn_adr,
+ norm_host ? jump_normalized_const(urladr())
+ : jump_identification_const(urladr()),
+ sizeof(pn_adr));
+ fil_normalized_filtered_ex(moved->fil, n_fil, NULL, norm_slash,
+ norm_query);
+ fil_normalized_filtered_ex(urlfil(), pn_fil, NULL, norm_slash,
+ norm_query);
if (strcasecmp(n_adr, pn_adr) == 0
&& strcasecmp(n_fil, pn_fil) == 0) {
hts_log_print(opt, LOG_WARNING,
diff --git a/src/htsselftest.c b/src/htsselftest.c
index 8c194c9e..95bc723f 100644
--- a/src/htsselftest.c
+++ b/src/htsselftest.c
@@ -1172,6 +1172,53 @@ static int st_stripquery(httrackp *opt, int argc, char **argv) {
return 0;
}
+/* -%u url-hack split (#271): each sub-flag must toggle independently.
+   Every check goes through hash_url_equals(), so it exercises the flag
+   resolution done by hash_init(). ARGC/ARGV are unused. OPT's urlhack
+   settings are overwritten during the run and put back before returning 0. */
+static int st_urlhack(httrackp *opt, int argc, char **argv) {
+  /* caller's settings, restored once all checks have passed */
+  const hts_boolean saved_urlhack = opt->urlhack;
+  const hts_boolean saved_no_www = opt->no_www_dedup;
+  const hts_boolean saved_no_slash = opt->no_slash_dedup;
+  const hts_boolean saved_no_query = opt->no_query_dedup;
+
+  (void) argc;
+  (void) argv;
+#define EQ(aa, fa, ab, fb) hash_url_equals(opt, aa, fa, ab, fb)
+  /* urlhack on, no opt-outs: www, // and query order all collapse */
+  opt->urlhack = HTS_TRUE;
+  opt->no_www_dedup = opt->no_slash_dedup = opt->no_query_dedup = HTS_FALSE;
+  assertf(EQ("www.foo.com", "/a", "foo.com", "/a"));
+  assertf(EQ("foo.com", "/a//b", "foo.com", "/a/b"));
+  assertf(EQ("foo.com", "/p?b=2&a=1", "foo.com", "/p?a=1&b=2"));
+
+  /* keep-www-prefix: host off; // and query still collapse */
+  opt->no_www_dedup = HTS_TRUE;
+  assertf(!EQ("www.foo.com", "/a", "foo.com", "/a"));
+  assertf(EQ("foo.com", "/a//b", "foo.com", "/a/b"));
+  assertf(EQ("foo.com", "/p?b=2&a=1", "foo.com", "/p?a=1&b=2"));
+  opt->no_www_dedup = HTS_FALSE;
+
+  /* keep-double-slashes: // significant; www, query order still collapse */
+  opt->no_slash_dedup = HTS_TRUE;
+  assertf(!EQ("foo.com", "/a//b", "foo.com", "/a/b"));
+  assertf(EQ("www.foo.com", "/a", "foo.com", "/a"));
+  assertf(EQ("foo.com", "/p?b=2&a=1", "foo.com", "/p?a=1&b=2"));
+  opt->no_slash_dedup = HTS_FALSE;
+
+  /* keep-query-order: query order significant; www and // still collapse */
+  opt->no_query_dedup = HTS_TRUE;
+  assertf(!EQ("foo.com", "/p?b=2&a=1", "foo.com", "/p?a=1&b=2"));
+  assertf(EQ("www.foo.com", "/a", "foo.com", "/a"));
+  assertf(EQ("foo.com", "/a//b", "foo.com", "/a/b"));
+  opt->no_query_dedup = HTS_FALSE;
+
+  /* all opt-outs == urlhack off entirely */
+  opt->no_www_dedup = opt->no_slash_dedup = opt->no_query_dedup = HTS_TRUE;
+  assertf(!EQ("www.foo.com", "/a", "foo.com", "/a"));
+  assertf(!EQ("foo.com", "/a//b", "foo.com", "/a/b"));
+  assertf(!EQ("foo.com", "/p?b=2&a=1", "foo.com", "/p?a=1&b=2"));
+
+  /* urlhack off, no opt-outs: nothing collapses, query order included (the
+     umbrella flag alone must disable all three parts) */
+  opt->urlhack = HTS_FALSE;
+  opt->no_www_dedup = opt->no_slash_dedup = opt->no_query_dedup = HTS_FALSE;
+  assertf(!EQ("www.foo.com", "/a", "foo.com", "/a"));
+  assertf(!EQ("foo.com", "/a//b", "foo.com", "/a/b"));
+  assertf(!EQ("foo.com", "/p?b=2&a=1", "foo.com", "/p?a=1&b=2"));
+#undef EQ
+
+  /* hand the options back as they were received */
+  opt->urlhack = saved_urlhack;
+  opt->no_www_dedup = saved_no_www;
+  opt->no_slash_dedup = saved_no_slash;
+  opt->no_query_dedup = saved_no_query;
+
+  printf("urlhack self-test OK\n");
+  return 0;
+}
+
/* ------------------------------------------------------------ */
/* Registry: name -> handler, with a usage hint and a one-line description. */
/* ------------------------------------------------------------ */
@@ -1190,6 +1237,8 @@ static const struct selftest_entry {
{"simplify", "", "collapse ./ and ../ in a path", st_simplify},
{"stripquery", "", "--strip-query pattern/key stripping self-test",
st_stripquery},
+ {"urlhack", "", "-%u url-hack sub-flag (www/slash/query) self-test",
+ st_urlhack},
{"mime", "", "MIME type for a filename", st_mime},
{"charset", " ",
"convert a string to UTF-8 from a charset", st_charset},
diff --git a/tests/01_engine-urlhack.test b/tests/01_engine-urlhack.test
new file mode 100644
index 00000000..3050f415
--- /dev/null
+++ b/tests/01_engine-urlhack.test
@@ -0,0 +1,8 @@
+#!/bin/bash
+#
+
+set -euo pipefail
+
+# -%u url-hack split (#271): sub-flag assertions live in the engine self-test.
+# grep without -q drains the pipe, so pipefail cannot trip on a SIGPIPE exit.
+httrack -O /dev/null -#test=urlhack run | grep "urlhack self-test OK" >/dev/null
diff --git a/tests/26_local-strip-query.test b/tests/26_local-strip-query.test
index 02a8eb3e..7bdf4542 100755
--- a/tests/26_local-strip-query.test
+++ b/tests/26_local-strip-query.test
@@ -16,3 +16,8 @@ bash "$top_srcdir/tests/local-crawl.sh" --errors 0 --files 2 \
# control: no stripping -> both query-named variants are saved
bash "$top_srcdir/tests/local-crawl.sh" --errors 0 --files 3 \
httrack 'BASEURL/stripquery/index.html'
+
+# strip still applies with url-hack off (-%u0): exercises the urlhack-off
+# savename branch, which must normalize the dedup key the same way the hash does
+bash "$top_srcdir/tests/local-crawl.sh" --errors 0 --files 2 \
+ httrack 'BASEURL/stripquery/index.html' -%u0 --strip-query 'utm_source'
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 9d7e265c..1fb61608 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -48,6 +48,7 @@ TESTS = \
01_engine-simplify.test \
01_engine-stripquery.test \
01_engine-strsafe.test \
+ 01_engine-urlhack.test \
02_manpage-regen.test \
02_update-cache.test \
10_crawl-simple.test \