diff --git a/src/htsfilters.c b/src/htsfilters.c
index 721a3fae..53d7f327 100644
--- a/src/htsfilters.c
+++ b/src/htsfilters.c
@@ -76,7 +76,8 @@ int fa_strjoker(int type, char **filters, int nfil, const char *nom, LLint * siz
     }
     if (size)
       sz = *size;
-    if (strjoker(nom, filters[i] + filteroffs, &sz, size_flag)) { // reconnu
+    /* size unknown (scan time): no size pointer => size tests stay neutral */
+    if (strjoker(nom, filters[i] + filteroffs, size ? &sz : NULL, size_flag)) {
       if (size)
         if (sz != *size)
           sizelimit = sz;
diff --git a/src/htsselftest.c b/src/htsselftest.c
index 5d28f597..c7c0287d 100644
--- a/src/htsselftest.c
+++ b/src/htsselftest.c
@@ -524,6 +524,30 @@ static int st_filter(httrackp *opt, int argc, char **argv) {
   return 0;
 }
 
+/* Size-aware filter verdict via fa_strjoker: a negative means the size
+   is still unknown (scan time), so a size rule like -*.jpg*[<10] must stay
+   neutral. */
+static int st_filtersize(httrackp *opt, int argc, char **argv) {
+  LLint sz;
+  int size_flag = 0, verdict, known;
+
+  (void) opt;
+  if (argc < 3) {
+    fprintf(stderr, "filtersize: needs [filter...]\n");
+    return 1;
+  }
+  known = (argv[0][0] != '-'); /* "-1"/"-" => size unknown */
+  sz = known ? (LLint) strtoll(argv[0], NULL, 10) : -1;
+  verdict = fa_strjoker(0, &argv[2], argc - 2, argv[1], known ? &sz : NULL,
+                        known ? &size_flag : NULL, NULL);
+  printf("verdict=%s size_flag=%d\n",
+         verdict > 0 ? "allowed"
+         : verdict < 0 ? "forbidden"
+         : "unknown",
+         size_flag);
+  return 0;
+}
+
 static int st_simplify(httrackp *opt, int argc, char **argv) {
   (void) opt;
   if (argc < 1) {
@@ -1038,6 +1062,9 @@ static const struct selftest_entry {
 } selftests[] = {
     {"filter", " ", "match a string against a wildcard filter",
      st_filter},
+    {"filtersize", " ...",
+     "size-aware filter verdict (negative size = unknown/scan time)",
+     st_filtersize},
     {"simplify", "", "collapse ./ and ../ in a path", st_simplify},
     {"mime", "", "MIME type for a filename", st_mime},
     {"charset", " ",
diff --git a/tests/01_engine-filter.test b/tests/01_engine-filter.test
index 803290e6..ed259dbe 100755
--- a/tests/01_engine-filter.test
+++ b/tests/01_engine-filter.test
@@ -71,3 +71,24 @@ nomatch '*[\[\]]' '[' # not matched, despite the docs
 match '*[\[\]]' ']' # only via the empty class-match + trailing ']'
 match '*[\[\]]' '[]' # one of {'[','\'} then the trailing ']'
 nomatch '*[\[\]]' '[]x'
+
+# Size-based rules (-#test=filtersize ): a negative size
+# means the size is still unknown (scan time). A size exclusion must stay neutral
+# then, so the file is fetched and only cancelled once its size is known (#143).
+fsize() {
+  local want="$1"
+  shift
+  test "$(httrack -O /dev/null -#test=filtersize "$@")" == "$want" || exit 1
+}
+fsize 'verdict=allowed size_flag=0' -1 foo.jpg -* '+*.jpg' '-*.jpg*[<10]' # scan time: keep
+fsize 'verdict=forbidden size_flag=1' 5 foo.jpg -* '+*.jpg' '-*.jpg*[<10]' # <10KB: cancel
+fsize 'verdict=allowed size_flag=1' 20 foo.jpg -* '+*.jpg' '-*.jpg*[<10]' # >=10KB: keep
+fsize 'verdict=forbidden size_flag=0' -1 foo.txt -* '+*.jpg' '-*.jpg*[<10]' # not a jpg
+
+# [name]/[file]/[path] never span '?' mid-string; a trailing query is still
+# tolerated by the global '?' rule (same as plain *.aspx), not the class (#144).
+nomatch '*[path]/end' 'a?b/end'
+nomatch '*[file]end' 'foo?xend'
+nomatch '*[name]X' 'abc?X'
+match '*[file]' 'foo?x=1' # trailing query: tolerated, as for *.aspx
+match '*.aspx' 'page.aspx?y=2'