Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/htsfilters.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ int fa_strjoker(int type, char **filters, int nfil, const char *nom, LLint * siz
}
if (size)
sz = *size;
if (strjoker(nom, filters[i] + filteroffs, &sz, size_flag)) { // reconnu
/* size unknown (scan time): no size pointer => size tests stay neutral */
if (strjoker(nom, filters[i] + filteroffs, size ? &sz : NULL, size_flag)) {
if (size)
if (sz != *size)
sizelimit = sz;
Expand Down
27 changes: 27 additions & 0 deletions src/htsselftest.c
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,30 @@ static int st_filter(httrackp *opt, int argc, char **argv) {
return 0;
}

/* Size-aware filter verdict via fa_strjoker: a negative <size> means the size
is still unknown (scan time), so a size rule like -*.jpg*[<10] must stay
neutral. */
static int st_filtersize(httrackp *opt, int argc, char **argv) {
LLint sz;
int size_flag = 0, verdict, known;

(void) opt;
if (argc < 3) {
fprintf(stderr, "filtersize: needs <size> <string> <filter> [filter...]\n");
return 1;
}
known = (argv[0][0] != '-'); /* "-1"/"-" => size unknown */
sz = known ? (LLint) strtoll(argv[0], NULL, 10) : -1;
verdict = fa_strjoker(0, &argv[2], argc - 2, argv[1], known ? &sz : NULL,
known ? &size_flag : NULL, NULL);
printf("verdict=%s size_flag=%d\n",
verdict > 0 ? "allowed"
: verdict < 0 ? "forbidden"
: "unknown",
size_flag);
return 0;
}

static int st_simplify(httrackp *opt, int argc, char **argv) {
(void) opt;
if (argc < 1) {
Expand Down Expand Up @@ -1038,6 +1062,9 @@ static const struct selftest_entry {
} selftests[] = {
{"filter", "<pattern> <string>", "match a string against a wildcard filter",
st_filter},
{"filtersize", "<size> <string> <filter>...",
"size-aware filter verdict (negative size = unknown/scan time)",
st_filtersize},
{"simplify", "<path>", "collapse ./ and ../ in a path", st_simplify},
{"mime", "<filename>", "MIME type for a filename", st_mime},
{"charset", "<charset> <string>",
Expand Down
21 changes: 21 additions & 0 deletions tests/01_engine-filter.test
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,24 @@ nomatch '*[\[\]]' '[' # not matched, despite the docs
match '*[\[\]]' ']' # only via the empty class-match + trailing ']'
match '*[\[\]]' '[]' # one of {'[','\'} then the trailing ']'
nomatch '*[\[\]]' '[]x'

# Size-based rules (-#test=filtersize <size> <string> <filter...>): a negative size
# means the size is still unknown (scan time). A size exclusion must stay neutral
# then, so the file is fetched and only cancelled once its size is known (#143).
fsize() {
local want="$1"
shift
test "$(httrack -O /dev/null -#test=filtersize "$@")" == "$want" || exit 1
}
fsize 'verdict=allowed size_flag=0' -1 foo.jpg -* '+*.jpg' '-*.jpg*[<10]' # scan time: keep
fsize 'verdict=forbidden size_flag=1' 5 foo.jpg -* '+*.jpg' '-*.jpg*[<10]' # <10KB: cancel
fsize 'verdict=allowed size_flag=1' 20 foo.jpg -* '+*.jpg' '-*.jpg*[<10]' # >=10KB: keep
fsize 'verdict=forbidden size_flag=0' -1 foo.txt -* '+*.jpg' '-*.jpg*[<10]' # not a jpg

# [name]/[file]/[path] never span '?' mid-string; a trailing query is still
# tolerated by the global '?' rule (same as plain *.aspx), not the class (#144).
nomatch '*[path]/end' 'a?b/end'
nomatch '*[file]end' 'foo?xend'
nomatch '*[name]X' 'abc?X'
match '*[file]' 'foo?x=1' # trailing query: tolerated, as for *.aspx
match '*.aspx' 'page.aspx?y=2'
Loading