Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions man/httrack.1
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ httrack \- offline browser : copy websites to a local directory
[ \fB\-EN, \-\-max\-time[=N]\fR ]
[ \fB\-AN, \-\-max\-rate[=N]\fR ]
[ \fB\-%cN, \-\-connection\-per\-second[=N]\fR ]
[ \fB\-%G, \-\-pause\fR ]
[ \fB\-GN, \-\-max\-pause[=N]\fR ]
[ \fB\-cN, \-\-sockets[=N]\fR ]
[ \fB\-TN, \-\-timeout[=N]\fR ]
Expand Down Expand Up @@ -155,6 +156,8 @@ maximum mirror time in seconds (60=1 minute, 3600=1 hour) (\-\-max\-time[=N])
maximum transfer rate in bytes/seconds (1000=1KB/s max) (\-\-max\-rate[=N])
.IP \-%cN
maximum number of connections/seconds (*%c10) (\-\-connection\-per\-second[=N])
.IP \-%G
random pause of MIN[:MAX] seconds between files (e.g. %G 5:10) (\-\-pause <param>)
.IP \-GN
pause transfer if N bytes reached, and wait until lock file is deleted (\-\-max\-pause[=N])
.SS Flow control:
Expand Down
2 changes: 2 additions & 0 deletions src/htsalias.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ const char *hts_optalias[][4] = {
"strip [host/pattern=]key1,key2,... from URLs"},
{"cookies-file", "-%K", "param1",
"load extra cookies from a Netscape cookies.txt"},
{"pause", "-%G", "param1",
"random pause of MIN[:MAX] seconds between files"},
{"generate-errors", "-o", "single", ""},
{"do-not-generate-errors", "-o0", "single", ""},
{"purge-old", "-X", "param", ""},
Expand Down
33 changes: 33 additions & 0 deletions src/htscore.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ Please visit our Website: http://www.httrack.com

#include <fcntl.h>
#include <ctype.h>
#include <stdint.h> /* uint64_t for the pause mixer (already a hard dep via md5.h) */

/* File defs */
#include "htscore.h"
Expand Down Expand Up @@ -3314,6 +3315,21 @@ HTS_INLINE int back_fillmax(struct_back * sback, httrackp * opt,
return -1; /* plus de place */
}

/* Randomized inter-file pause target (#185), in milliseconds.

   The result depends only on 'seed' and the bounds (no global PRNG state): the
   same seed always yields the same target. A caller that polls repeatedly with
   the timestamp of the last launch therefore sees one stable target for the
   whole gap, and a new one once that timestamp changes. Drawing a fresh rand()
   on every poll instead would bias the delay toward min_ms. Jitter, not crypto.

   seed    timestamp used as the only entropy source
   min_ms  lower bound; returned unchanged when max_ms <= min_ms
   max_ms  upper bound (inclusive)

   Returns min_ms when max_ms <= min_ms, otherwise a value in [min_ms, max_ms]. */
int hts_pause_target_ms(TStamp seed, int min_ms, int max_ms) {
  uint64_t z = (uint64_t) seed;
  uint64_t span;

  if (max_ms <= min_ms)
    return min_ms;
  /* Inclusive width of the range, computed in 64 bits: the int expression
     max_ms - min_ms + 1 overflows (undefined behavior) as soon as the bounds
     are INT_MAX or more apart, e.g. [0, INT_MAX]. */
  span = (uint64_t) ((int64_t) max_ms - (int64_t) min_ms) + 1;
  /* SplitMix64 finalizer: scrambles the low-entropy ms timestamp. */
  z += 0x9E3779B97F4A7C15ULL;
  z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9ULL;
  z = (z ^ (z >> 27)) * 0x94D049BB133111EBULL;
  z ^= z >> 31;
  /* min_ms + offset never exceeds max_ms, so the sum fits back into an int. */
  return (int) ((int64_t) min_ms + (int64_t) (z % span));
}

int back_pluggable_sockets_strict(struct_back * sback, httrackp * opt) {
int n = opt->maxsoc - back_nsoc(sback);

Expand All @@ -3334,6 +3350,18 @@ int back_pluggable_sockets_strict(struct_back * sback, httrackp * opt) {
}
}

// #185 randomized inter-file pause: non-blocking, one launch per gap
if (n > 0 && opt->pause_max_ms > 0 && HTS_STAT.last_connect > 0) {
TStamp opTime =
HTS_STAT.last_request ? HTS_STAT.last_request : HTS_STAT.last_connect;
TStamp lap = mtime_local() - opTime;

if (lap < hts_pause_target_ms(opTime, opt->pause_min_ms, opt->pause_max_ms))
n = 0;
else
n = 1;
}

return n;
}

Expand Down Expand Up @@ -3748,6 +3776,11 @@ HTSEXT_API int copy_htsopt(const httrackp * from, httrackp * to) {
if (StringNotEmpty(from->cookies_file))
StringCopyS(to->cookies_file, from->cookies_file);

if (from->pause_max_ms > 0) {
to->pause_min_ms = from->pause_min_ms;
to->pause_max_ms = from->pause_max_ms;
}

if (from->retry > -1)
to->retry = from->retry;

Expand Down
4 changes: 4 additions & 0 deletions src/htscore.h
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,10 @@ int back_pluggable_sockets(struct_back * sback, httrackp * opt);

int back_pluggable_sockets_strict(struct_back * sback, httrackp * opt);

/* Randomized inter-file pause target (#185), in milliseconds, derived from a
   timestamp seed so it is stable within one gap and rerolls per launch: the
   same seed and bounds always give the same result.
   seed:   timestamp the target is derived from
   min_ms: lower bound; returned unchanged when max_ms <= min_ms
   max_ms: upper bound (inclusive)
   Returns min_ms when max_ms <= min_ms, otherwise a value in [min_ms,max_ms]. */
int hts_pause_target_ms(TStamp seed, int min_ms, int max_ms);

/* Schedule more links from the heap into free slots. Returns the number queued,
or <=0 if none could be added (no free slot / paused / stopped). */
int back_fill(struct_back * sback, httrackp * opt, cache_back * cache,
Expand Down
27 changes: 27 additions & 0 deletions src/htscoremain.c
Original file line number Diff line number Diff line change
Expand Up @@ -1994,6 +1994,33 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
StringCopy(opt->cookies_file, argv[na]);
}
break;
case 'G': // pause: randomized inter-file delay MIN[:MAX] seconds
if ((na + 1 >= argc) || (argv[na + 1][0] == '-')) {
HTS_PANIC_PRINTF("Option pause needs a blank space and a "
"delay in seconds (MIN[:MAX])");
printf("Example: --pause 5:10\n");
htsmain_free();
return -1;
} else {
double pmin = 0, pmax = 0;
int nf;

na++;
nf = sscanf(argv[na], "%lf:%lf", &pmin, &pmax);
if (nf < 2)
pmax = pmin; /* a single value means a fixed delay */
/* positive-form bounds: NaN fails every comparison, so this
rejects it before the undefined (int)(NaN*1000) cast */
if (nf < 1 || !(pmin >= 0 && pmax >= pmin && pmax <= 86400)) {
HTS_PANIC_PRINTF("Invalid --pause range (expected "
"MIN[:MAX] seconds, 0<=MIN<=MAX<=86400)");
htsmain_free();
return -1;
}
opt->pause_min_ms = (int) (pmin * 1000.0);
opt->pause_max_ms = (int) (pmax * 1000.0);
}
break;
case 't': /* do not change type (ending) of filenames according to the MIME type */
opt->no_type_change = 1;
if (*(com+1)=='0') { opt->no_type_change = 0; com++; }
Expand Down
1 change: 1 addition & 0 deletions src/htshelp.c
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,7 @@ void help(const char *app, int more) {
infomsg(" EN maximum mirror time in seconds (60=1 minute, 3600=1 hour)");
infomsg(" AN maximum transfer rate in bytes/seconds (1000=1KB/s max)");
infomsg(" %cN maximum number of connections/seconds (*%c10)");
infomsg(" %G random pause of MIN[:MAX] seconds between files (e.g. %G5:10)");
infomsg
(" GN pause transfer if N bytes reached, and wait until lock file is deleted");
infomsg("");
Expand Down
2 changes: 2 additions & 0 deletions src/htslib.c
Original file line number Diff line number Diff line change
Expand Up @@ -6046,6 +6046,8 @@ HTSEXT_API httrackp *hts_create_opt(void) {
StringCopy(opt->footer, HTS_DEFAULT_FOOTER);
StringCopy(opt->strip_query, "");
StringCopy(opt->cookies_file, "");
opt->pause_min_ms = 0;
opt->pause_max_ms = 0;
opt->ftp_proxy = HTS_TRUE;
opt->convert_utf8 = HTS_TRUE;
StringCopy(opt->filelist, "");
Expand Down
2 changes: 2 additions & 0 deletions src/htsopt.h
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,8 @@ struct httrackp {
hts_boolean no_query_dedup; /**< with urlhack, keep query-argument order */
String cookies_file; /**< extra Netscape cookies.txt to preload
(--cookies-file) */
int pause_min_ms; /**< inter-file pause lower bound, ms (0=off, #185) */
int pause_max_ms; /**< inter-file pause upper bound, ms */
};

/* Running statistics for a mirror. */
Expand Down
47 changes: 47 additions & 0 deletions src/htsselftest.c
Original file line number Diff line number Diff line change
Expand Up @@ -912,12 +912,58 @@ static int st_copyopt(httrackp *opt, int argc, char **argv) {
if (strcmp(StringBuff(to->cookies_file), "/tmp/jar.txt") != 0)
err = 1;

/* #185 pause pair: copied when enabled (max>0), the 0 sentinel skips */
from->pause_min_ms = 5000;
from->pause_max_ms = 10000;
to->pause_min_ms = to->pause_max_ms = 0;
copy_htsopt(from, to);
if (to->pause_min_ms != 5000 || to->pause_max_ms != 10000)
err = 1;
from->pause_min_ms = from->pause_max_ms = 0;
copy_htsopt(from, to);
if (to->pause_min_ms != 5000 || to->pause_max_ms != 10000)
err = 1;

hts_free_opt(from);
hts_free_opt(to);
printf("copy-htsopt: %s\n", err ? "FAIL" : "OK");
return err;
}

/* Self-test for hts_pause_target_ms(): checks range, spread, the fixed-delay
   case and determinism. Prints "pause: OK" or "pause: FAIL" and returns 0 on
   success, 1 on failure. The three arguments are ignored. */
static int st_pause(httrackp *opt, int argc, char **argv) {
  const int lo = 5000, hi = 10000;
  int failed = 0;
  int hit_bottom = 0, hit_top = 0;
  int k;

  (void) opt;
  (void) argc;
  (void) argv;

  /* Seeds one millisecond apart (production shape: launch timestamps a few ms
     apart) are the worst case for a weak low-bit mixer: every target must stay
     within the bounds, and the set must reach both ends of the range instead
     of collapsing onto one bound. */
  for (k = 0; k < 10000; k++) {
    const int target =
      hts_pause_target_ms((TStamp) (1719500000000LL + k), lo, hi);

    if (target < lo || target > hi)
      failed = 1;
    if (target < 6000)
      hit_bottom = 1;
    if (target > 9000)
      hit_top = 1;
  }
  if (!(hit_bottom && hit_top))
    failed = 1;

  /* Equal bounds mean a fixed delay. */
  if (hts_pause_target_ms(12345, 8000, 8000) != 8000)
    failed = 1;

  /* Deterministic: the target for a seed is unchanged by an intervening call
     with a different seed (there is no global PRNG state to perturb). */
  {
    const int first = hts_pause_target_ms(99, lo, hi);

    (void) hts_pause_target_ms(54321, lo, hi);
    if (first != hts_pause_target_ms(99, lo, hi))
      failed = 1;
  }

  printf("pause: %s\n", failed ? "FAIL" : "OK");
  return failed;
}

static int st_relative(httrackp *opt, int argc, char **argv) {
char s[HTS_URLMAXSIZE * 2];

Expand Down Expand Up @@ -1264,6 +1310,7 @@ static const struct selftest_entry {
{"strsafe", "[overflow|overflow-buff [str]]", "bounded string-op self-test",
st_strsafe},
{"copyopt", "", "copy_htsopt option-copy self-test", st_copyopt},
{"pause", "", "randomized inter-file pause target self-test", st_pause},
{"relative", "<link> <curr-file>", "relative link between two paths",
st_relative},
{"resolve", "<link> <adr> <fil>", "resolve a link against an origin",
Expand Down
12 changes: 12 additions & 0 deletions tests/01_engine-cmdline.test
Original file line number Diff line number Diff line change
Expand Up @@ -90,4 +90,16 @@ refused "dangling-quote argument not refused cleanly"
run_only "$tmp/q-lone" '"'
refused "lone-quote argument not refused cleanly"

# --pause (#185): valid MIN[:MAX] accepted; malformed, reversed, over-range and
# non-finite values refused cleanly. NaN defeats naive `<`/`>` checks (it
# compares false to everything), so it must not slip through to the int cast.
run "$tmp/pause-ok" --pause 0.2:0.4
accepted "$tmp/pause-ok" "#185: valid --pause range rejected"
run "$tmp/pause-fix" --pause 0.2
accepted "$tmp/pause-fix" "#185: valid fixed --pause rejected"
for bad in nan nan:5 5:nan inf 10:5 99999; do
run "$tmp/pause-bad" --pause "$bad"
refused "#185: invalid --pause '$bad' not refused cleanly"
done

exit 0
15 changes: 15 additions & 0 deletions tests/01_engine-pause.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/bash
#
# --pause (#185): the inter-file pause target must stay in [min,max] and spread
# across it (a per-call rand() would collapse it toward min). Driven by the
# in-process 'httrack -#test=pause' test. POSIX-portable ($(BASH) is /bin/sh on macOS).

set -eu

# Capture the exit status explicitly: under 'set -e' a bare out=$(...) aborts
# the script on a non-zero status before the diagnostic below can be printed.
# 'run' is an ignored placeholder argument.
rc=0
out=$(httrack -#test=pause run) || rc=$?

# Pass only when the self-test both exited cleanly and reported OK.
if [ "$rc" -ne 0 ] || [ "$out" != "pause: OK" ]; then
  echo "expected 'pause: OK', got: $out (exit status $rc)" >&2
  exit 1
fi
29 changes: 29 additions & 0 deletions tests/28_local-pause.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash
#
# --pause (#185): a fixed inter-file delay must slow a multi-file crawl. Measure
# the same crawl with and without --pause and compare: the harness overhead
# cancels, leaving only the pause. Integer seconds keep it portable (BSD date
# has no %N); a lower bound is not timing-flaky since a pause only adds time.

# Abort as soon as a command fails.
set -e

# Default the source-tree root to the parent directory when top_srcdir is unset
# or empty (presumably the test harness normally exports it -- confirm).
: "${top_srcdir:=..}"

# Crawl BASEURL/types/index.html once with -c1 plus any extra httrack arguments
# ("$@") and echo the wall-clock duration in whole seconds.
# Returns the crawl's exit status so that a failed crawl is never used as a
# timing sample: outside POSIX mode bash clears 'set -e' inside $(...), so the
# failure would otherwise be timed as if the crawl had succeeded.
run() {
  local t0 t1 rc=0
  t0=$(date +%s)
  bash "$top_srcdir/tests/local-crawl.sh" --errors 0 \
    httrack 'BASEURL/types/index.html' -c1 "$@" >/dev/null 2>&1 || rc=$?
  t1=$(date +%s)
  echo $((t1 - t0))
  return "$rc"
}

# Time the identical crawl twice: first as-is, then with a fixed 0.5s pause.
plain_s=$(run)
paused_s=$(run --pause 0.5)
extra_s=$((paused_s - plain_s))

echo "crawl: ${plain_s}s, with --pause 0.5: ${paused_s}s (delta ${extra_s}s)"

# The paused crawl must be at least 2 whole seconds slower than the baseline.
[ "$extra_s" -ge 2 ] || {
  echo "FAIL: --pause did not delay the crawl (delta ${extra_s}s)" >&2
  exit 1
}
4 changes: 3 additions & 1 deletion tests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ TESTS = \
01_engine-idna.test \
01_engine-mime.test \
01_engine-parse.test \
01_engine-pause.test \
01_engine-rcfile.test \
01_engine-relative.test \
01_engine-savename.test \
Expand Down Expand Up @@ -73,6 +74,7 @@ TESTS = \
24_local-resume-overlap.test \
25_local-mime-exclude.test \
26_local-strip-query.test \
27_local-cookies-file.test
27_local-cookies-file.test \
28_local-pause.test

CLEANFILES = check-network_sh.cache
Loading