diff --git a/man/httrack.1 b/man/httrack.1
index 6e7cd1ad..3ba9b835 100644
--- a/man/httrack.1
+++ b/man/httrack.1
@@ -24,6 +24,7 @@ httrack \- offline browser : copy websites to a local directory
[ \fB\-EN, \-\-max\-time[=N]\fR ]
[ \fB\-AN, \-\-max\-rate[=N]\fR ]
[ \fB\-%cN, \-\-connection\-per\-second[=N]\fR ]
+[ \fB\-%G, \-\-pause\fR ]
[ \fB\-GN, \-\-max\-pause[=N]\fR ]
[ \fB\-cN, \-\-sockets[=N]\fR ]
[ \fB\-TN, \-\-timeout[=N]\fR ]
@@ -155,6 +156,8 @@ maximum mirror time in seconds (60=1 minute, 3600=1 hour) (\-\-max\-time[=N])
maximum transfer rate in bytes/seconds (1000=1KB/s max) (\-\-max\-rate[=N])
.IP \-%cN
maximum number of connections/seconds (*%c10) (\-\-connection\-per\-second[=N])
+.IP \-%G
+random pause of MIN[:MAX] seconds between files (e.g. \-%G 5:10) (\-\-pause <param>)
.IP \-GN
pause transfer if N bytes reached, and wait until lock file is deleted (\-\-max\-pause[=N])
.SS Flow control:
diff --git a/src/htsalias.c b/src/htsalias.c
index 7359f2f7..7b4811a3 100644
--- a/src/htsalias.c
+++ b/src/htsalias.c
@@ -114,6 +114,8 @@ const char *hts_optalias[][4] = {
"strip [host/pattern=]key1,key2,... from URLs"},
{"cookies-file", "-%K", "param1",
"load extra cookies from a Netscape cookies.txt"},
+ {"pause", "-%G", "param1",
+ "random pause of MIN[:MAX] seconds between files"},
{"generate-errors", "-o", "single", ""},
{"do-not-generate-errors", "-o0", "single", ""},
{"purge-old", "-X", "param", ""},
diff --git a/src/htscore.c b/src/htscore.c
index 0d25d327..c8736363 100644
--- a/src/htscore.c
+++ b/src/htscore.c
@@ -35,6 +35,7 @@ Please visit our Website: http://www.httrack.com
#include
#include
+#include <stdint.h> /* uint64_t for the pause mixer (already a hard dep via md5.h) */
/* File defs */
#include "htscore.h"
@@ -3314,6 +3315,21 @@ HTS_INLINE int back_fillmax(struct_back * sback, httrackp * opt,
return -1; /* plus de place */
}
+/* Pause target in [min_ms,max_ms] from a SplitMix64-finalized seed: stable within
+   a gap, rerolls per launch (per-call rand() would bias toward min_ms). Not crypto. */
+int hts_pause_target_ms(TStamp seed, int min_ms, int max_ms) {
+  uint64_t z = (uint64_t) seed; /* ms timestamp: low entropy until mixed */
+  if (max_ms <= min_ms)
+    return min_ms;              /* degenerate range: fixed delay */
+  z += 0x9E3779B97F4A7C15ULL;
+  z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9ULL;
+  z = (z ^ (z >> 27)) * 0x94D049BB133111EBULL;
+  z ^= z >> 31;
+  /* 64-bit span and sum: int `max_ms - min_ms + 1` overflows (UB) on wide ranges */
+  z %= (uint64_t) ((int64_t) max_ms - min_ms) + 1;
+  return (int) (min_ms + (int64_t) z);
+}
+
int back_pluggable_sockets_strict(struct_back * sback, httrackp * opt) {
int n = opt->maxsoc - back_nsoc(sback);
@@ -3334,6 +3350,18 @@ int back_pluggable_sockets_strict(struct_back * sback, httrackp * opt) {
}
}
+ // #185 randomized inter-file pause: non-blocking, one launch per gap
+ if (n > 0 && opt->pause_max_ms > 0 && HTS_STAT.last_connect > 0) {
+ TStamp opTime =
+ HTS_STAT.last_request ? HTS_STAT.last_request : HTS_STAT.last_connect;
+ TStamp lap = mtime_local() - opTime;
+
+ if (lap < hts_pause_target_ms(opTime, opt->pause_min_ms, opt->pause_max_ms))
+ n = 0;
+ else
+ n = 1;
+ }
+
return n;
}
@@ -3748,6 +3776,11 @@ HTSEXT_API int copy_htsopt(const httrackp * from, httrackp * to) {
if (StringNotEmpty(from->cookies_file))
StringCopyS(to->cookies_file, from->cookies_file);
+ if (from->pause_max_ms > 0) {
+ to->pause_min_ms = from->pause_min_ms;
+ to->pause_max_ms = from->pause_max_ms;
+ }
+
if (from->retry > -1)
to->retry = from->retry;
diff --git a/src/htscore.h b/src/htscore.h
index 2d59f49f..d6caed43 100644
--- a/src/htscore.h
+++ b/src/htscore.h
@@ -418,6 +418,10 @@ int back_pluggable_sockets(struct_back * sback, httrackp * opt);
int back_pluggable_sockets_strict(struct_back * sback, httrackp * opt);
+/* Randomized inter-file pause target in [min_ms,max_ms] (#185), derived from a
+ timestamp seed so it is stable within one gap and rerolls per launch. */
+int hts_pause_target_ms(TStamp seed, int min_ms, int max_ms);
+
/* Schedule more links from the heap into free slots. Returns the number queued,
or <=0 if none could be added (no free slot / paused / stopped). */
int back_fill(struct_back * sback, httrackp * opt, cache_back * cache,
diff --git a/src/htscoremain.c b/src/htscoremain.c
index 5de6fde1..c4c2d416 100644
--- a/src/htscoremain.c
+++ b/src/htscoremain.c
@@ -1994,6 +1994,33 @@ static int hts_main_internal(int argc, char **argv, httrackp * opt) {
StringCopy(opt->cookies_file, argv[na]);
}
break;
+          case 'G':             // pause: randomized inter-file delay MIN[:MAX] seconds
+            if ((na + 1 >= argc) || (argv[na + 1][0] == '-')) {
+              HTS_PANIC_PRINTF("Option pause needs a blank space and a "
+                               "delay in seconds (MIN[:MAX])");
+              printf("Example: --pause 5:10\n");
+              htsmain_free();
+              return -1;
+            } else {
+              double pmin = 0, pmax = 0;
+              int nf, end = 0;  /* end: offset just past the last number parsed */
+              na++;
+              nf = sscanf(argv[na], "%lf%n:%lf%n", &pmin, &end, &pmax, &end);
+              if (nf < 2)
+                pmax = pmin;    /* a single value means a fixed delay */
+              /* refuse trailing junk ("5x", "5:"); positive-form bounds also
+                 refuse NaN before the undefined (int)(NaN*1000) cast */
+              if (nf < 1 || argv[na][end] != '\0'
+                  || !(pmin >= 0 && pmax >= pmin && pmax <= 86400)) {
+                HTS_PANIC_PRINTF("Invalid --pause range (expected "
+                                 "MIN[:MAX] seconds, 0<=MIN<=MAX<=86400)");
+                htsmain_free();
+                return -1;
+              }
+              opt->pause_min_ms = (int) (pmin * 1000.0);
+              opt->pause_max_ms = (int) (pmax * 1000.0);
+            }
+            break;
case 't': /* do not change type (ending) of filenames according to the MIME type */
opt->no_type_change = 1;
if (*(com+1)=='0') { opt->no_type_change = 0; com++; }
diff --git a/src/htshelp.c b/src/htshelp.c
index 8e17a4b2..6a7ca5e6 100644
--- a/src/htshelp.c
+++ b/src/htshelp.c
@@ -521,6 +521,7 @@ void help(const char *app, int more) {
infomsg(" EN maximum mirror time in seconds (60=1 minute, 3600=1 hour)");
infomsg(" AN maximum transfer rate in bytes/seconds (1000=1KB/s max)");
infomsg(" %cN maximum number of connections/seconds (*%c10)");
+  infomsg(" %G random pause of MIN[:MAX] seconds between files (e.g. -%G 5:10)");
infomsg
(" GN pause transfer if N bytes reached, and wait until lock file is deleted");
infomsg("");
diff --git a/src/htslib.c b/src/htslib.c
index a0a01e76..4bea4c43 100644
--- a/src/htslib.c
+++ b/src/htslib.c
@@ -6046,6 +6046,8 @@ HTSEXT_API httrackp *hts_create_opt(void) {
StringCopy(opt->footer, HTS_DEFAULT_FOOTER);
StringCopy(opt->strip_query, "");
StringCopy(opt->cookies_file, "");
+ opt->pause_min_ms = 0;
+ opt->pause_max_ms = 0;
opt->ftp_proxy = HTS_TRUE;
opt->convert_utf8 = HTS_TRUE;
StringCopy(opt->filelist, "");
diff --git a/src/htsopt.h b/src/htsopt.h
index 3007b0de..6d39bb63 100644
--- a/src/htsopt.h
+++ b/src/htsopt.h
@@ -537,6 +537,8 @@ struct httrackp {
hts_boolean no_query_dedup; /**< with urlhack, keep query-argument order */
String cookies_file; /**< extra Netscape cookies.txt to preload
(--cookies-file) */
+  int pause_min_ms;             /**< inter-file pause lower bound, ms (#185) */
+  int pause_max_ms;             /**< inter-file pause upper bound, ms (0=off) */
};
/* Running statistics for a mirror. */
diff --git a/src/htsselftest.c b/src/htsselftest.c
index cf833fa6..aca1c96b 100644
--- a/src/htsselftest.c
+++ b/src/htsselftest.c
@@ -912,12 +912,58 @@ static int st_copyopt(httrackp *opt, int argc, char **argv) {
if (strcmp(StringBuff(to->cookies_file), "/tmp/jar.txt") != 0)
err = 1;
+ /* #185 pause pair: copied when enabled (max>0), the 0 sentinel skips */
+ from->pause_min_ms = 5000;
+ from->pause_max_ms = 10000;
+ to->pause_min_ms = to->pause_max_ms = 0;
+ copy_htsopt(from, to);
+ if (to->pause_min_ms != 5000 || to->pause_max_ms != 10000)
+ err = 1;
+ from->pause_min_ms = from->pause_max_ms = 0;
+ copy_htsopt(from, to);
+ if (to->pause_min_ms != 5000 || to->pause_max_ms != 10000)
+ err = 1;
+
hts_free_opt(from);
hts_free_opt(to);
printf("copy-htsopt: %s\n", err ? "FAIL" : "OK");
return err;
}
+static int st_pause(httrackp *opt, int argc, char **argv) {
+  int failed = 0, below_6s = 0, above_9s = 0, k, first;
+
+  (void) opt;
+  (void) argc;
+  (void) argv;
+  /* Seeds one millisecond apart mimic consecutive launch timestamps, the hardest
+     input for a weak low-bit mixer: every target must lie in [5000,10000] and
+     the set must reach both ends instead of collapsing onto one bound. */
+  for (k = 0; k < 10000; k++) {
+    const int ms =
+      hts_pause_target_ms((TStamp) (1719500000000LL + k), 5000, 10000);
+    if (ms < 5000 || ms > 10000)
+      failed = 1;
+    if (ms < 6000)
+      below_6s = 1;
+    if (ms > 9000)
+      above_9s = 1;
+  }
+  if (!(below_6s && above_9s))
+    failed = 1;
+  /* equal bounds mean a fixed delay */
+  if (hts_pause_target_ms(12345, 8000, 8000) != 8000)
+    failed = 1;
+  /* Pure function of the seed: a call with another seed in between must not
+     change what seed 99 yields (there is no global PRNG state). */
+  first = hts_pause_target_ms(99, 5000, 10000);
+  (void) hts_pause_target_ms(54321, 5000, 10000);
+  if (hts_pause_target_ms(99, 5000, 10000) != first)
+    failed = 1;
+  printf("pause: %s\n", failed ? "FAIL" : "OK");
+  return failed;
+}
+
static int st_relative(httrackp *opt, int argc, char **argv) {
char s[HTS_URLMAXSIZE * 2];
@@ -1264,6 +1310,7 @@ static const struct selftest_entry {
{"strsafe", "[overflow|overflow-buff [str]]", "bounded string-op self-test",
st_strsafe},
{"copyopt", "", "copy_htsopt option-copy self-test", st_copyopt},
+ {"pause", "", "randomized inter-file pause target self-test", st_pause},
{"relative", " ", "relative link between two paths",
st_relative},
{"resolve", " ", "resolve a link against an origin",
diff --git a/tests/01_engine-cmdline.test b/tests/01_engine-cmdline.test
index 0a507618..549bd97a 100755
--- a/tests/01_engine-cmdline.test
+++ b/tests/01_engine-cmdline.test
@@ -90,4 +90,16 @@ refused "dangling-quote argument not refused cleanly"
run_only "$tmp/q-lone" '"'
refused "lone-quote argument not refused cleanly"
+# --pause (#185): valid MIN[:MAX] accepted; malformed, reversed, over-range and
+# non-finite values refused cleanly. NaN defeats naive `<`/`>` checks (it
+# compares false to everything), so it must not slip through to the int cast.
+run "$tmp/pause-ok" --pause 0.2:0.4
+accepted "$tmp/pause-ok" "#185: valid --pause range rejected"
+run "$tmp/pause-fix" --pause 0.2
+accepted "$tmp/pause-fix" "#185: valid fixed --pause rejected"
+for bad in nan nan:5 5:nan inf 10:5 99999; do
+ run "$tmp/pause-bad" --pause "$bad"
+ refused "#185: invalid --pause '$bad' not refused cleanly"
+done
+
exit 0
diff --git a/tests/01_engine-pause.test b/tests/01_engine-pause.test
new file mode 100755
index 00000000..4fb8e9be
--- /dev/null
+++ b/tests/01_engine-pause.test
@@ -0,0 +1,15 @@
+#!/bin/bash
+#
+# --pause (#185) self-test wrapper: runs the in-process 'httrack -#test=pause'
+# check (target stays in [min,max] and spreads across it; a per-call rand()
+# would collapse it toward min). POSIX-portable ($(BASH) is /bin/sh on macOS).
+
+set -eu
+
+# The trailing 'run' word is a placeholder the self-test ignores.
+result=$(httrack -#test=pause run)
+
+if [ "$result" != "pause: OK" ]; then
+  echo "expected 'pause: OK', got: $result" >&2
+  exit 1
+fi
diff --git a/tests/28_local-pause.test b/tests/28_local-pause.test
new file mode 100755
index 00000000..8505a750
--- /dev/null
+++ b/tests/28_local-pause.test
@@ -0,0 +1,29 @@
+#!/bin/bash
+#
+# --pause (#185): a fixed inter-file delay must slow a multi-file crawl. The same
+# crawl is timed without and with --pause 0.5; the test fails unless the paused
+# run takes at least 2s longer. Whole seconds only (BSD date has no %N).
+# NOTE(review): a slow baseline run shrinks the delta -- may be flaky under load.
+
+set -e
+
+: "${top_srcdir:=..}"
+
+run() { # prints the wall-clock seconds of one crawl; extra args go to httrack
+  local started finished
+  started=$(date +%s)
+  bash "$top_srcdir/tests/local-crawl.sh" --errors 0 \
+    httrack 'BASEURL/types/index.html' -c1 "$@" >/dev/null 2>&1
+  finished=$(date +%s)
+  echo $((finished - started))
+}
+
+base=$(run)
+paused=$(run --pause 0.5)
+delta=$((paused - base))
+
+echo "crawl: ${base}s, with --pause 0.5: ${paused}s (delta ${delta}s)"
+[ "$delta" -ge 2 ] || {
+  echo "FAIL: --pause did not delay the crawl (delta ${delta}s)" >&2
+  exit 1
+}
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 20c2ca1e..380c53e9 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -41,6 +41,7 @@ TESTS = \
01_engine-idna.test \
01_engine-mime.test \
01_engine-parse.test \
+ 01_engine-pause.test \
01_engine-rcfile.test \
01_engine-relative.test \
01_engine-savename.test \
@@ -73,6 +74,7 @@ TESTS = \
24_local-resume-overlap.test \
25_local-mime-exclude.test \
26_local-strip-query.test \
- 27_local-cookies-file.test
+ 27_local-cookies-file.test \
+ 28_local-pause.test
CLEANFILES = check-network_sh.cache