From 4ded0e3c27a8c61d86b1c8b0003268d0558581c9 Mon Sep 17 00:00:00 2001 From: Tobias Kuhn Date: Mon, 1 Jun 2026 20:18:47 +0200 Subject: [PATCH 1/2] fix(env): read environment via System.getenv instead of forking a subprocess MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Utils.getEnvString read the environment via Apache Commons Exec's EnvironmentUtils.getProcEnvironment(), which forks a subprocess (env/sh) and parses its stdout on every call. This method is on the per-nanopub hot path — FeatureFlags.fullRepoEnabled() and the other repo flags are evaluated for every nanopub, from the 4-thread loading pool. The subprocess read is neither thread-safe nor robust under load, so an intermittently mangled result made fullRepoEnabled() evaluate to false and silently skip the full-repo write for individual nanopubs. Because the meta task is deferred until the (submitted) per-repo tasks succeed, but the full task is gated by the flag *before* submission, this leaves the full repo behind meta with no error — and undetectably, since the monitor and the Nanopub-Query-Loaded-Nanopub-Count/-Checksum headers source from meta (issue #117). Switch getEnvString to System.getenv, which reads the JVM's in-memory environment block: thread-safe, allocation-free, deterministic, and it never forks or throws. Also retire the same getProcEnvironment() mechanism in the TripleStore constructor. It is startup-only (not part of the #117 trigger), but a fork failure there throws IOException and leaves the singleton uninitialised; System.getenv removes that failure mode and the last use of the fragile API. ENDPOINT_BASE/ENDPOINT_TYPE are container env vars set at launch, so the read is semantically identical. Remediation note: this stops the gap from growing but does not backfill nanopubs already missing from full — those must be rebuilt via FORCE_RESYNC/FORCE_RELOAD on affected instances. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../knowledgepixels/query/TripleStore.java | 11 +++++++---- .../java/com/knowledgepixels/query/Utils.java | 19 +++++++++++-------- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/src/main/java/com/knowledgepixels/query/TripleStore.java b/src/main/java/com/knowledgepixels/query/TripleStore.java index f167ba7..7c68a80 100644 --- a/src/main/java/com/knowledgepixels/query/TripleStore.java +++ b/src/main/java/com/knowledgepixels/query/TripleStore.java @@ -1,6 +1,5 @@ package com.knowledgepixels.query; -import org.apache.commons.exec.environment.EnvironmentUtils; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpUriRequest; @@ -79,10 +78,14 @@ public static TripleStore get() { } private TripleStore() throws IOException { - Map env = EnvironmentUtils.getProcEnvironment(); - endpointBase = env.get("ENDPOINT_BASE"); + // Read directly from the JVM's in-memory environment block rather than via + // Apache Commons Exec's EnvironmentUtils.getProcEnvironment(), which forks a + // subprocess and can throw under memory pressure / a restart storm — leaving + // the singleton uninitialised. Same fragile mechanism that caused issue #117 + // on the per-nanopub hot path (see Utils.getEnvString); retired here too. + endpointBase = System.getenv("ENDPOINT_BASE"); log.info("Endpoint base: {}", endpointBase); - endpointType = env.get("ENDPOINT_TYPE"); + endpointType = System.getenv("ENDPOINT_TYPE"); getRepository("empty"); // Make sure empty repo exists } diff --git a/src/main/java/com/knowledgepixels/query/Utils.java b/src/main/java/com/knowledgepixels/query/Utils.java index 5b28daf..97f50b7 100644 --- a/src/main/java/com/knowledgepixels/query/Utils.java +++ b/src/main/java/com/knowledgepixels/query/Utils.java @@ -9,7 +9,6 @@ import java.util.Map; import java.util.Properties; -import org.apache.commons.exec.environment.EnvironmentUtils; import org.apache.commons.lang3.StringUtils; import org.apache.http.client.config.CookieSpecs; import org.apache.http.client.config.RequestConfig; @@ -165,13 +164,17 @@ public static List getObjectsForPattern(RepositoryConnection conn, IRI gr * @return environment variable value */ public static String getEnvString(String envVarName, String defaultValue) { - try { - String s = EnvironmentUtils.getProcEnvironment().get(envVarName); - if (s != null && !s.isEmpty()) { - return s; - } - } catch (Exception ex) { - log.info("Could not get environment variable", ex); + // Read straight from the JVM's in-memory environment block. The previous + // implementation used Apache Commons Exec's EnvironmentUtils.getProcEnvironment(), + // which forks a subprocess (env/sh) and parses its stdout on every call. That is + // neither thread-safe nor robust under load, and since this method is on the + // per-nanopub hot path (FeatureFlags.fullRepoEnabled() and friends, called from + // the 4-thread loading pool) an intermittently mangled read made + // fullRepoEnabled() evaluate to false and silently skip the full-repo write for + // individual nanopubs — leaving full behind meta undetectably (issue #117). + String s = System.getenv(envVarName); + if (s != null && !s.isEmpty()) { + return s; } return defaultValue; } From 7178e3327e6dbe667c6b04eb22b389441b227704 Mon Sep 17 00:00:00 2001 From: Tobias Kuhn Date: Mon, 1 Jun 2026 20:30:20 +0200 Subject: [PATCH 2/2] test(env): mock the getRawEnv seam instead of getProcEnvironment UtilsTest.getEnvString/getEnvInt mocked EnvironmentUtils.getProcEnvironment(), which no longer governs getEnvString. System.getenv can't be mocked directly (Mockito refuses to mock java.lang.System), so extract a package-private getRawEnv(name) seam around System.getenv and stub that via the existing mockStatic(Utils.class, CALLS_REAL_METHODS) pattern. Dropped the obsolete IOException-throw case (System.getenv doesn't throw). Co-Authored-By: Claude Opus 4.8 (1M context) --- .../java/com/knowledgepixels/query/Utils.java | 32 ++++++++++++----- .../com/knowledgepixels/query/UtilsTest.java | 36 ++++++++----------- 2 files changed, 37 insertions(+), 31 deletions(-) diff --git a/src/main/java/com/knowledgepixels/query/Utils.java b/src/main/java/com/knowledgepixels/query/Utils.java index 97f50b7..1781534 100644 --- a/src/main/java/com/knowledgepixels/query/Utils.java +++ b/src/main/java/com/knowledgepixels/query/Utils.java @@ -164,21 +164,35 @@ public static List getObjectsForPattern(RepositoryConnection conn, IRI gr * @return environment variable value */ public static String getEnvString(String envVarName, String defaultValue) { - // Read straight from the JVM's in-memory environment block. The previous - // implementation used Apache Commons Exec's EnvironmentUtils.getProcEnvironment(), - // which forks a subprocess (env/sh) and parses its stdout on every call. That is - // neither thread-safe nor robust under load, and since this method is on the - // per-nanopub hot path (FeatureFlags.fullRepoEnabled() and friends, called from - // the 4-thread loading pool) an intermittently mangled read made - // fullRepoEnabled() evaluate to false and silently skip the full-repo write for - // individual nanopubs — leaving full behind meta undetectably (issue #117). - String s = System.getenv(envVarName); + String s = getRawEnv(envVarName); if (s != null && !s.isEmpty()) { return s; } return defaultValue; } + /** + * Reads a single environment variable from the JVM's in-memory environment + * block. The previous implementation used Apache Commons Exec's + * {@code EnvironmentUtils.getProcEnvironment()}, which forks a subprocess + * ({@code env}/{@code sh}) and parses its stdout on every call. That is neither + * thread-safe nor robust under load, and since {@link #getEnvString} is on the + * per-nanopub hot path ({@link FeatureFlags#fullRepoEnabled()} and friends, + * called from the 4-thread loading pool) an intermittently mangled read made + * {@code fullRepoEnabled()} evaluate to {@code false} and silently skip the + * full-repo write for individual nanopubs — leaving {@code full} behind + * {@code meta} undetectably (issue #117). + * + *

Extracted as a package-private seam because {@link System} static methods + * cannot be mocked directly with Mockito; tests stub this instead. + * + * @param envVarName environment variable name + * @return the raw value, or {@code null} if unset + */ + static String getRawEnv(String envVarName) { + return System.getenv(envVarName); + } + /** * Returns the system environment variable content as an integer for the given environment variable name. * diff --git a/src/test/java/com/knowledgepixels/query/UtilsTest.java b/src/test/java/com/knowledgepixels/query/UtilsTest.java index dfbaf6f..47047e8 100644 --- a/src/test/java/com/knowledgepixels/query/UtilsTest.java +++ b/src/test/java/com/knowledgepixels/query/UtilsTest.java @@ -1,7 +1,6 @@ package com.knowledgepixels.query; import com.github.jsonldjava.shaded.com.google.common.hash.Hashing; -import org.apache.commons.exec.environment.EnvironmentUtils; import org.apache.http.client.config.RequestConfig; import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.Value; @@ -14,7 +13,6 @@ import org.mockito.Mockito; import org.nanopub.vocabulary.NPA; -import java.io.IOException; import java.lang.reflect.Field; import java.nio.charset.StandardCharsets; import java.util.HashMap; @@ -128,24 +126,20 @@ void getObjectsForPattern() { @Test void getEnvString() { final String defaultValue = "default"; + final String mockValueString = "value"; - Map mockEnv = new HashMap<>(); - String mockValueString = "value"; - mockEnv.put("EXISTING_VAR", mockValueString); - mockEnv.put("EMPTY_VAR", ""); - mockEnv.put("NULL_VAR", null); - - try (MockedStatic mockedEnvUtils = Mockito.mockStatic(EnvironmentUtils.class)) { - mockedEnvUtils.when(EnvironmentUtils::getProcEnvironment).thenReturn(mockEnv); + // getEnvString now reads the env via the Utils.getRawEnv seam (System.getenv + // cannot be mocked directly — see #117). CALLS_REAL_METHODS keeps getEnvString real. + try (MockedStatic mockedUtils = Mockito.mockStatic(Utils.class, Mockito.CALLS_REAL_METHODS)) { + mockedUtils.when(() -> Utils.getRawEnv("EXISTING_VAR")).thenReturn(mockValueString); + mockedUtils.when(() -> Utils.getRawEnv("EMPTY_VAR")).thenReturn(""); + mockedUtils.when(() -> Utils.getRawEnv("NULL_VAR")).thenReturn(null); + mockedUtils.when(() -> Utils.getRawEnv("NON_EXISTING_VAR")).thenReturn(null); assertEquals(mockValueString, Utils.getEnvString("EXISTING_VAR", defaultValue)); assertEquals(defaultValue, Utils.getEnvString("NON_EXISTING_VAR", defaultValue)); assertEquals(defaultValue, Utils.getEnvString("EMPTY_VAR", defaultValue)); assertEquals(defaultValue, Utils.getEnvString("NULL_VAR", defaultValue)); - - mockedEnvUtils.when(EnvironmentUtils::getProcEnvironment).thenThrow(IOException.class); - assertEquals(defaultValue, Utils.getEnvString("EXISTING_VAR", defaultValue)); - } } @@ -156,14 +150,12 @@ void getEnvInt() { final int validInt = Integer.parseInt(validIntValue); final String invalidIntValue = "not_an_int"; - Map mockEnv = new HashMap<>(); - mockEnv.put("VALID_INT", validIntValue); - mockEnv.put("INVALID_INT", invalidIntValue); - mockEnv.put("EMPTY_VAR", ""); - mockEnv.put("NULL_VAR", null); - - try (MockedStatic mockedEnvUtils = Mockito.mockStatic(EnvironmentUtils.class)) { - mockedEnvUtils.when(EnvironmentUtils::getProcEnvironment).thenReturn(mockEnv); + try (MockedStatic mockedUtils = Mockito.mockStatic(Utils.class, Mockito.CALLS_REAL_METHODS)) { + mockedUtils.when(() -> Utils.getRawEnv("VALID_INT")).thenReturn(validIntValue); + mockedUtils.when(() -> Utils.getRawEnv("INVALID_INT")).thenReturn(invalidIntValue); + mockedUtils.when(() -> Utils.getRawEnv("EMPTY_VAR")).thenReturn(""); + mockedUtils.when(() -> Utils.getRawEnv("NULL_VAR")).thenReturn(null); + mockedUtils.when(() -> Utils.getRawEnv("NON_EXISTING_VAR")).thenReturn(null); assertEquals(validInt, Utils.getEnvInt("VALID_INT", defaultValue)); assertEquals(defaultValue, Utils.getEnvInt("NON_EXISTING_VAR", defaultValue));