Extract inbound tcp and websocket connection flow to separate classes, unify retrying#773
Extract inbound tcp and websocket connection flow to separate classes, unify retrying#773Vlatombe wants to merge 19 commits into
Conversation
A more ambitious attempt to extract logic from Engine into separate classes
| private final RSAPublicKey publicKey; | ||
| private final Map<String, String> headers; | ||
|
|
||
| public EngineJnlpConnectionStateListener(RSAPublicKey publicKey, Map<String, String> headers) { | ||
| this.publicKey = publicKey; | ||
| this.headers = headers; | ||
| } |
There was a problem hiding this comment.
Too bad JnlpConnectionStateListener is an abstract class (for no good reason)
|
Seems OK from a quick glance, though the diff is too long to readily see what is actually being edited. |
And implement records
| private static class NoReconnectException extends RuntimeException {} | ||
|
|
||
| @Test(timeout = 30_000) | ||
| @Test(timeout = 10_000) |
There was a problem hiding this comment.
Shortening timeout since the new retry logic is snappier
| private final RSAPublicKey publicKey; | ||
| private final Map<String, String> headers; | ||
|
|
||
| public EngineJnlpConnectionStateListener(RSAPublicKey publicKey, Map<String, String> headers) { | ||
| this.publicKey = publicKey; | ||
| this.headers = headers; | ||
| } |
There was a problem hiding this comment.
Too bad JnlpConnectionStateListener is an abstract class (for no good reason)
|
|
||
| public Engine( | ||
| EngineListener listener, | ||
| List<URL> hudsonUrls, |
| SSLContext context; | ||
| // prepare our SSLContext | ||
| try { | ||
| context = SSLContext.getInstance("TLS"); | ||
| } catch (NoSuchAlgorithmException e) { | ||
| throw new IllegalStateException("Java runtime specification requires support for TLS algorithm", e); | ||
| } | ||
| char[] password = "password".toCharArray(); | ||
| KeyStore store; | ||
| try { | ||
| store = KeyStore.getInstance(KeyStore.getDefaultType()); | ||
| } catch (KeyStoreException e) { | ||
| throw new IllegalStateException("Java runtime specification requires support for JKS key store", e); | ||
| } | ||
| try { | ||
| store.load(null, password); | ||
| } catch (NoSuchAlgorithmException e) { | ||
| throw new IllegalStateException("Java runtime specification requires support for JKS key store", e); | ||
| } catch (CertificateException e) { | ||
| throw new IllegalStateException("Empty keystore", e); | ||
| } | ||
| KeyManagerFactory kmf; | ||
| try { | ||
| kmf = KeyManagerFactory.getInstance(KeyManagerFactory.getDefaultAlgorithm()); | ||
| } catch (NoSuchAlgorithmException e) { | ||
| throw new IllegalStateException( | ||
| "Java runtime specification requires support for default key manager", e); | ||
| } | ||
| try { | ||
| kmf.init(store, password); | ||
| } catch (KeyStoreException | NoSuchAlgorithmException | UnrecoverableKeyException e) { | ||
| throw new IllegalStateException(e); | ||
| } | ||
| try { | ||
| context.init(kmf.getKeyManagers(), new TrustManager[] {agentTrustManager}, null); | ||
| } catch (KeyManagementException e) { | ||
| events.error(e); | ||
| return; | ||
| } |
| @SuppressFBWarnings( | ||
| value = {"REC_CATCH_EXCEPTION"}, | ||
| justification = "checked exceptions were a mistake to begin with; connecting to Jenkins from agent") | ||
| private void runWebSocket() { |
There was a problem hiding this comment.
Moved to WebSocketConnector
| private static class ExponentialRetry { | ||
| final int factor; | ||
| final Instant beginning; | ||
| final Duration delay; | ||
| final Duration timeout; | ||
| final Duration incrementDelay; | ||
| final Duration maxDelay; | ||
|
|
||
| ExponentialRetry(Duration timeout) { | ||
| this(Duration.ofSeconds(0), timeout, 2, Duration.ofSeconds(1), Duration.ofSeconds(10)); | ||
| } | ||
|
|
||
| ExponentialRetry( | ||
| Duration initialDelay, Duration timeout, int factor, Duration incrementDelay, Duration maxDelay) { | ||
| this.beginning = Instant.now(); | ||
| this.delay = initialDelay; | ||
| this.timeout = timeout; | ||
| this.factor = factor; | ||
| this.incrementDelay = incrementDelay; | ||
| this.maxDelay = maxDelay; | ||
| } | ||
|
|
||
| ExponentialRetry(ExponentialRetry previous) { | ||
| beginning = previous.beginning; | ||
| factor = previous.factor; | ||
| timeout = previous.timeout; | ||
| incrementDelay = previous.incrementDelay; | ||
| maxDelay = previous.maxDelay; | ||
| delay = min(maxDelay, previous.delay.multipliedBy(previous.factor).plus(incrementDelay)); | ||
| } | ||
|
|
||
| private static Duration min(Duration a, Duration b) { | ||
| return a.compareTo(b) < 0 ? a : b; | ||
| } | ||
|
|
||
| boolean timeoutExceeded() { | ||
| return Util.shouldBailOut(beginning, timeout); | ||
| } | ||
|
|
||
| ExponentialRetry next(EngineListenerSplitter events) throws InterruptedException { | ||
| var next = new ExponentialRetry(this); | ||
| if (next.timeoutExceeded()) { | ||
| events.status("Bailing out after " + DurationFormatter.format(next.timeout)); | ||
| return null; | ||
| } else { | ||
| events.status("Waiting " + DurationFormatter.format(next.delay) + " before retry"); | ||
| Thread.sleep(next.delay.toMillis()); | ||
| } | ||
| return next; | ||
| } | ||
| } |
| } | ||
| } | ||
|
|
||
| private void innerRun(IOHub hub, SSLContext context, ExecutorService service) { |
There was a problem hiding this comment.
Moved to InboundTCPConnector
| private static final Logger LOGGER = Logger.getLogger(Engine.class.getName()); | ||
|
|
||
| @SuppressFBWarnings(value = "PATH_TRAVERSAL_IN", justification = "File path is loaded from system properties.") | ||
| static KeyStore getCacertsKeyStore() |
| return this.protocolName; | ||
| } | ||
|
|
||
| private class EngineJnlpConnectionStateListener extends JnlpConnectionStateListener { |
There was a problem hiding this comment.
Moved to InboundTCPConnector
| } | ||
| value = {"HARD_CODE_PASSWORD", "REC_CATCH_EXCEPTION"}, | ||
| justification = "Password doesn't need to be protected.; We need to catch all exceptions") | ||
| public void run() { |
There was a problem hiding this comment.
This method contains the key structural changes.
| var clientProtocols = new JnlpProtocolHandlerFactory(data.executor()) | ||
| .withIOHub(hub) | ||
| .withSSLContext(context) | ||
| .withPreferNonBlockingIO(false) // we only have one connection, prefer blocking I/O | ||
| .handlers(); | ||
| var negotiatedProtocols = clientProtocols.stream() | ||
| .filter(JnlpProtocolHandler::isEnabled) | ||
| .filter(p -> endpoint.isProtocolSupported(p.getName())) | ||
| .collect(Collectors.toSet()); | ||
| var serverProtocols = endpoint.getProtocols() == null ? "?" : String.join(",", endpoint.getProtocols()); | ||
| LOGGER.info(buildDebugProtocolsMessage(serverProtocols, clientProtocols, negotiatedProtocols)); |
There was a problem hiding this comment.
Gives clearer output to users on which protocols are supported on either side of the connection.
|
See this document for a discussion of various retry strategies. Exponential backoff does appear to be a strict improvement over the status quo, as it balances retries against latency when the controller is unavailable for an unknown duration; however, in contrast with jitter-based solutions, it does not resolve contention for the controller. As in #676, testing of various retry strategies in real-world scenarios is needed to evaluate the effectiveness of those strategies. I would be fine with this PR if it was a simple refactoring that did not introduce exponential backoff. I would also be fine with introducing exponential backoff it was confirmed to have a positive impact in a functional test scenario. |
Extracts Inbound TCP and websocket connection flows to separate classes, and unify the retrying logic, so that both Inbound TCP and Websocket use retries, with exponential retries.
Bumped requirement to Java 17, since we are past End-october, and records makes things pretttier.
Testing done
Submitter checklist