Skip to content

Commit 4dd2e93

Browse files
committed
addressed pr comments
1 parent dd6fef1 commit 4dd2e93

10 files changed

Lines changed: 885 additions & 70 deletions

File tree

azure-blob-payloads/src/main/java/com/microsoft/durabletask/azureblobpayloads/BlobPayloadStore.java

Lines changed: 59 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import com.azure.storage.blob.BlobServiceClientBuilder;
1010
import com.azure.storage.blob.models.BlobDownloadResponse;
1111
import com.azure.storage.blob.models.BlobHttpHeaders;
12+
import com.azure.storage.blob.models.BlobRequestConditions;
1213
import com.azure.storage.blob.models.BlobStorageException;
1314
import com.azure.storage.common.policy.RequestRetryOptions;
1415
import com.azure.storage.common.policy.RetryPolicyType;
@@ -20,6 +21,7 @@
2021
import java.nio.charset.StandardCharsets;
2122
import java.util.UUID;
2223
import java.util.concurrent.atomic.AtomicBoolean;
24+
import java.util.regex.Pattern;
2325
import java.util.zip.GZIPInputStream;
2426
import java.util.zip.GZIPOutputStream;
2527

@@ -34,6 +36,13 @@ public final class BlobPayloadStore extends PayloadStore {
3436
static final String TOKEN_PREFIX = "blob:v1:";
3537
private static final String CONTENT_ENCODING_GZIP = "gzip";
3638

39+
// Blob name is UUID.randomUUID().toString().replace("-", ""): exactly 32 lowercase hex chars.
40+
// Container name follows Azure rules: 3-63 chars, lowercase alphanumerics and single hyphens,
41+
// must start and end with alphanumeric (see isValidContainerName).
42+
// Full token grammar: blob:v1:<container>:<32-lowercase-hex>
43+
private static final Pattern TOKEN_PATTERN = Pattern.compile(
44+
"^blob:v1:[a-z0-9](?:[a-z0-9]|-(?=[a-z0-9])){1,61}[a-z0-9]:[0-9a-f]{32}$");
45+
3746
private final BlobContainerClient containerClient;
3847
private final LargePayloadStorageOptions options;
3948
private final AtomicBoolean containerVerified = new AtomicBoolean(false);
@@ -106,22 +115,33 @@ public String upload(String payload) {
106115

107116
byte[] payloadBytes = payload.getBytes(StandardCharsets.UTF_8);
108117

109-
// Ensure container exists (idempotent) — skip after first successful check
110-
if (!this.containerVerified.get()) {
118+
// Ensure container exists (idempotent) — skip after first successful check.
119+
// compareAndSet lets only one concurrent caller perform the RPC; others skip.
120+
// On failure we reset the flag so a later call can retry.
121+
if (this.containerVerified.compareAndSet(false, true)) {
111122
try {
112123
this.containerClient.createIfNotExists();
113-
this.containerVerified.set(true);
114124
} catch (BlobStorageException e) {
115-
// 409 Conflict means it already exists — safe to ignore
125+
// 409 Conflict means it already exists — safe to ignore, leave flag set.
116126
if (e.getStatusCode() != 409) {
127+
this.containerVerified.set(false); // allow a future upload to retry
117128
throw new PayloadStorageException(
118129
"Failed to create blob container '" + this.containerClient.getBlobContainerName() + "'.", e);
119130
}
120-
this.containerVerified.set(true);
131+
} catch (RuntimeException e) {
132+
// Any other transport/SDK failure: also allow retry on next upload.
133+
this.containerVerified.set(false);
134+
throw e;
121135
}
122136
}
123137

124138
try {
139+
// Defense-in-depth: require the blob to not already exist (If-None-Match: *).
140+
// Blob names are random UUIDs so collisions are astronomically unlikely, but this
141+
// guards against future regressions (e.g. a caller-supplied PayloadStore that
142+
// generates deterministic names or a refactor that reuses names) by failing loudly
143+
// instead of silently overwriting someone else's payload.
144+
BlobRequestConditions conditions = new BlobRequestConditions().setIfNoneMatch("*");
125145
if (this.options.isCompressionEnabled()) {
126146
ByteArrayOutputStream compressedBuffer = new ByteArrayOutputStream();
127147
try (GZIPOutputStream gzip = new GZIPOutputStream(compressedBuffer)) {
@@ -133,19 +153,39 @@ public String upload(String payload) {
133153
blob.uploadWithResponse(
134154
stream,
135155
compressedBytes.length,
136-
null, // parallelTransferOptions
156+
null, // parallelTransferOptions
137157
headers,
138-
null, // metadata
139-
null, // tier
140-
null, // requestConditions
141-
null, // timeout
158+
null, // metadata
159+
null, // tier
160+
conditions, // requestConditions
161+
null, // timeout
142162
Context.NONE);
143163
}
144164
} else {
145165
try (InputStream stream = new ByteArrayInputStream(payloadBytes)) {
146-
blob.upload(stream, payloadBytes.length, true);
166+
blob.uploadWithResponse(
167+
stream,
168+
payloadBytes.length,
169+
null, // parallelTransferOptions
170+
null, // headers
171+
null, // metadata
172+
null, // tier
173+
conditions, // requestConditions
174+
null, // timeout
175+
Context.NONE);
147176
}
148177
}
178+
} catch (BlobStorageException e) {
179+
// 409 BlobAlreadyExists and 412 ConditionNotMet (from If-None-Match: *) both indicate
180+
// a name collision on upload — treat as a hard failure rather than silently overwriting.
181+
if (e.getStatusCode() == 409 || e.getStatusCode() == 412) {
182+
throw new PayloadStorageException(
183+
"Payload blob '" + blobName + "' already exists in container '"
184+
+ this.containerClient.getBlobContainerName()
185+
+ "'. Refusing to overwrite. This should not happen with random UUID blob names "
186+
+ "and likely indicates a bug in a custom PayloadStore implementation.", e);
187+
}
188+
throw new PayloadStorageException("Failed to upload payload blob '" + blobName + "'.", e);
149189
} catch (IOException e) {
150190
throw new PayloadStorageException("Failed to upload payload blob '" + blobName + "'.", e);
151191
}
@@ -213,7 +253,14 @@ public boolean isKnownPayloadToken(String value) {
213253
if (value == null || value.isEmpty()) {
214254
return false;
215255
}
216-
return value.startsWith(TOKEN_PREFIX);
256+
// Validate the full token grammar (prefix + container + blob name), not just the
257+
// prefix, so arbitrary user strings that happen to start with "blob:v1:" are not
258+
// treated as tokens. This avoids spurious blob GETs (DoS surface) and spurious
259+
// "container mismatch" failures on the response path.
260+
if (value.length() < TOKEN_PREFIX.length() || !value.startsWith(TOKEN_PREFIX)) {
261+
return false;
262+
}
263+
return TOKEN_PATTERN.matcher(value).matches();
217264
}
218265

219266
static String encodeToken(String container, String name) {

0 commit comments

Comments
 (0)