Skip to content

Commit e4ad07c

Browse files
SK-1978 Add deidentify and reidentify text public interface for detect service. (#173)
* SK-1978 add fern generated code for detect * SK-1978 add public interface for deidentify text detect support * SK-1978 add public interface for deidentify & reidentify text with unit test * SK-1978 updated masking method, thrown skyflow exception & fixed unit test * SK-1978 address the review comments
1 parent af115c4 commit e4ad07c

152 files changed

Lines changed: 21064 additions & 2588 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

src/main/java/com/skyflow/Skyflow.java

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import com.skyflow.utils.logger.LogUtil;
1515
import com.skyflow.utils.validations.Validations;
1616
import com.skyflow.vault.controller.ConnectionController;
17+
import com.skyflow.vault.controller.DetectController;
1718
import com.skyflow.vault.controller.VaultController;
1819

1920
import java.util.LinkedHashMap;
@@ -101,25 +102,57 @@ public VaultController vault(String vaultId) throws SkyflowException {
101102
return controller;
102103
}
103104

104-
public ConnectionController connection() {
105-
String connectionId = (String) this.builder.connectionsMap.keySet().toArray()[0];
105+
106+
public ConnectionController connection() throws SkyflowException {
107+
Object[] array = this.builder.connectionsMap.keySet().toArray();
108+
if (array.length < 1) {
109+
LogUtil.printErrorLog(ErrorLogs.CONNECTION_CONFIG_DOES_NOT_EXIST.getLog());
110+
throw new SkyflowException(ErrorCode.INVALID_INPUT.getCode(), ErrorMessage.ConnectionIdNotInConfigList.getMessage());
111+
}
112+
String connectionId = (String) array[0];
106113
return this.connection(connectionId);
107114
}
108115

109-
public ConnectionController connection(String connectionId) {
110-
return this.builder.connectionsMap.get(connectionId);
116+
public ConnectionController connection(String connectionId) throws SkyflowException {
117+
ConnectionController controller = this.builder.connectionsMap.get(connectionId);
118+
if (controller == null) {
119+
LogUtil.printErrorLog(ErrorLogs.CONNECTION_CONFIG_DOES_NOT_EXIST.getLog());
120+
throw new SkyflowException(ErrorCode.INVALID_INPUT.getCode(), ErrorMessage.ConnectionIdNotInConfigList.getMessage());
121+
}
122+
return controller;
123+
}
124+
125+
public DetectController detect() throws SkyflowException {
126+
Object[] array = this.builder.detectClientsMap.keySet().toArray();
127+
if (array.length < 1) {
128+
LogUtil.printErrorLog(ErrorLogs.VAULT_CONFIG_DOES_NOT_EXIST.getLog());
129+
throw new SkyflowException(ErrorCode.INVALID_INPUT.getCode(), ErrorMessage.VaultIdNotInConfigList.getMessage());
130+
}
131+
String detectId = (String) array[0];
132+
return this.detect(detectId);
133+
}
134+
135+
public DetectController detect(String vaultId) throws SkyflowException {
136+
DetectController controller = this.builder.detectClientsMap.get(vaultId);
137+
if (controller == null) {
138+
LogUtil.printErrorLog(ErrorLogs.VAULT_CONFIG_DOES_NOT_EXIST.getLog());
139+
throw new SkyflowException(ErrorCode.INVALID_INPUT.getCode(), ErrorMessage.VaultIdNotInConfigList.getMessage());
140+
}
141+
return controller;
111142
}
112143

113144
public static final class SkyflowClientBuilder {
114145
private final LinkedHashMap<String, ConnectionController> connectionsMap;
115146
private final LinkedHashMap<String, VaultController> vaultClientsMap;
147+
private final LinkedHashMap<String, DetectController> detectClientsMap;
116148
private final LinkedHashMap<String, VaultConfig> vaultConfigMap;
117149
private final LinkedHashMap<String, ConnectionConfig> connectionConfigMap;
118150
private Credentials skyflowCredentials;
119151
private LogLevel logLevel;
120152

121153
public SkyflowClientBuilder() {
122154
this.vaultClientsMap = new LinkedHashMap<>();
155+
this.detectClientsMap = new LinkedHashMap<>();
123156
this.vaultConfigMap = new LinkedHashMap<>();
124157
this.connectionsMap = new LinkedHashMap<>();
125158
this.connectionConfigMap = new LinkedHashMap<>();
@@ -139,8 +172,11 @@ public SkyflowClientBuilder addVaultConfig(VaultConfig vaultConfig) throws Skyfl
139172
} else {
140173
this.vaultConfigMap.put(vaultConfig.getVaultId(), vaultConfig);
141174
this.vaultClientsMap.put(vaultConfig.getVaultId(), new VaultController(vaultConfig, this.skyflowCredentials));
175+
this.detectClientsMap.put(vaultConfig.getVaultId(), new DetectController(vaultConfig, this.skyflowCredentials));
142176
LogUtil.printInfoLog(Utils.parameterizedString(
143177
InfoLogs.VAULT_CONTROLLER_INITIALIZED.getLog(), vaultConfig.getVaultId()));
178+
LogUtil.printInfoLog(Utils.parameterizedString(
179+
InfoLogs.DETECT_CONTROLLER_INITIALIZED.getLog(), vaultConfig.getVaultId()));
144180
}
145181
return this;
146182
}
@@ -226,6 +262,9 @@ public SkyflowClientBuilder addSkyflowCredentials(Credentials credentials) throw
226262
for (VaultController vault : this.vaultClientsMap.values()) {
227263
vault.setCommonCredentials(this.skyflowCredentials);
228264
}
265+
for (DetectController detect : this.detectClientsMap.values()) {
266+
detect.setCommonCredentials(this.skyflowCredentials);
267+
}
229268
for (ConnectionController connection : this.connectionsMap.values()) {
230269
connection.setCommonCredentials(this.skyflowCredentials);
231270
}

src/main/java/com/skyflow/VaultClient.java

Lines changed: 176 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import com.skyflow.config.Credentials;
44
import com.skyflow.config.VaultConfig;
5+
import com.skyflow.enums.DetectEntities;
56
import com.skyflow.errors.ErrorCode;
67
import com.skyflow.errors.ErrorMessage;
78
import com.skyflow.errors.SkyflowException;
@@ -12,10 +13,15 @@
1213
import com.skyflow.generated.rest.resources.records.requests.RecordServiceBatchOperationBody;
1314
import com.skyflow.generated.rest.resources.records.requests.RecordServiceInsertRecordBody;
1415
import com.skyflow.generated.rest.resources.records.requests.RecordServiceUpdateRecordBody;
16+
import com.skyflow.generated.rest.resources.strings.StringsClient;
17+
import com.skyflow.generated.rest.resources.strings.requests.DeidentifyStringRequest;
18+
import com.skyflow.generated.rest.resources.strings.requests.ReidentifyStringRequest;
19+
import com.skyflow.generated.rest.resources.strings.types.ReidentifyStringRequestFormat;
1520
import com.skyflow.generated.rest.resources.tokens.TokensClient;
1621
import com.skyflow.generated.rest.resources.tokens.requests.V1DetokenizePayload;
1722
import com.skyflow.generated.rest.resources.tokens.requests.V1TokenizePayload;
1823
import com.skyflow.generated.rest.types.*;
24+
import com.skyflow.generated.rest.types.Transformations;
1925
import com.skyflow.logs.InfoLogs;
2026
import com.skyflow.serviceaccount.util.Token;
2127
import com.skyflow.utils.Constants;
@@ -24,16 +30,17 @@
2430
import com.skyflow.utils.validations.Validations;
2531
import com.skyflow.vault.data.InsertRequest;
2632
import com.skyflow.vault.data.UpdateRequest;
33+
import com.skyflow.vault.detect.*;
2734
import com.skyflow.vault.tokens.ColumnValue;
2835
import com.skyflow.vault.tokens.DetokenizeData;
2936
import com.skyflow.vault.tokens.DetokenizeRequest;
3037
import com.skyflow.vault.tokens.TokenizeRequest;
3138
import io.github.cdimascio.dotenv.Dotenv;
3239
import io.github.cdimascio.dotenv.DotenvException;
3340

34-
import java.util.ArrayList;
35-
import java.util.HashMap;
36-
import java.util.List;
41+
import java.util.*;
42+
import java.util.stream.Collectors;
43+
3744

3845
public class VaultClient {
3946
private final VaultConfig vaultConfig;
@@ -61,6 +68,10 @@ protected TokensClient getTokensApi() {
6168
return this.apiClient.tokens();
6269
}
6370

71+
protected StringsClient getDetectTextApi() {
72+
return this.apiClient.strings();
73+
}
74+
6475
protected QueryClient getQueryApi() {
6576
return this.apiClient.query();
6677
}
@@ -209,6 +220,168 @@ protected void setBearerToken() throws SkyflowException {
209220
this.apiClient = this.apiClientBuilder.build();
210221
}
211222

223+
protected DeidentifyTextResponse getDeIdentifyTextResponse(DeidentifyStringResponse deidentifyStringResponse) {
224+
List<EntityInfo> entities = deidentifyStringResponse.getEntities() != null
225+
? deidentifyStringResponse.getEntities().stream()
226+
.map(this::convertDetectedEntityToEntityInfo)
227+
.collect(Collectors.toList())
228+
: null;
229+
230+
return new DeidentifyTextResponse(
231+
deidentifyStringResponse.getProcessedText(),
232+
entities,
233+
deidentifyStringResponse.getWordCount(),
234+
deidentifyStringResponse.getCharacterCount()
235+
);
236+
}
237+
238+
protected DeidentifyStringRequest getDeidentifyStringRequest(DeidentifyTextRequest deIdentifyTextRequest, String vaultId) throws SkyflowException {
239+
List<DetectEntities> entities = deIdentifyTextRequest.getEntities();
240+
241+
List<EntityType> mappedEntityTypes = null;
242+
if (entities != null) {
243+
mappedEntityTypes = deIdentifyTextRequest.getEntities().stream()
244+
.map(detectEntity -> EntityType.valueOf(detectEntity.name()))
245+
.collect(Collectors.toList());
246+
}
247+
248+
TokenFormat tokenFormat = deIdentifyTextRequest.getTokenFormat();
249+
250+
Optional<List<EntityType>> vaultToken = Optional.empty();
251+
Optional<List<EntityType>> entityTypes = Optional.empty();
252+
Optional<List<EntityType>> entityUniqueCounter = Optional.empty();
253+
Optional<List<String>> allowRegex = Optional.ofNullable(deIdentifyTextRequest.getAllowRegexList());
254+
Optional<List<String>> restrictRegex = Optional.ofNullable(deIdentifyTextRequest.getRestrictRegexList());
255+
Optional<Transformations> transformations = Optional.ofNullable(getTransformations(deIdentifyTextRequest.getTransformations()));
256+
257+
if (tokenFormat != null) {
258+
if (tokenFormat.getVaultToken() != null && !tokenFormat.getVaultToken().isEmpty()) {
259+
vaultToken = Optional.of(tokenFormat.getVaultToken().stream()
260+
.map(detectEntity -> EntityType.valueOf(detectEntity.name()))
261+
.collect(Collectors.toList()));
262+
}
263+
264+
if (tokenFormat.getEntityOnly() != null && !tokenFormat.getEntityOnly().isEmpty()) {
265+
entityTypes = Optional.of(tokenFormat.getEntityOnly().stream()
266+
.map(detectEntity -> EntityType.valueOf(detectEntity.name()))
267+
.collect(Collectors.toList()));
268+
}
269+
270+
if (tokenFormat.getEntityUniqueCounter() != null && !tokenFormat.getEntityUniqueCounter().isEmpty()) {
271+
entityUniqueCounter = Optional.of(tokenFormat.getEntityUniqueCounter().stream()
272+
.map(detectEntity -> EntityType.valueOf(detectEntity.name()))
273+
.collect(Collectors.toList()));
274+
}
275+
}
276+
277+
TokenType tokenType = TokenType.builder()
278+
.vaultToken(vaultToken)
279+
.entityOnly(entityTypes)
280+
.entityUnqCounter(entityUniqueCounter)
281+
.build();
282+
283+
284+
return DeidentifyStringRequest.builder()
285+
.vaultId(vaultId)
286+
.text(deIdentifyTextRequest.getText())
287+
.entityTypes(mappedEntityTypes)
288+
.tokenType(tokenType)
289+
.allowRegex(allowRegex)
290+
.restrictRegex(restrictRegex)
291+
.transformations(transformations)
292+
.build();
293+
}
294+
295+
protected ReidentifyStringRequest getReidentifyStringRequest(ReidentifyTextRequest reidentifyTextRequest, String vaultId) throws SkyflowException {
296+
List<EntityType> maskEntities = null;
297+
List<EntityType> redactedEntities = null;
298+
List<EntityType> plaintextEntities = null;
299+
300+
if (reidentifyTextRequest.getMaskedEntities() != null) {
301+
maskEntities = reidentifyTextRequest.getMaskedEntities().stream()
302+
.map(detectEntity -> EntityType.valueOf(detectEntity.name()))
303+
.collect(Collectors.toList());
304+
}
305+
306+
if (reidentifyTextRequest.getPlainTextEntities() != null) {
307+
plaintextEntities = reidentifyTextRequest.getPlainTextEntities().stream()
308+
.map(detectEntity -> EntityType.valueOf(detectEntity.name()))
309+
.collect(Collectors.toList());
310+
}
311+
312+
if (reidentifyTextRequest.getRedactedEntities() != null) {
313+
redactedEntities = reidentifyTextRequest.getRedactedEntities().stream()
314+
.map(detectEntity -> EntityType.valueOf(detectEntity.name()))
315+
.collect(Collectors.toList());
316+
}
317+
318+
ReidentifyStringRequestFormat reidentifyStringRequestFormat = ReidentifyStringRequestFormat.builder()
319+
.masked(maskEntities)
320+
.plaintext(plaintextEntities)
321+
.redacted(redactedEntities)
322+
.build();
323+
324+
325+
return ReidentifyStringRequest.builder()
326+
.text(reidentifyTextRequest.getText())
327+
.vaultId(vaultId)
328+
.format(reidentifyStringRequestFormat)
329+
.build();
330+
}
331+
332+
333+
private EntityInfo convertDetectedEntityToEntityInfo(DetectedEntity detectedEntity) {
334+
TextIndex textIndex = new TextIndex(
335+
detectedEntity.getLocation().get().getStartIndex().orElse(0),
336+
detectedEntity.getLocation().get().getEndIndex().orElse(0)
337+
);
338+
TextIndex processedIndex = new TextIndex(
339+
detectedEntity.getLocation().get().getStartIndexProcessed().orElse(0),
340+
detectedEntity.getLocation().get().getEndIndexProcessed().orElse(0)
341+
);
342+
343+
Map<String, Float> entityScores = detectedEntity.getEntityScores()
344+
.map(doubleMap -> doubleMap.entrySet().stream()
345+
.collect(Collectors.toMap(
346+
Map.Entry::getKey,
347+
entry -> entry.getValue().floatValue()
348+
)))
349+
.orElse(Collections.emptyMap());
350+
351+
352+
return new EntityInfo(
353+
detectedEntity.getToken().orElse(""),
354+
detectedEntity.getValue().orElse(""),
355+
textIndex,
356+
processedIndex,
357+
detectedEntity.getEntityType().orElse(""),
358+
entityScores);
359+
}
360+
361+
362+
private Transformations getTransformations(com.skyflow.vault.detect.Transformations transformations) {
363+
if (transformations == null || transformations.getShiftDates() == null) {
364+
return null;
365+
}
366+
367+
List<TransformationsShiftDatesEntityTypesItem> entityTypes = null;
368+
if (!transformations.getShiftDates().getEntities().isEmpty()) {
369+
entityTypes = transformations.getShiftDates().getEntities().stream()
370+
.map(entity -> TransformationsShiftDatesEntityTypesItem.valueOf(entity.name()))
371+
.collect(Collectors.toList());
372+
} else {
373+
entityTypes = Collections.emptyList();
374+
}
375+
376+
return Transformations.builder()
377+
.shiftDates(TransformationsShiftDates.builder()
378+
.maxDays(transformations.getShiftDates().getMax())
379+
.minDays(transformations.getShiftDates().getMin())
380+
.entityTypes(entityTypes)
381+
.build())
382+
.build();
383+
}
384+
212385
private void setApiKey() {
213386
if (apiKey == null) {
214387
apiKey = this.finalCredentials.getApiKey();

0 commit comments

Comments
 (0)