diff --git a/plugins/signum/.gitignore b/plugins/signum/.gitignore new file mode 100644 index 0000000..91ea741 --- /dev/null +++ b/plugins/signum/.gitignore @@ -0,0 +1,2 @@ +build +.gradle diff --git a/plugins/signum/build.gradle b/plugins/signum/build.gradle new file mode 100644 index 0000000..28f62f8 --- /dev/null +++ b/plugins/signum/build.gradle @@ -0,0 +1,29 @@ +apply plugin: 'groovy' +// Apply the java plugin to add support for Java +apply plugin: 'java' + +// In this section you declare where to find the dependencies of your project +repositories { + mavenCentral() +} + +sourceSets { + main { + java { + srcDirs = [] + } + groovy { + srcDirs = ['src/main/java', 'src/main/groovy'] + } + } +} + +dependencies { + // This dependency is found on compile classpath of this component and consumers. + compile 'org.springframework.batch:spring-batch-core:3.0.7.RELEASE' + compile 'org.slf4j:slf4j-api:1.7.21' + compile 'org.codehaus.groovy:groovy-all:2.4.4' + compile group: 'com.google.code.gson', name: 'gson', version: '2.6.2' + // Use JUnit test framework + // testCompile 'junit:junit:4.12' +} diff --git a/plugins/signum/gradle/wrapper/gradle-wrapper.jar b/plugins/signum/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 0000000..8c57b52 Binary files /dev/null and b/plugins/signum/gradle/wrapper/gradle-wrapper.jar differ diff --git a/plugins/signum/gradle/wrapper/gradle-wrapper.properties b/plugins/signum/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000..e4f764a --- /dev/null +++ b/plugins/signum/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,6 @@ +#Sat Mar 25 00:27:56 GMT 2017 +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-3.4.1-bin.zip diff --git a/plugins/signum/gradlew b/plugins/signum/gradlew new file mode 100755 index 0000000..4453cce --- /dev/null +++ b/plugins/signum/gradlew 
@@ -0,0 +1,172 @@ +#!/usr/bin/env sh + +############################################################################## +## +## Gradle start up script for UN*X +## +############################################################################## + +# Attempt to set APP_HOME +# Resolve links: $0 may be a link +PRG="$0" +# Need this for relative symlinks. +while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`"/$link" + fi +done +SAVED="`pwd`" +cd "`dirname \"$PRG\"`/" >/dev/null +APP_HOME="`pwd -P`" +cd "$SAVED" >/dev/null + +APP_NAME="Gradle" +APP_BASE_NAME=`basename "$0"` + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS="" + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD="maximum" + +warn ( ) { + echo "$*" +} + +die ( ) { + echo + echo "$*" + echo + exit 1 +} + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "`uname`" in + CYGWIN* ) + cygwin=true + ;; + Darwin* ) + darwin=true + ;; + MINGW* ) + msys=true + ;; + NONSTOP* ) + nonstop=true + ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD="java" + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 
+ +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then + MAX_FD_LIMIT=`ulimit -H -n` + if [ $? -eq 0 ] ; then + if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then + MAX_FD="$MAX_FD_LIMIT" + fi + ulimit -n $MAX_FD + if [ $? -ne 0 ] ; then + warn "Could not set maximum file descriptor limit: $MAX_FD" + fi + else + warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" + fi +fi + +# For Darwin, add options to specify how the application appears in the dock +if $darwin; then + GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" +fi + +# For Cygwin, switch paths to Windows format before running java +if $cygwin ; then + APP_HOME=`cygpath --path --mixed "$APP_HOME"` + CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` + JAVACMD=`cygpath --unix "$JAVACMD"` + + # We build the pattern for arguments to be converted via cygpath + ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` + SEP="" + for dir in $ROOTDIRSRAW ; do + ROOTDIRS="$ROOTDIRS$SEP$dir" + SEP="|" + done + OURCYGPATTERN="(^($ROOTDIRS))" + # Add a user-defined pattern to the cygpath arguments + if [ "$GRADLE_CYGPATTERN" != "" ] ; then + OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" + fi + # Now convert the arguments - kludge to limit ourselves to /bin/sh + i=0 + for arg in "$@" ; do + CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` + CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option + + if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition + eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` + else + eval `echo args$i`="\"$arg\"" + fi + i=$((i+1)) + done + case $i in + (0) set -- ;; + (1) set -- "$args0" ;; + (2) set -- "$args0" "$args1" ;; + (3) set -- "$args0" "$args1" "$args2" ;; + (4) set -- "$args0" 
"$args1" "$args2" "$args3" ;; + (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; + (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; + (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; + (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; + (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; + esac +fi + +# Escape application args +save ( ) { + for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done + echo " " +} +APP_ARGS=$(save "$@") + +# Collect all arguments for the java command, following the shell quoting and substitution rules +eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" + +# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong +if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then + cd "$(dirname "$0")" +fi + +exec "$JAVACMD" "$@" diff --git a/plugins/signum/gradlew.bat b/plugins/signum/gradlew.bat new file mode 100644 index 0000000..e95643d --- /dev/null +++ b/plugins/signum/gradlew.bat @@ -0,0 +1,84 @@ +@if "%DEBUG%" == "" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%" == "" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 
+set DEFAULT_JVM_OPTS= + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if "%ERRORLEVEL%" == "0" goto init + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto init + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:init +@rem Get command-line arguments, handling Windows variants + +if not "%OS%" == "Windows_NT" goto win9xME_args + +:win9xME_args +@rem Slurp the command line arguments. +set CMD_LINE_ARGS= +set _SKIP=2 + +:win9xME_args_slurp +if "x%~1" == "x" goto execute + +set CMD_LINE_ARGS=%* + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% + +:end +@rem End local scope for the variables with windows NT shell +if "%ERRORLEVEL%"=="0" goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! +if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 +exit /b 1 + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/plugins/signum/settings.gradle b/plugins/signum/settings.gradle new file mode 100644 index 0000000..a018d57 --- /dev/null +++ b/plugins/signum/settings.gradle @@ -0,0 +1,18 @@ +/* + * This settings file was generated by the Gradle 'init' task. 
+ * + * The settings file is used to specify which projects to include in your build. + * In a single project build this file can be empty or even removed. + * + * Detailed information about configuring a multi-project build in Gradle can be found + * in the user guide at https://docs.gradle.org/3.4.1/userguide/multi_project_builds.html + */ + +/* +// To declare projects as part of a multi-project build use the 'include' method +include 'shared' +include 'api' +include 'services:webservice' +*/ + +rootProject.name = 'signum' diff --git a/plugins/signum/src/main/groovy/uk/ac/kcl/model/Document.groovy b/plugins/signum/src/main/groovy/uk/ac/kcl/model/Document.groovy new file mode 100644 index 0000000..26d95be --- /dev/null +++ b/plugins/signum/src/main/groovy/uk/ac/kcl/model/Document.groovy @@ -0,0 +1,43 @@ +package uk.ac.kcl.model + +import com.google.gson.Gson +import com.google.gson.GsonBuilder + +import java.sql.Timestamp + +/** + * Created by rich on 15/04/16. + */ +class Document { + + //generic fields + String databaseName + String databaseSchema + String srcTableName + String srcColumnFieldName + String primaryKeyFieldName + String primaryKeyFieldValue + Timestamp timeStamp + HashSet exceptions = new HashSet<>() + Gson gson = new GsonBuilder().create(); + + //for capturing itemProcessor output + //XContentBuilder xContentBuilder + + String outputData; + + //for tika + byte[] binaryContent + + //for Gate + String textContent + + + //for es + HashMap associativeArray = new HashMap(); + + public String getDocName(){ + return srcTableName+"_"+srcColumnFieldName+"_"+primaryKeyFieldValue + } + +} diff --git a/plugins/signum/src/main/java/uk/ac/ucl/signum/PostBioyodieItemProcessor.java b/plugins/signum/src/main/java/uk/ac/ucl/signum/PostBioyodieItemProcessor.java new file mode 100644 index 0000000..6e2104a --- /dev/null +++ b/plugins/signum/src/main/java/uk/ac/ucl/signum/PostBioyodieItemProcessor.java @@ -0,0 +1,103 @@
+package uk.ac.ucl.signum;
+
+import org.slf4j.LoggerFactory;
+import org.springframework.batch.item.ItemProcessor;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.context.annotation.Profile;
+import org.springframework.core.env.Environment;
+import org.springframework.stereotype.Service;
+
+import uk.ac.kcl.model.Document;
+
+import java.lang.ClassCastException;
+import java.util.List;
+import java.util.Map;
+import javax.annotation.PostConstruct;
+
+/**
+ * Item processor, active under the {@code postbioyodie} profile, that rewrites the value a
+ * document holds under the configured field name.
+ *
+ * <p>That value is read as a map, its {@code "entities"} entry is read as a map, and that
+ * map's {@code "Mention"} entry, when present, replaces the original value. The marker entry
+ * {@code "X-PLUGINS-POST-BIO-YODIE" -> "success"} is added at the same time. A document
+ * without that nested path is returned unchanged.
+ */
+@Profile("postbioyodie")
+@Service("PostBioyodieItemProcessor")
+public class PostBioyodieItemProcessor implements ItemProcessor<Document, Document> {
+
+    private static final org.slf4j.Logger LOG = LoggerFactory.getLogger(PostBioyodieItemProcessor.class);
+
+    @Autowired
+    Environment env;
+
+    /** Associative-array key to rewrite; injected from {@code webservice.fieldName}. */
+    @Value("${webservice.fieldName}")
+    private String bioYodieFieldName;
+
+    /** Post-construction hook; intentionally empty. */
+    @PostConstruct
+    public void init() {
+
+    }
+
+    /**
+     * Replaces the configured field's value with its nested {@code entities -> Mention} value.
+     *
+     * <p>Exceptions raised while rewriting the field are logged and not rethrown; the document
+     * is then returned in whatever state it had reached.
+     *
+     * @param doc the document to post-process; modified in place
+     * @return the same {@code doc} instance
+     * @throws Exception declared by {@link ItemProcessor#process(Object)}
+     */
+    @Override
+    public Document process(final Document doc) throws Exception {
+        final String processorName = this.getClass().getSimpleName();
+        // One placeholder per argument, so the document name actually reaches the log.
+        LOG.info("Starting {} on doc {}", processorName, doc.getDocName());
+
+        final long startTime = System.currentTimeMillis();
+        try {
+            if (bioYodieFieldName != null) {
+                final Object mentions = findMentions(doc.getAssociativeArray().get(bioYodieFieldName));
+                if (mentions != null) {
+                    doc.getAssociativeArray().put(bioYodieFieldName, mentions);
+                    doc.getAssociativeArray().put("X-PLUGINS-POST-BIO-YODIE", "success");
+                }
+            }
+            final long endTime = System.currentTimeMillis();
+            LOG.info("{};Time:{} ms", processorName, endTime - startTime);
+            LOG.info("Finished {} on doc {}", processorName, doc.getDocName());
+        } catch (ClassCastException castEx) {
+            LOG.warn("ClassCastException caught, possibly due to malformed result", castEx);
+        } catch (Exception e) {
+            // SLF4J convention: the throwable is the trailing argument and gets no placeholder.
+            LOG.error("Exception caught", e);
+        }
+        // Returned after the try/catch rather than from a finally block: a return inside
+        // finally would also discard any Error (e.g. OutOfMemoryError) thrown above.
+        return doc;
+    }
+
+    /**
+     * Walks {@code bioyodieResult -> "entities" -> "Mention"}.
+     *
+     * @param bioyodieResult the value stored under the configured field; may be {@code null}
+     * @return the {@code "Mention"} value, or {@code null} when any step of the path is absent
+     * @throws ClassCastException if {@code bioyodieResult} or its {@code "entities"} entry is
+     *     not a {@link Map}
+     */
+    private static Object findMentions(final Object bioyodieResult) {
+        if (bioyodieResult == null) {
+            return null;
+        }
+        final Object entities = ((Map<?, ?>) bioyodieResult).get("entities");
+        if (entities == null) {
+            return null;
+        }
+        return ((Map<?, ?>) entities).get("Mention");
+    }
+}
diff --git a/src/integration-test/resources/plugins.properties b/src/integration-test/resources/plugins.properties new file mode 100644 index 0000000..17ddf51 --- /dev/null +++ b/src/integration-test/resources/plugins.properties @@ -0,0 +1,3 @@ +# Comma separated list of plugin Bean names +# E.g. +# plugins.names = PostBioyodieItemProcessor diff --git a/src/main/java/uk/ac/kcl/batch/JobConfiguration.java b/src/main/java/uk/ac/kcl/batch/JobConfiguration.java index 3770fb5..5e32be2 100644 --- a/src/main/java/uk/ac/kcl/batch/JobConfiguration.java +++ b/src/main/java/uk/ac/kcl/batch/JobConfiguration.java @@ -42,6 +42,7 @@ import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.autoconfigure.EnableAutoConfiguration; import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration; +import org.springframework.context.ApplicationContext; import org.springframework.context.annotation.*; import org.springframework.context.support.PropertySourcesPlaceholderConfigurer; import org.springframework.core.env.Environment; @@ -56,6 +57,8 @@ import javax.sql.DataSource; import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; /** * @@ -69,6 +72,7 @@ "uk.ac.kcl.partitioners", "uk.ac.kcl.itemProcessors", "uk.ac.kcl.itemWriters", + "uk.ac.ucl.signum", "uk.ac.kcl.cleanup"}) @EnableBatchProcessing @Import({ @@ -79,6 +83,9 @@ public class JobConfiguration { private static final org.slf4j.Logger LOG = LoggerFactory.getLogger(JobConfiguration.class); + @Autowired + ApplicationContext context; + ///Configure order of processoer and writer composites here @Bean @@ -94,6 +101,18 @@ public ItemProcessor compositeItemProcessor() { if(deIdDocumentItemProcessor !=null) delegates.add(deIdDocumentItemProcessor);
if(webserviceDocumentItemProcessor !=null) delegates.add(webserviceDocumentItemProcessor);
+        // Plugins: beans named in the comma separated 'plugins.names' property, added in order.
+        List<String> plugins = Arrays.asList(env.getProperty("plugins.names", "").split(","));
+        for (String plugin : plugins) {
+            String beanName = plugin.trim();
+            if (beanName.isEmpty()) { continue; } // "".split(",") yields [""] when the property is unset
+            try {
+                delegates.add((ItemProcessor) context.getBean(beanName));
+                LOG.info("Dynamically loaded plugin: {}", beanName);
+            } catch (Exception e) {
+                LOG.warn("Plugin {} failed to load", beanName, e);
+            }
+        }
 delegates.add(jsonMakerItemProcessor); processor.setDelegates(delegates); return processor;