From 0e90fe8c107cd90a7baf16b5d4f334d1650a0afb Mon Sep 17 00:00:00 2001 From: pwawrzyniak Date: Tue, 14 Mar 2017 17:43:25 +0100 Subject: [PATCH 01/17] Initial structure of Kafka components. Initital code for Kafka Consumer --- samoa-api/pom.xml | 208 +++++++++--------- .../streams/kafka/KafkaDeserializer.java | 30 +++ .../kafka/KafkaDestinationProcessor.java | 42 ++++ .../streams/kafka/KafkaEntranceProcessor.java | 65 ++++++ .../samoa/streams/kafka/KafkaSerializer.java | 31 +++ .../samoa/streams/kafka/KafkaUtils.java | 71 ++++++ 6 files changed, 346 insertions(+), 101 deletions(-) create mode 100644 samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDeserializer.java create mode 100644 samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessor.java create mode 100644 samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessor.java create mode 100644 samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaSerializer.java create mode 100644 samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaUtils.java diff --git a/samoa-api/pom.xml b/samoa-api/pom.xml index 9f69e20b..4621b931 100644 --- a/samoa-api/pom.xml +++ b/samoa-api/pom.xml @@ -11,119 +11,125 @@ http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - #L% - --> +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+#L% +--> - 4.0.0 - - UTF-8 - + 4.0.0 + + UTF-8 + - samoa-api - API and algorithms for SAMOA + samoa-api + API and algorithms for SAMOA - samoa-api - - org.apache.samoa - samoa - 0.5.0-incubating-SNAPSHOT - + samoa-api + + org.apache.samoa + samoa + 0.5.0-incubating-SNAPSHOT + - - - com.yammer.metrics - metrics-core - ${metrics-core.version} - + + + com.yammer.metrics + metrics-core + ${metrics-core.version} + - - net.jcip - jcip-annotations - ${jcip-annotations.version} - + + net.jcip + jcip-annotations + ${jcip-annotations.version} + - - org.apache.commons - commons-lang3 - ${commons-lang3.version} - + + org.apache.commons + commons-lang3 + ${commons-lang3.version} + - - com.github.javacliparser - javacliparser - ${javacliparser.version} - + + com.github.javacliparser + javacliparser + ${javacliparser.version} + - - org.apache.samoa - samoa-instances - ${project.version} - + + org.apache.samoa + samoa-instances + ${project.version} + - - com.google.guava - guava - ${guava.version} - + + com.google.guava + guava + ${guava.version} + - - com.esotericsoftware.kryo - kryo - ${kryo.version} - + + com.esotericsoftware.kryo + kryo + ${kryo.version} + - - com.dreizak - miniball - ${miniball.version} - + + com.dreizak + miniball + ${miniball.version} + - - org.apache.hadoop - hadoop-common - ${hadoop.version} - - - org.apache.hadoop - hadoop-hdfs - ${hadoop.version} - - - org.apache.hadoop - hadoop-minicluster - ${hadoop.version} - test - - + + org.apache.hadoop + hadoop-common + ${hadoop.version} + + + org.apache.hadoop + hadoop-hdfs + ${hadoop.version} + + + org.apache.hadoop + hadoop-minicluster + ${hadoop.version} + test + + + + org.apache.kafka + kafka-clients + 0.10.2.0 + + - - - - org.apache.maven.plugins - maven-dependency-plugin - ${maven-dependency-plugin.version} - - - copy-dependencies - package - - copy-dependencies - - - ${project.build.directory}/lib - false - false - true - - - - - - + + + + org.apache.maven.plugins + maven-dependency-plugin + 
${maven-dependency-plugin.version} + + + copy-dependencies + package + + copy-dependencies + + + ${project.build.directory}/lib + false + false + true + + + + + + diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDeserializer.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDeserializer.java new file mode 100644 index 00000000..2c7dae19 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDeserializer.java @@ -0,0 +1,30 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.samoa.streams.kafka; + +import org.apache.samoa.core.ContentEvent; + +/** + * + * @author pwawrzyniak + * @param the class that would be deserialized + */ +public interface KafkaDeserializer { + + // TODO: Consider key-value schema? + + T deserialize(byte[] message); +} diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessor.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessor.java new file mode 100644 index 00000000..ed8f164b --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessor.java @@ -0,0 +1,42 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.samoa.streams.kafka; + +import org.apache.samoa.core.ContentEvent; +import org.apache.samoa.core.Processor; + +/** + * + * @author pwawrzyniak + */ +public class KafkaDestinationProcessor implements Processor { + + @Override + public boolean process(ContentEvent event) { + throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. + } + + @Override + public void onCreate(int id) { + throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. + } + + @Override + public Processor newProcessor(Processor processor) { + throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. + } + +} diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessor.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessor.java new file mode 100644 index 00000000..228e81b9 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessor.java @@ -0,0 +1,65 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.samoa.streams.kafka; + +import java.util.Properties; +import org.apache.samoa.core.ContentEvent; +import org.apache.samoa.core.EntranceProcessor; +import org.apache.samoa.core.Processor; + +/** + * + * @author pwawrzyniak + */ +public class KafkaEntranceProcessor implements EntranceProcessor { + + transient private KafkaUtils kafkaUtils; + + public KafkaEntranceProcessor(Properties props, String topic, int batchSize) { + kafkaUtils = new KafkaUtils(props, null, batchSize); + } + + @Override + public void onCreate(int id) { + throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. + } + + @Override + public boolean isFinished() { + throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. + } + + @Override + public boolean hasNext() { + throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. + } + + @Override + public ContentEvent nextEvent() { + throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. + } + + @Override + public boolean process(ContentEvent event) { + throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. 
+ } + + @Override + public Processor newProcessor(Processor processor) { + throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. + } + +} diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaSerializer.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaSerializer.java new file mode 100644 index 00000000..29f04ca5 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaSerializer.java @@ -0,0 +1,31 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.samoa.streams.kafka; + +import org.apache.samoa.core.ContentEvent; + +/** + * + * @author pwawrzyniak + * @param the class that would be serialized + */ +public interface KafkaSerializer { + + // TODO: Consider Key-Value schema? + + + byte[] serialize(T message); +} diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaUtils.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaUtils.java new file mode 100644 index 00000000..c2fbaa88 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaUtils.java @@ -0,0 +1,71 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.samoa.streams.kafka; + +import java.util.Collection; +import java.util.Properties; +import org.apache.kafka.clients.consumer.ConsumerRecords; +import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.clients.producer.KafkaProducer; + +/** + * Internal class responsible for Kafka Stream handling + * + * @author pwawrzyniak + */ +class KafkaUtils { + + // Consumer class for internal use to retrieve messages from Kafka + private KafkaConsumer consumer; + + private KafkaProducer producer; + + // Properties of the consumer, as defined in Kafka documentation + private Properties consumerProperties; + private Properties producerProperties; + + // Batch size for Kafka Consumer + private int consumerTimeout; + + public KafkaUtils(Properties consumerProperties, Properties producerProperties, int consumerTimeout) { + this.consumerProperties = consumerProperties; + this.producerProperties = producerProperties; + this.consumerTimeout = consumerTimeout; + } + + public void initializeConsumer(Collection topics) { + // lazy initialization + if (consumer == null) { + consumer = new KafkaConsumer(consumerProperties); + } + consumer.subscribe(topics); + } + + public ConsumerRecords getMessages() throws Exception { + + if (consumer != null) { + if (!consumer.subscription().isEmpty()) { + return consumer.poll(consumerTimeout); + } else { + // TODO: do it more elegant way + throw new Exception("Consumer subscribed to no topics!"); + } + } else { + // TODO: do more elegant way + throw new Exception("Consumer not 
initialised"); + } + } +} From 2168400f86189605b06fb531511235733bf3b6ca Mon Sep 17 00:00:00 2001 From: pwawrzyniak Date: Fri, 17 Mar 2017 11:05:14 +0100 Subject: [PATCH 02/17] Code for KafkaEntranceProcessor (consuming messages from Kafka) --- .../streams/kafka/KafkaEntranceProcessor.java | 153 ++++++++++------- .../samoa/streams/kafka/KafkaUtils.java | 161 ++++++++++-------- 2 files changed, 178 insertions(+), 136 deletions(-) diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessor.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessor.java index 228e81b9..b1e8a7f6 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessor.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessor.java @@ -1,65 +1,88 @@ -/* - * Copyright 2017 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.samoa.streams.kafka; - -import java.util.Properties; -import org.apache.samoa.core.ContentEvent; -import org.apache.samoa.core.EntranceProcessor; -import org.apache.samoa.core.Processor; - -/** - * - * @author pwawrzyniak - */ -public class KafkaEntranceProcessor implements EntranceProcessor { - - transient private KafkaUtils kafkaUtils; - - public KafkaEntranceProcessor(Properties props, String topic, int batchSize) { - kafkaUtils = new KafkaUtils(props, null, batchSize); - } - - @Override - public void onCreate(int id) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public boolean isFinished() { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public boolean hasNext() { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public ContentEvent nextEvent() { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public boolean process(ContentEvent event) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public Processor newProcessor(Processor processor) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - -} +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.samoa.streams.kafka; + +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.apache.kafka.clients.consumer.ConsumerRecords; +import org.apache.samoa.core.ContentEvent; +import org.apache.samoa.core.EntranceProcessor; +import org.apache.samoa.core.Processor; + +/** + * + * @author pwawrzyniak + */ +public class KafkaEntranceProcessor implements EntranceProcessor { + + transient private KafkaUtils kafkaUtils; + List buffer; + private final KafkaDeserializer deserializer; + + public KafkaEntranceProcessor(Properties props, String topic, int timeout, KafkaDeserializer deserializer) { + this.kafkaUtils = new KafkaUtils(props, null, timeout); + this.deserializer = deserializer; + } + + private KafkaEntranceProcessor(KafkaUtils kafkaUtils, KafkaDeserializer deserializer) { + this.kafkaUtils = kafkaUtils; + this.deserializer = deserializer; + } + + @Override + public void onCreate(int id) { + this.buffer = new ArrayList<>(100); + } + + @Override + public boolean isFinished() { + return false; + } + + @Override + public boolean hasNext() { + if (buffer.isEmpty()) { + try { + buffer.addAll(kafkaUtils.getKafkaMessages()); + } catch (Exception ex) { + Logger.getLogger(KafkaEntranceProcessor.class.getName()).log(Level.SEVERE, null, ex); + } + } + return buffer.size() > 0; + } + + @Override + public ContentEvent nextEvent() { + // assume this will never be called when buffer is empty! 
+ return this.deserializer.deserialize(buffer.remove(buffer.size() - 1)); + + } + + @Override + public boolean process(ContentEvent event) { + return false; + } + + @Override + public Processor newProcessor(Processor processor) { + KafkaEntranceProcessor kep = (KafkaEntranceProcessor) processor; + return new KafkaEntranceProcessor(new KafkaUtils(kep.kafkaUtils), deserializer); + } + +} diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaUtils.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaUtils.java index c2fbaa88..d1488784 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaUtils.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaUtils.java @@ -1,71 +1,90 @@ -/* - * Copyright 2017 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.samoa.streams.kafka; - -import java.util.Collection; -import java.util.Properties; -import org.apache.kafka.clients.consumer.ConsumerRecords; -import org.apache.kafka.clients.consumer.KafkaConsumer; -import org.apache.kafka.clients.producer.KafkaProducer; - -/** - * Internal class responsible for Kafka Stream handling - * - * @author pwawrzyniak - */ -class KafkaUtils { - - // Consumer class for internal use to retrieve messages from Kafka - private KafkaConsumer consumer; - - private KafkaProducer producer; - - // Properties of the consumer, as defined in Kafka documentation - private Properties consumerProperties; - private Properties producerProperties; - - // Batch size for Kafka Consumer - private int consumerTimeout; - - public KafkaUtils(Properties consumerProperties, Properties producerProperties, int consumerTimeout) { - this.consumerProperties = consumerProperties; - this.producerProperties = producerProperties; - this.consumerTimeout = consumerTimeout; - } - - public void initializeConsumer(Collection topics) { - // lazy initialization - if (consumer == null) { - consumer = new KafkaConsumer(consumerProperties); - } - consumer.subscribe(topics); - } - - public ConsumerRecords getMessages() throws Exception { - - if (consumer != null) { - if (!consumer.subscription().isEmpty()) { - return consumer.poll(consumerTimeout); - } else { - // TODO: do it more elegant way - throw new Exception("Consumer subscribed to no topics!"); - } - } else { - // TODO: do more elegant way - throw new Exception("Consumer not initialised"); - } - } -} +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.samoa.streams.kafka; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.Properties; +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.apache.kafka.clients.consumer.ConsumerRecords; +import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.clients.producer.KafkaProducer; + +/** + * Internal class responsible for Kafka Stream handling + * + * @author pwawrzyniak + */ +class KafkaUtils { + + // Consumer class for internal use to retrieve messages from Kafka + private KafkaConsumer consumer; + + private KafkaProducer producer; + + // Properties of the consumer, as defined in Kafka documentation + private Properties consumerProperties; + private Properties producerProperties; + + // Batch size for Kafka Consumer + private int consumerTimeout; + + public KafkaUtils(Properties consumerProperties, Properties producerProperties, int consumerTimeout) { + this.consumerProperties = consumerProperties; + this.producerProperties = producerProperties; + this.consumerTimeout = consumerTimeout; + } + + KafkaUtils(KafkaUtils kafkaUtils) { + this.consumerProperties = kafkaUtils.consumerProperties; + this.producerProperties = kafkaUtils.producerProperties; + this.consumerTimeout = kafkaUtils.consumerTimeout; + } + + public void initializeConsumer(Collection topics) { + // lazy initialization + if (consumer == null) { + consumer = new KafkaConsumer(consumerProperties); + } + consumer.subscribe(topics); + } + + public List 
getKafkaMessages() throws Exception { + + if (consumer != null) { + if (!consumer.subscription().isEmpty()) { + return getMessagesBytes(consumer.poll(consumerTimeout)); + } else { + // TODO: do it more elegant way + throw new Exception("Consumer subscribed to no topics!"); + } + } else { + // TODO: do more elegant way + throw new Exception("Consumer not initialised"); + } + } + + private List getMessagesBytes(ConsumerRecords poll) { + Iterator> iterator = poll.iterator(); + List ret = new ArrayList<>(); + while(iterator.hasNext()){ + ret.add(iterator.next().value()); + } + return ret; + } +} From cf2ff369989fa3560f1570dfaf32bcb746d51c25 Mon Sep 17 00:00:00 2001 From: pwawrzyniak Date: Fri, 17 Mar 2017 11:54:47 +0100 Subject: [PATCH 03/17] Updated comments --- .../streams/kafka/KafkaEntranceProcessor.java | 27 ++++++++++++----- .../samoa/streams/kafka/KafkaUtils.java | 29 +++++++++++++++---- 2 files changed, 43 insertions(+), 13 deletions(-) diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessor.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessor.java index b1e8a7f6..fe82212f 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessor.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessor.java @@ -16,38 +16,52 @@ package org.apache.samoa.streams.kafka; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Properties; import java.util.logging.Level; import java.util.logging.Logger; -import org.apache.kafka.clients.consumer.ConsumerRecords; import org.apache.samoa.core.ContentEvent; import org.apache.samoa.core.EntranceProcessor; import org.apache.samoa.core.Processor; /** - * + * Entrance processor that reads incoming messages from Apache Kafka * @author pwawrzyniak + * @version 0.5.0-incubating-SNAPSHOT + * @since 0.5.0-incubating */ public class KafkaEntranceProcessor implements 
EntranceProcessor { - transient private KafkaUtils kafkaUtils; - List buffer; + transient private final KafkaUtils kafkaUtils; + private List buffer; private final KafkaDeserializer deserializer; + private final String topic; + /** + * Class constructor + * @param props Properties of Kafka consumer + * @see Apache Kafka consumer configuration + * @param topic Topic from which the messages should be read + * @param timeout Timeout used when polling Kafka for new messages + * @param deserializer Instance of the implementation of {@link KafkaDeserializer} + */ public KafkaEntranceProcessor(Properties props, String topic, int timeout, KafkaDeserializer deserializer) { this.kafkaUtils = new KafkaUtils(props, null, timeout); this.deserializer = deserializer; + this.topic = topic; } - private KafkaEntranceProcessor(KafkaUtils kafkaUtils, KafkaDeserializer deserializer) { + private KafkaEntranceProcessor(KafkaUtils kafkaUtils, KafkaDeserializer deserializer, String topic) { this.kafkaUtils = kafkaUtils; this.deserializer = deserializer; + this.topic = topic; } @Override public void onCreate(int id) { this.buffer = new ArrayList<>(100); + this.kafkaUtils.initializeConsumer(Arrays.asList(this.topic)); } @Override @@ -82,7 +96,6 @@ public boolean process(ContentEvent event) { @Override public Processor newProcessor(Processor processor) { KafkaEntranceProcessor kep = (KafkaEntranceProcessor) processor; - return new KafkaEntranceProcessor(new KafkaUtils(kep.kafkaUtils), deserializer); + return new KafkaEntranceProcessor(new KafkaUtils(kep.kafkaUtils), kep.deserializer, kep.topic); } - } diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaUtils.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaUtils.java index d1488784..c87b2f14 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaUtils.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaUtils.java @@ -26,9 +26,11 @@ import 
org.apache.kafka.clients.producer.KafkaProducer; /** - * Internal class responsible for Kafka Stream handling + * Internal class responsible for Kafka Stream handling (both consume and produce) * * @author pwawrzyniak + * @version 0.5.0-incubating-SNAPSHOT + * @since 0.5.0-incubating */ class KafkaUtils { @@ -38,12 +40,18 @@ class KafkaUtils { private KafkaProducer producer; // Properties of the consumer, as defined in Kafka documentation - private Properties consumerProperties; - private Properties producerProperties; + private final Properties consumerProperties; + private final Properties producerProperties; - // Batch size for Kafka Consumer + // Timeout for Kafka Consumer private int consumerTimeout; + /** + * Class constructor + * @param consumerProperties Properties of consumer + * @param producerProperties Properties of producer + * @param consumerTimeout Timeout for consumer poll requests + */ public KafkaUtils(Properties consumerProperties, Properties producerProperties, int consumerTimeout) { this.consumerProperties = consumerProperties; this.producerProperties = producerProperties; @@ -56,14 +64,23 @@ public KafkaUtils(Properties consumerProperties, Properties producerProperties, this.consumerTimeout = kafkaUtils.consumerTimeout; } + /** + * Method used to initialize Kafka Consumer, i.e. instantiate it and subscribe to configured topic + * @param topics List of Kafka topics that consumer should subscribe to + */ public void initializeConsumer(Collection topics) { - // lazy initialization + // lazy instantiation if (consumer == null) { - consumer = new KafkaConsumer(consumerProperties); + consumer = new KafkaConsumer<>(consumerProperties); } consumer.subscribe(topics); } + /** + * Method for reading new messages from Kafka topics + * @return Collection of read messages + * @throws Exception Exception is thrown when consumer was not initialized or is not subscribed to any topic. 
+ */ public List getKafkaMessages() throws Exception { if (consumer != null) { From 26d86da6ebdd53e440e8b1e65f5ba5ed9f5f609e Mon Sep 17 00:00:00 2001 From: pwawrzyniak Date: Fri, 17 Mar 2017 12:09:52 +0100 Subject: [PATCH 04/17] Sample serializer/deserializer for JSON and InstanceContentEvent Updates in comments --- .../streams/kafka/KafkaDeserializer.java | 64 +++++++++--------- .../streams/kafka/KafkaEntranceProcessor.java | 1 - .../samoa/streams/kafka/KafkaJsonMapper.java | 52 +++++++++++++++ .../samoa/streams/kafka/KafkaSerializer.java | 66 ++++++++++--------- 4 files changed, 121 insertions(+), 62 deletions(-) create mode 100644 samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaJsonMapper.java diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDeserializer.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDeserializer.java index 2c7dae19..b85ec1fa 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDeserializer.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDeserializer.java @@ -1,30 +1,34 @@ -/* - * Copyright 2017 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.samoa.streams.kafka; - -import org.apache.samoa.core.ContentEvent; - -/** - * - * @author pwawrzyniak - * @param the class that would be deserialized - */ -public interface KafkaDeserializer { - - // TODO: Consider key-value schema? 
- - T deserialize(byte[] message); -} +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.samoa.streams.kafka; + +import org.apache.samoa.core.ContentEvent; + +/** + * + * @author pwawrzyniak + * @param the class that would be deserialized + */ +public interface KafkaDeserializer { + + // TODO: Consider key-value schema? + /** + * Method that provides deserialization algorithm + * @param message Message as received from Apache Kafka + * @return Deserialized form of message, to be passed to topology + */ + T deserialize(byte[] message); +} diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessor.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessor.java index fe82212f..d0a4c0d6 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessor.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessor.java @@ -85,7 +85,6 @@ public boolean hasNext() { public ContentEvent nextEvent() { // assume this will never be called when buffer is empty! 
return this.deserializer.deserialize(buffer.remove(buffer.size() - 1)); - } @Override diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaJsonMapper.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaJsonMapper.java new file mode 100644 index 00000000..6ede4475 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaJsonMapper.java @@ -0,0 +1,52 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.samoa.streams.kafka; + +import com.google.gson.Gson; +import java.nio.charset.Charset; +import org.apache.samoa.learners.InstanceContentEvent; + +/** + * Sample class for serializing and deserializing InsatnceContentEvent from/to JSON format + * @author pwawrzyniak + * @version 0.5.0-incubating-SNAPSHOT + * @since 0.5.0-incubating + */ +public class KafkaJsonMapper implements KafkaDeserializer, KafkaSerializer{ + + private final transient Gson gson; + private final Charset charset; + + /** + * Class constructor + * @param charset Charset to be used for bytes parsing + */ + public KafkaJsonMapper(Charset charset){ + this.gson = new Gson(); + this.charset = charset; + } + + @Override + public InstanceContentEvent deserialize(byte[] message) { + return gson.fromJson(new String(message, this.charset), InstanceContentEvent.class); + } + + @Override + public byte[] serialize(InstanceContentEvent message) { + return gson.toJson(message).getBytes(this.charset); + } + +} diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaSerializer.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaSerializer.java index 29f04ca5..a8cc0b86 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaSerializer.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaSerializer.java @@ -1,31 +1,35 @@ -/* - * Copyright 2017 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.samoa.streams.kafka; - -import org.apache.samoa.core.ContentEvent; - -/** - * - * @author pwawrzyniak - * @param the class that would be serialized - */ -public interface KafkaSerializer { - - // TODO: Consider Key-Value schema? - - - byte[] serialize(T message); -} +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.samoa.streams.kafka; + +import org.apache.samoa.core.ContentEvent; + +/** + * + * @author pwawrzyniak + * @param the class that would be serialized + */ +public interface KafkaSerializer { + + // TODO: Consider Key-Value schema? 
+ + /** + * Method that provides serialization algorithm + * @param message Message received from topology, to be serialized + * @return Serialized form of the message + */ + byte[] serialize(T message); +} From 45b3546f9c525af385f646447a09d3683b70937d Mon Sep 17 00:00:00 2001 From: pwawrzyniak Date: Fri, 17 Mar 2017 15:40:25 +0100 Subject: [PATCH 05/17] KafkaDestinationProcessor implementation (sending msg to Kafka --- .../kafka/KafkaDestinationProcessor.java | 119 +++++++++++------- .../samoa/streams/kafka/KafkaUtils.java | 15 +++ 2 files changed, 92 insertions(+), 42 deletions(-) diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessor.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessor.java index ed8f164b..5632b6e1 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessor.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessor.java @@ -1,42 +1,77 @@ -/* - * Copyright 2017 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.samoa.streams.kafka; - -import org.apache.samoa.core.ContentEvent; -import org.apache.samoa.core.Processor; - -/** - * - * @author pwawrzyniak - */ -public class KafkaDestinationProcessor implements Processor { - - @Override - public boolean process(ContentEvent event) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public void onCreate(int id) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public Processor newProcessor(Processor processor) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - -} +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.samoa.streams.kafka; + +import java.util.Properties; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.apache.samoa.core.ContentEvent; +import org.apache.samoa.core.Processor; + +/** + * Destination processor that writes data to Apache Kafka + * @author pwawrzyniak + * @version 0.5.0-incubating-SNAPSHOT + * @since 0.5.0-incubating + */ +public class KafkaDestinationProcessor implements Processor { + + private final KafkaUtils kafkaUtils; + private final String topic; + private final KafkaSerializer serializer; + + /** + * Class constructor + * @param props Properties of Kafka Producer + * @see Kafka Producer configuration + * @param topic Topic this destination processor will write into + * @param serializer Implementation of KafkaSerializer that handles arriving data serialization + */ + public KafkaDestinationProcessor(Properties props, String topic, KafkaSerializer serializer) { + this.kafkaUtils = new KafkaUtils(null, props, 0); + this.topic = topic; + this.serializer = serializer; + } + + private KafkaDestinationProcessor(KafkaUtils kafkaUtils, String topic, KafkaSerializer serializer){ + this.kafkaUtils = kafkaUtils; + this.topic = topic; + this.serializer = serializer; + } + + @Override + public boolean process(ContentEvent event) { + try { + kafkaUtils.sendKafkaMessage(topic, serializer.serialize(event)); + } catch (Exception ex) { + Logger.getLogger(KafkaEntranceProcessor.class.getName()).log(Level.SEVERE, null, ex); + return false; + } + return true; + } + + @Override + public void onCreate(int id) { + kafkaUtils.initializeProducer(); + } + + @Override + public Processor newProcessor(Processor processor) { + KafkaDestinationProcessor kdp = (KafkaDestinationProcessor)processor; + return new KafkaDestinationProcessor(new KafkaUtils(kdp.kafkaUtils), kdp.topic, kdp.serializer); + } + +} diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaUtils.java 
    /**
     * Lazily creates the Kafka producer from the configured producer
     * properties. Safe to call repeatedly; only the first call instantiates.
     */
    public void initializeProducer(){
        // lazy instantiation
        if(producer==null){
            producer = new KafkaProducer<>(producerProperties);
        }
    }

    /**
     * Publishes a single message to the given Kafka topic and flushes so the
     * record is pushed out immediately rather than left in the send buffer.
     * NOTE(review): when initializeProducer() has not been called yet the
     * message is silently dropped — confirm callers always initialize first.
     *
     * @param topic destination Kafka topic
     * @param message raw payload bytes to publish
     */
    public void sendKafkaMessage(String topic, byte[] message){
        if(producer!=null){
            producer.send(new ProducerRecord(topic, message));
            producer.flush();
        }
    }
insertions(+), 171 deletions(-) create mode 100644 samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorTest.java create mode 100644 samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaUtilsTest.java create mode 100644 samoa-api/src/test/java/org/apache/samoa/streams/kafka/TestUtilsForKafka.java diff --git a/.gitignore b/.gitignore index 294c7185..a834232f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,15 +1,16 @@ -#maven -target/ - -#eclipse -.classpath -.project -.settings/ - -#DS_Store -.DS_Store - -#intellij -.idea/ -*.iml -*.iws +#maven +target/ + +#eclipse +.classpath +.project +.settings/ + +#DS_Store +.DS_Store + +#intellij +.idea/ +*.iml +*.iws +/samoa-api/nbproject/ \ No newline at end of file diff --git a/samoa-api/pom.xml b/samoa-api/pom.xml index 4621b931..2b7bd22d 100644 --- a/samoa-api/pom.xml +++ b/samoa-api/pom.xml @@ -1,135 +1,154 @@ - - - - - 4.0.0 - - UTF-8 - - - samoa-api - API and algorithms for SAMOA - - samoa-api - - org.apache.samoa - samoa - 0.5.0-incubating-SNAPSHOT - - - - - com.yammer.metrics - metrics-core - ${metrics-core.version} - - - - net.jcip - jcip-annotations - ${jcip-annotations.version} - - - - org.apache.commons - commons-lang3 - ${commons-lang3.version} - - - - com.github.javacliparser - javacliparser - ${javacliparser.version} - - - - org.apache.samoa - samoa-instances - ${project.version} - - - - com.google.guava - guava - ${guava.version} - - - - com.esotericsoftware.kryo - kryo - ${kryo.version} - - - - com.dreizak - miniball - ${miniball.version} - - - - org.apache.hadoop - hadoop-common - ${hadoop.version} - - - org.apache.hadoop - hadoop-hdfs - ${hadoop.version} - - - org.apache.hadoop - hadoop-minicluster - ${hadoop.version} - test - - - - org.apache.kafka - kafka-clients - 0.10.2.0 - - - - - - - org.apache.maven.plugins - maven-dependency-plugin - ${maven-dependency-plugin.version} - - - copy-dependencies - package - - copy-dependencies - - - ${project.build.directory}/lib - false - 
false - true - - - - - - - + + + + + 4.0.0 + + UTF-8 + + + samoa-api + API and algorithms for SAMOA + + samoa-api + + org.apache.samoa + samoa + 0.5.0-incubating-SNAPSHOT + + + + + com.yammer.metrics + metrics-core + ${metrics-core.version} + + + + net.jcip + jcip-annotations + ${jcip-annotations.version} + + + + org.apache.commons + commons-lang3 + ${commons-lang3.version} + + + + com.github.javacliparser + javacliparser + ${javacliparser.version} + + + + org.apache.samoa + samoa-instances + ${project.version} + + + + com.google.guava + guava + ${guava.version} + + + + com.esotericsoftware.kryo + kryo + ${kryo.version} + + + + com.dreizak + miniball + ${miniball.version} + + + + org.apache.hadoop + hadoop-common + ${hadoop.version} + + + org.apache.hadoop + hadoop-hdfs + ${hadoop.version} + + + org.apache.hadoop + hadoop-minicluster + ${hadoop.version} + test + + + + org.apache.kafka + kafka-clients + 0.10.2.0 + + + org.apache.kafka + kafka-clients + 0.10.2.0 + test + test + + + org.apache.kafka + kafka_2.11 + 0.10.2.0 + + + org.apache.kafka + kafka_2.11 + 0.10.2.0 + test + test + + + + + + + org.apache.maven.plugins + maven-dependency-plugin + ${maven-dependency-plugin.version} + + + copy-dependencies + package + + copy-dependencies + + + ${project.build.directory}/lib + false + false + true + + + + + + + diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDeserializer.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDeserializer.java index b85ec1fa..7b11cbd4 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDeserializer.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDeserializer.java @@ -13,7 +13,28 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.samoa.streams.kafka; +package org.apache.samoa.streams.kafka; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2017 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + import org.apache.samoa.core.ContentEvent; diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessor.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessor.java index 5632b6e1..67dfbaaf 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessor.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessor.java @@ -13,7 +13,28 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.samoa.streams.kafka; +package org.apache.samoa.streams.kafka; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2017 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + import java.util.Properties; import java.util.logging.Level; diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessor.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessor.java index d0a4c0d6..2b0b808c 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessor.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessor.java @@ -15,6 +15,27 @@ */ package org.apache.samoa.streams.kafka; +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2017 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + + import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -97,4 +118,11 @@ public Processor newProcessor(Processor processor) { KafkaEntranceProcessor kep = (KafkaEntranceProcessor) processor; return new KafkaEntranceProcessor(new KafkaUtils(kep.kafkaUtils), kep.deserializer, kep.topic); } + + @Override + protected void finalize() throws Throwable { + kafkaUtils.closeConsumer(); + super.finalize(); //To change body of generated methods, choose Tools | Templates. 
+ } + } diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaJsonMapper.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaJsonMapper.java index 6ede4475..1996b40a 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaJsonMapper.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaJsonMapper.java @@ -15,12 +15,40 @@ */ package org.apache.samoa.streams.kafka; +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2017 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ + + import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.gson.InstanceCreator; +import java.lang.reflect.Type; import java.nio.charset.Charset; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.apache.samoa.instances.InstanceData; +import org.apache.samoa.instances.SingleClassInstanceData; import org.apache.samoa.learners.InstanceContentEvent; /** - * Sample class for serializing and deserializing InsatnceContentEvent from/to JSON format + * Sample class for serializing and deserializing {@link InstanceContentEvent} from/to JSON format * @author pwawrzyniak * @version 0.5.0-incubating-SNAPSHOT * @since 0.5.0-incubating @@ -35,7 +63,7 @@ public class KafkaJsonMapper implements KafkaDeserializer, * @param charset Charset to be used for bytes parsing */ public KafkaJsonMapper(Charset charset){ - this.gson = new Gson(); + this.gson = new GsonBuilder().registerTypeAdapter(InstanceData.class, new InstanceDataCreator()).create(); this.charset = charset; } @@ -49,4 +77,13 @@ public byte[] serialize(InstanceContentEvent message) { return gson.toJson(message).getBytes(this.charset); } + public class InstanceDataCreator implements InstanceCreator{ + + @Override + public InstanceData createInstance(Type type) { + return new SingleClassInstanceData(); + } + + } + } diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaSerializer.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaSerializer.java index a8cc0b86..ad6bd8e6 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaSerializer.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaSerializer.java @@ -13,7 +13,28 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.samoa.streams.kafka; +package org.apache.samoa.streams.kafka; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2017 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + import org.apache.samoa.core.ContentEvent; diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaUtils.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaUtils.java index 24783d41..f5227d3f 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaUtils.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaUtils.java @@ -15,19 +15,45 @@ */ package org.apache.samoa.streams.kafka; +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2017 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.Properties; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.logging.Level; import org.apache.kafka.clients.consumer.ConsumerRecord; import org.apache.kafka.clients.consumer.ConsumerRecords; import org.apache.kafka.clients.consumer.KafkaConsumer; import org.apache.kafka.clients.producer.KafkaProducer; import org.apache.kafka.clients.producer.ProducerRecord; +import java.util.logging.Level; +import java.util.logging.Logger; /** - * Internal class responsible for Kafka Stream handling (both consume and produce) + * Internal class responsible for Kafka Stream handling (both consume and + * produce) * * @author pwawrzyniak * @version 0.5.0-incubating-SNAPSHOT @@ -36,24 +62,25 @@ class KafkaUtils { // Consumer class for internal use to retrieve messages from Kafka - private KafkaConsumer consumer; + private transient KafkaConsumer consumer; - private KafkaProducer producer; + private transient KafkaProducer producer; // Properties of the consumer, as defined in Kafka documentation private final Properties consumerProperties; private final Properties producerProperties; // Timeout for Kafka Consumer - private int consumerTimeout; + private long consumerTimeout; /** * Class constructor + * * @param consumerProperties Properties of consumer * @param producerProperties Properties of producer * @param consumerTimeout Timeout for consumer poll requests */ - public KafkaUtils(Properties consumerProperties, Properties producerProperties, int consumerTimeout) { + public KafkaUtils(Properties consumerProperties, Properties producerProperties, long consumerTimeout) { this.consumerProperties = consumerProperties; this.producerProperties = producerProperties; this.consumerTimeout = consumerTimeout; @@ -66,7 +93,9 @@ public KafkaUtils(Properties 
consumerProperties, Properties producerProperties, } /** - * Method used to initialize Kafka Consumer, i.e. instantiate it and subscribe to configured topic + * Method used to initialize Kafka Consumer, i.e. instantiate it and + * subscribe to configured topic + * * @param topics List of Kafka topics that consumer should subscribe to */ public void initializeConsumer(Collection topics) { @@ -75,19 +104,29 @@ public void initializeConsumer(Collection topics) { consumer = new KafkaConsumer<>(consumerProperties); } consumer.subscribe(topics); +// consumer.seekToBeginning(consumer.assignment()); + } + + public void closeConsumer() { + if (consumer != null) { + consumer.unsubscribe(); + consumer.close(); + } } - public void initializeProducer(){ + public void initializeProducer() { // lazy instantiation - if(producer==null){ + if (producer == null) { producer = new KafkaProducer<>(producerProperties); - } + } } - + /** * Method for reading new messages from Kafka topics + * * @return Collection of read messages - * @throws Exception Exception is thrown when consumer was not initialized or is not subscribed to any topic. + * @throws Exception Exception is thrown when consumer was not initialized + * or is not subscribed to any topic. 
*/ public List getKafkaMessages() throws Exception { @@ -107,16 +146,24 @@ public List getKafkaMessages() throws Exception { private List getMessagesBytes(ConsumerRecords poll) { Iterator> iterator = poll.iterator(); List ret = new ArrayList<>(); - while(iterator.hasNext()){ + while (iterator.hasNext()) { ret.add(iterator.next().value()); } return ret; } - - public void sendKafkaMessage(String topic, byte[] message){ - if(producer!=null){ - producer.send(new ProducerRecord(topic, message)); + + public long sendKafkaMessage(String topic, byte[] message) { + if (producer != null) { + try{ + ProducerRecord record = new ProducerRecord(topic, message); + long offset = producer.send(record).get(10, TimeUnit.SECONDS).offset(); producer.flush(); + return offset; + } catch(InterruptedException | ExecutionException | TimeoutException e){ + Logger.getLogger(KafkaUtils.class.getName()).log(Level.SEVERE, null, e); + } + } + return -1; } } diff --git a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorTest.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorTest.java new file mode 100644 index 00000000..2a92a31a --- /dev/null +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorTest.java @@ -0,0 +1,212 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.samoa.streams.kafka; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2017 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ +import com.google.gson.Gson; +import java.io.IOException; +import java.nio.charset.Charset; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.Properties; +import java.util.Random; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.logging.Level; +import java.util.logging.Logger; +import mockit.Mocked; +import mockit.Tested; +import mockit.Expectations; +import org.apache.samoa.core.ContentEvent; +import org.apache.samoa.core.Processor; +import org.apache.samoa.learners.InstanceContentEvent; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import static org.junit.Assert.*; +import kafka.admin.AdminUtils; +import kafka.admin.RackAwareMode; +import kafka.server.KafkaConfig; +import kafka.server.KafkaServer; +import kafka.utils.MockTime; +import kafka.utils.TestUtils; +import org.apache.kafka.common.utils.Time; +import kafka.utils.ZKStringSerializer$; +import kafka.utils.ZkUtils; +import kafka.zk.EmbeddedZookeeper; +import org.I0Itec.zkclient.ZkClient; +import org.apache.kafka.clients.producer.KafkaProducer; +import 
org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.samoa.instances.Attribute; +import org.apache.samoa.instances.DenseInstance; +import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.Instances; +import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.moa.core.FastVector; +import org.apache.samoa.moa.core.InstanceExample; +import org.apache.samoa.streams.InstanceStream; + +/** + * + * @author pwawrzyniak + */ +public class KafkaEntranceProcessorTest { + +// @Tested +// private KafkaEntranceProcessor kep; + private static final String ZKHOST = "127.0.0.1"; + private static final String BROKERHOST = "127.0.0.1"; + private static final String BROKERPORT = "9092"; + private static final String TOPIC = "test"; + private static final int NUM_INSTANCES = 500; + + + private static KafkaServer kafkaServer; + private static EmbeddedZookeeper zkServer; + private static ZkClient zkClient; + private static String zkConnect; + + + public KafkaEntranceProcessorTest() { + } + + @BeforeClass + public static void setUpClass() throws IOException { + // setup Zookeeper + zkServer = new EmbeddedZookeeper(); + zkConnect = ZKHOST + ":" + zkServer.port(); + zkClient = new ZkClient(zkConnect, 30000, 30000, ZKStringSerializer$.MODULE$); + ZkUtils zkUtils = ZkUtils.apply(zkClient, false); + + // setup Broker + Properties brokerProps = new Properties(); + brokerProps.setProperty("zookeeper.connect", zkConnect); + brokerProps.setProperty("broker.id", "0"); + brokerProps.setProperty("log.dirs", Files.createTempDirectory("kafka-").toAbsolutePath().toString()); + brokerProps.setProperty("listeners", "PLAINTEXT://" + BROKERHOST + ":" + BROKERPORT); + KafkaConfig config = new KafkaConfig(brokerProps); + Time mock = new MockTime(); + kafkaServer = TestUtils.createServer(config, mock); + + // create topic + AdminUtils.createTopic(zkUtils, TOPIC, 1, 1, new Properties(), RackAwareMode.Disabled$.MODULE$); + + } + + @AfterClass + public static 
void tearDownClass() { + kafkaServer.shutdown(); + zkClient.close(); + zkServer.shutdown(); + } + + @Before + public void setUp() throws IOException { + + } + + @After + public void tearDown() { + + } + + @Test + public void testFetchingNewData() throws InterruptedException, ExecutionException, TimeoutException { + + Logger logger = Logger.getLogger(KafkaEntranceProcessorTest.class.getName()); + Properties props = TestUtilsForKafka.getConsumerProperties(); + props.setProperty("auto.offset.reset", "earliest"); + KafkaEntranceProcessor kep = new KafkaEntranceProcessor(props, TOPIC, 10000, new KafkaJsonMapper(Charset.defaultCharset())); + kep.onCreate(1); + +// prepare new thread for data producing + Thread th = new Thread(new Runnable() { + @Override + public void run() { + KafkaProducer producer = new KafkaProducer<>(TestUtilsForKafka.getProducerProperties()); + + Random r = new Random(); + InstancesHeader header = TestUtilsForKafka.generateHeader(10); + Gson gson = new Gson(); + int i = 0; + for (i = 0; i < NUM_INSTANCES; i++) { + try { + ProducerRecord record = new ProducerRecord(TOPIC, gson.toJson(TestUtilsForKafka.getData(r, 10, header)).getBytes()); + long stat = producer.send(record).get(10, TimeUnit.DAYS).offset(); + Thread.sleep(5); + Logger.getLogger(KafkaEntranceProcessorTest.class.getName()).log(Level.INFO, "Sent message with ID={0} to Kafka!, offset={1}", new Object[]{i, stat}); + } catch (InterruptedException | ExecutionException | TimeoutException ex) { + Logger.getLogger(KafkaEntranceProcessorTest.class.getName()).log(Level.SEVERE, null, ex); + } + } + producer.flush(); + producer.close(); + } + }); + th.start(); + + int z = 0; + while (kep.hasNext() && z < NUM_INSTANCES) { + logger.log(Level.INFO, "{0} {1}", new Object[]{z++, kep.nextEvent().toString()}); + } + + assertEquals("Number of sent and received instances", NUM_INSTANCES, z); + + + } + +// private Properties getProducerProperties() { +// Properties producerProps = new Properties(); +//// 
props.setProperty("zookeeper.connect", zkConnect); +// producerProps.setProperty("bootstrap.servers", BROKERHOST + ":" + BROKERPORT); +// producerProps.setProperty("key.serializer", "org.apache.kafka.common.serialization.StringSerializer"); +// producerProps.setProperty("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer"); +//// producerProps.setProperty("group.id", "test"); +// return producerProps; +// } +// +// private Properties getConsumerProperties() { +// Properties consumerProps = new Properties(); +// consumerProps.setProperty("bootstrap.servers", BROKERHOST + ":" + BROKERPORT); +// consumerProps.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); +// consumerProps.setProperty("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer"); +//// consumerProps.setProperty("group.id", "test"); +// consumerProps.setProperty("group.id", "group0"); +// consumerProps.setProperty("client.id", "consumer0"); +// return consumerProps; + +} diff --git a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaUtilsTest.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaUtilsTest.java new file mode 100644 index 00000000..4cd51356 --- /dev/null +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaUtilsTest.java @@ -0,0 +1,235 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.samoa.streams.kafka; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2017 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + + +import com.google.gson.Gson; +import java.io.IOException; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.Properties; +import java.util.Random; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.logging.Level; +import java.util.logging.Logger; +import kafka.admin.AdminUtils; +import kafka.admin.RackAwareMode; +import kafka.server.KafkaConfig; +import kafka.server.KafkaServer; +import kafka.utils.MockTime; +import kafka.utils.TestUtils; +import kafka.utils.ZKStringSerializer$; +import kafka.utils.ZkUtils; +import kafka.zk.EmbeddedZookeeper; +import org.I0Itec.zkclient.ZkClient; +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.apache.kafka.clients.consumer.ConsumerRecords; +import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.common.utils.Time; +import org.apache.samoa.instances.InstancesHeader; +import org.junit.After; +import org.junit.AfterClass; +import 
org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import static org.junit.Assert.*; + +/** + * + * @author pwawrzyniak + */ +public class KafkaUtilsTest { + + private static final String ZKHOST = "127.0.0.1"; + private static final String BROKERHOST = "127.0.0.1"; + private static final String BROKERPORT = "9092"; + private static final String TOPIC_R = "test-r"; + private static final String TOPIC_S = "test-s"; + + private static KafkaServer kafkaServer; + private static EmbeddedZookeeper zkServer; + private static ZkClient zkClient; + private static String zkConnect; + + private Logger logger = Logger.getLogger(KafkaUtilsTest.class.getCanonicalName()); + private long CONSUMER_TIMEOUT = 1000; + + public KafkaUtilsTest() { + } + + @BeforeClass + public static void setUpClass() throws IOException { + // setup Zookeeper + zkServer = new EmbeddedZookeeper(); + zkConnect = ZKHOST + ":" + zkServer.port(); + zkClient = new ZkClient(zkConnect, 30000, 30000, ZKStringSerializer$.MODULE$); + ZkUtils zkUtils = ZkUtils.apply(zkClient, false); + + // setup Broker + Properties brokerProps = new Properties(); + brokerProps.setProperty("zookeeper.connect", zkConnect); + brokerProps.setProperty("broker.id", "0"); + brokerProps.setProperty("log.dirs", Files.createTempDirectory("kafkaUtils-").toAbsolutePath().toString()); + brokerProps.setProperty("listeners", "PLAINTEXT://" + BROKERHOST + ":" + BROKERPORT); + KafkaConfig config = new KafkaConfig(brokerProps); + Time mock = new MockTime(); + kafkaServer = TestUtils.createServer(config, mock); + + // create topics + AdminUtils.createTopic(zkUtils, TOPIC_R, 1, 1, new Properties(), RackAwareMode.Disabled$.MODULE$); + AdminUtils.createTopic(zkUtils, TOPIC_S, 1, 1, new Properties(), RackAwareMode.Disabled$.MODULE$); + + } + + @AfterClass + public static void tearDownClass() { + kafkaServer.shutdown(); + zkClient.close(); + zkServer.shutdown(); + } + + @Before + public void setUp() { + } + + @After + public void 
tearDown() { + } + + /** + * Test of initializeConsumer method, of class KafkaUtils. + */ + @Test + public void testInitializeConsumer() throws Exception { + logger.log(Level.INFO, "initializeConsumer"); + Collection topics = Arrays.asList(TOPIC_R); + KafkaUtils instance = new KafkaUtils(TestUtilsForKafka.getConsumerProperties(), TestUtilsForKafka.getProducerProperties(), CONSUMER_TIMEOUT); + assertNotNull(instance); + + instance.initializeConsumer(topics); + + assertNotNull(instance.getKafkaMessages()); + instance.closeConsumer(); + } + + /** + * Test of getKafkaMessages method, of class KafkaUtils. + */ + @Test + public void testGetKafkaMessages() throws Exception { + logger.log(Level.INFO, "getKafkaMessages"); + Collection topics = Arrays.asList(TOPIC_R); + KafkaUtils instance = new KafkaUtils(TestUtilsForKafka.getConsumerProperties(), TestUtilsForKafka.getProducerProperties(), CONSUMER_TIMEOUT); + assertNotNull(instance); + + logger.log(Level.INFO, "Initialising consumer"); + instance.initializeConsumer(topics); + + logger.log(Level.INFO, "Produce data"); + List expResult = sendAndGetMessages(500); + + logger.log(Level.INFO, "Get results from Kafka"); + List result = instance.getKafkaMessages(); + + assertArrayEquals(expResult.toArray(), result.toArray()); + instance.closeConsumer(); + } + + private List sendAndGetMessages(int maxNum) throws InterruptedException, ExecutionException, TimeoutException { + List ret; + try (KafkaProducer producer = new KafkaProducer<>(TestUtilsForKafka.getProducerProperties("sendM-test"))) { + ret = new ArrayList<>(); + Random r = new Random(); + InstancesHeader header = TestUtilsForKafka.generateHeader(10); + Gson gson = new Gson(); + int i = 0; + for (i = 0; i < maxNum; i++) { + ProducerRecord record = new ProducerRecord(TOPIC_R, gson.toJson(TestUtilsForKafka.getData(r, 10, header)).getBytes()); + ret.add(record.value()); + producer.send(record); + } producer.flush(); + } + return ret; + } + + /** + * Test of sendKafkaMessage 
method, of class KafkaUtils. + */ + @Test + public void testSendKafkaMessage() { + logger.log(Level.INFO, "sendKafkaMessage"); + + logger.log(Level.INFO, "Initialising producer"); + KafkaUtils instance = new KafkaUtils(TestUtilsForKafka.getConsumerProperties(), TestUtilsForKafka.getProducerProperties("rcv-test"), CONSUMER_TIMEOUT); + instance.initializeProducer(); + + logger.log(Level.INFO, "Initialising consumer"); + KafkaConsumer consumer; + consumer = new KafkaConsumer<>(TestUtilsForKafka.getConsumerProperties()); + consumer.subscribe(Arrays.asList(TOPIC_S)); + + logger.log(Level.INFO, "Produce data"); + List sent = new ArrayList<>(); + Random r = new Random(); + InstancesHeader header = TestUtilsForKafka.generateHeader(10); + Gson gson = new Gson(); + for (int i = 0; i < 500; i++) { + byte[] val = gson.toJson(TestUtilsForKafka.getData(r, 10, header)).getBytes(); + sent.add(val); + instance.sendKafkaMessage(TOPIC_S, val); + } + + logger.log(Level.INFO, "Get results from Kafka"); + ConsumerRecords records = consumer.poll(CONSUMER_TIMEOUT); + Iterator> it = records.iterator(); + List consumed = new ArrayList<>(); + while (it.hasNext()) { + consumed.add(it.next().value()); + } + consumer.close(); + + assertArrayEquals(sent.toArray(), consumed.toArray()); + } + +} diff --git a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/TestUtilsForKafka.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/TestUtilsForKafka.java new file mode 100644 index 00000000..0d30429e --- /dev/null +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/TestUtilsForKafka.java @@ -0,0 +1,132 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.samoa.streams.kafka; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2017 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ + + +import java.util.Properties; +import java.util.Random; +import org.apache.samoa.instances.Attribute; +import org.apache.samoa.instances.DenseInstance; +import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.Instances; +import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.learners.InstanceContentEvent; +import org.apache.samoa.moa.core.FastVector; + +/** + * + * @author pwawrzyniak + */ +public class TestUtilsForKafka { + + private static final String ZKHOST = "127.0.0.1"; + private static final String BROKERHOST = "127.0.0.1"; + private static final String BROKERPORT = "9092"; + private static final String TOPIC = "test"; + + protected static InstanceContentEvent getData(Random instanceRandom, int numAtts, InstancesHeader header) { + double[] attVals = new double[numAtts + 1]; + double sum = 0.0; + double sumWeights = 0.0; + for (int i = 0; i < numAtts; i++) { + attVals[i] = instanceRandom.nextDouble(); +// sum += this.weights[i] * attVals[i]; +// sumWeights += this.weights[i]; + } + int classLabel; + if (sum >= sumWeights * 0.5) { + classLabel = 1; + } else { + classLabel = 0; + } + + Instance inst = new DenseInstance(1.0, attVals); + inst.setDataset(header); + inst.setClassValue(classLabel); + + return new InstanceContentEvent(0, inst, true, false); + } + + @SuppressWarnings({"rawtypes", "unchecked"}) + protected static InstancesHeader generateHeader(int numAttributes) { + FastVector attributes = new FastVector(); + for (int i = 0; i < numAttributes; i++) { + attributes.addElement(new Attribute("att" + (i + 1))); + } + + FastVector classLabels = new FastVector(); + for (int i = 0; i < numAttributes; i++) { + classLabels.addElement("class" + (i + 1)); + } + attributes.addElement(new Attribute("class", classLabels)); + InstancesHeader streamHeader = new InstancesHeader(new Instances("test-kafka", attributes, 0)); + streamHeader.setClassIndex(streamHeader.numAttributes() - 1); + return streamHeader; + 
} + + + protected static Properties getProducerProperties() { + return getProducerProperties("test"); + } + + /** + * + * @param clientId + * @return + */ + protected static Properties getProducerProperties(String clientId) { + Properties producerProps = new Properties(); + producerProps.setProperty("bootstrap.servers", BROKERHOST + ":" + BROKERPORT); + producerProps.setProperty("key.serializer", "org.apache.kafka.common.serialization.StringSerializer"); + producerProps.setProperty("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer"); + producerProps.setProperty("group.id", "test"); + producerProps.setProperty("client.id", clientId); + return producerProps; + } + + protected static Properties getConsumerProperties() { + Properties consumerProps = new Properties(); + consumerProps.setProperty("bootstrap.servers", BROKERHOST + ":" + BROKERPORT); + consumerProps.put("enable.auto.commit", "true"); + consumerProps.put("auto.commit.interval.ms", "1000"); + consumerProps.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); + consumerProps.setProperty("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer"); + consumerProps.setProperty("group.id", "test"); + consumerProps.setProperty("auto.offset.reset", "earliest"); +// consumerProps.setProperty("client.id", "consumer0"); + return consumerProps; + } +} From 208febc0524ddc328ae1a785f0642650173f7101 Mon Sep 17 00:00:00 2001 From: pwawrzyniak Date: Tue, 11 Apr 2017 16:44:30 +0200 Subject: [PATCH 07/17] Tests for KafkaDestinationProcessor, minor changes in classes --- .../kafka/KafkaDestinationProcessor.java | 50 +++--- .../samoa/streams/kafka/KafkaUtils.java | 6 + .../kafka/KafkaDestinationProcessorTest.java | 167 ++++++++++++++++++ 3 files changed, 201 insertions(+), 22 deletions(-) create mode 100644 samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessorTest.java diff --git 
a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessor.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessor.java index 67dfbaaf..420d43ce 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessor.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessor.java @@ -13,28 +13,28 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.samoa.streams.kafka; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2017 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - +package org.apache.samoa.streams.kafka; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2017 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ + import java.util.Properties; import java.util.logging.Level; @@ -50,6 +50,12 @@ */ public class KafkaDestinationProcessor implements Processor { + @Override + protected void finalize() throws Throwable { + super.finalize(); + kafkaUtils.closeProducer(); + } + private final KafkaUtils kafkaUtils; private final String topic; private final KafkaSerializer serializer; diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaUtils.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaUtils.java index f5227d3f..06358776 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaUtils.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaUtils.java @@ -121,6 +121,12 @@ public void initializeProducer() { } } + public void closeProducer(){ + if(producer != null){ + producer.close(1, TimeUnit.MINUTES); + } + } + /** * Method for reading new messages from Kafka topics * diff --git a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessorTest.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessorTest.java new file mode 100644 index 00000000..a138763c --- /dev/null +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessorTest.java @@ -0,0 +1,167 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.samoa.streams.kafka; + +import java.io.IOException; +import java.nio.charset.Charset; +import java.nio.file.Files; +import java.util.Arrays; +import java.util.Iterator; +import java.util.Properties; +import java.util.Random; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeoutException; +import java.util.logging.Level; +import java.util.logging.Logger; +import kafka.admin.AdminUtils; +import kafka.admin.RackAwareMode; +import kafka.server.KafkaConfig; +import kafka.server.KafkaServer; +import kafka.utils.MockTime; +import kafka.utils.TestUtils; +import kafka.utils.ZKStringSerializer$; +import kafka.utils.ZkUtils; +import kafka.zk.EmbeddedZookeeper; +import org.I0Itec.zkclient.ZkClient; +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.apache.kafka.clients.consumer.ConsumerRecords; +import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.common.utils.Time; +import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.learners.InstanceContentEvent; +import org.junit.After; +import org.junit.AfterClass; +import static org.junit.Assert.*; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * + * @author pwawrzyniak + */ +public class KafkaDestinationProcessorTest { + + private static final String ZKHOST = "127.0.0.1"; + private static final String BROKERHOST = "127.0.0.1"; + private static final String BROKERPORT = "9092"; + private static final String TOPIC = "test-kdp"; + private static final int NUM_INSTANCES = 500; + private static final int CONSUMER_TIMEOUT = 1000; + + private static KafkaServer kafkaServer; + private static EmbeddedZookeeper zkServer; + private static ZkClient zkClient; + private static String zkConnect; + + public KafkaDestinationProcessorTest() { + } + + @BeforeClass + public static void setUpClass() throws IOException { + // setup Zookeeper + zkServer = new EmbeddedZookeeper(); + zkConnect 
= ZKHOST + ":" + zkServer.port(); + zkClient = new ZkClient(zkConnect, 30000, 30000, ZKStringSerializer$.MODULE$); + ZkUtils zkUtils = ZkUtils.apply(zkClient, false); + + // setup Broker + Properties brokerProps = new Properties(); + brokerProps.setProperty("zookeeper.connect", zkConnect); + brokerProps.setProperty("broker.id", "0"); + brokerProps.setProperty("log.dirs", Files.createTempDirectory("kafka-").toAbsolutePath().toString()); + brokerProps.setProperty("listeners", "PLAINTEXT://" + BROKERHOST + ":" + BROKERPORT); + KafkaConfig config = new KafkaConfig(brokerProps); + Time mock = new MockTime(); + kafkaServer = TestUtils.createServer(config, mock); + + // create topic + AdminUtils.createTopic(zkUtils, TOPIC, 1, 1, new Properties(), RackAwareMode.Disabled$.MODULE$); + + } + + @AfterClass + public static void tearDownClass() { + kafkaServer.shutdown(); + zkClient.close(); + zkServer.shutdown(); + } + + @Before + public void setUp() throws IOException { + + } + + @After + public void tearDown() { + + } + + @Test + public void testSendingData() throws InterruptedException, ExecutionException, TimeoutException { + + final Logger logger = Logger.getLogger(KafkaDestinationProcessorTest.class.getName()); + Properties props = TestUtilsForKafka.getProducerProperties(); + props.setProperty("auto.offset.reset", "earliest"); + KafkaDestinationProcessor kdp = new KafkaDestinationProcessor(props, TOPIC, new KafkaJsonMapper(Charset.defaultCharset())); + kdp.onCreate(1); + + final int[] i = {0}; + +// prepare new thread for data receiving + Thread th = new Thread(new Runnable() { + @Override + public void run() { + KafkaConsumer consumer = new KafkaConsumer<>(TestUtilsForKafka.getConsumerProperties()); + consumer.subscribe(Arrays.asList(TOPIC)); + while (i[0] < NUM_INSTANCES) { + try { + ConsumerRecords cr = consumer.poll(CONSUMER_TIMEOUT); + + Iterator> it = cr.iterator(); + while (it.hasNext()) { + ConsumerRecord record = it.next(); + logger.info(new
String(record.value())); + logger.log(Level.INFO, "Current read offset is: {0}", record.offset()); + i[0]++; + } + + Thread.sleep(1); + + } catch (InterruptedException ex) { + Logger.getLogger(KafkaDestinationProcessorTest.class.getName()).log(Level.SEVERE, null, ex); + } + } + consumer.close(); + } + }); + th.start(); + + int z = 0; + Random r = new Random(); + InstancesHeader header = TestUtilsForKafka.generateHeader(10); + + for (z = 0; z < NUM_INSTANCES; z++) { + InstanceContentEvent event = TestUtilsForKafka.getData(r, 10, header); + kdp.process(event); + logger.log(Level.INFO, "{0} {1}", new Object[]{"Sent item with id: ", z}); + Thread.sleep(5); + } + // wait for all instances to be read + Thread.sleep(100); + assertEquals("Number of sent and received instances", z, i[0]); + } +} From ff6b84dc5403c102388646a31f120c11ee416063 Mon Sep 17 00:00:00 2001 From: Jakub Jankowski Date: Fri, 28 Apr 2017 17:08:29 +0200 Subject: [PATCH 08/17] Added Kafka Avro serializer with unit tests. Added Kafka task with unit test --- .../samoa/streams/kafka/KafkaAvroMapper.java | 158 ++++++++++++++++++ .../apache/samoa/streams/kafka/KafkaTask.java | 148 ++++++++++++++++ .../topology/SimpleComponentFactory.java | 53 ++++++ .../streams/kafka/topology/SimpleEngine.java | 37 ++++ .../SimpleEntranceProcessingItem.java | 33 ++++ .../kafka/topology/SimpleProcessingItem.java | 87 ++++++++++ .../streams/kafka/topology/SimpleStream.java | 95 +++++++++++ .../kafka/topology/SimpleTopology.java | 46 +++++ samoa-api/src/main/resources/kafka.avsc | 61 +++++++ .../kafka/KafkaEntranceProcessorTest.java | 74 ++++++-- .../samoa/streams/kafka/KafkaTaskTest.java | 138 +++++++++++++++ .../samoa/streams/kafka/KafkaUtilsTest.java | 24 +-- .../streams/kafka/TestUtilsForKafka.java | 25 ++- 13 files changed, 952 insertions(+), 27 deletions(-) create mode 100644 samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaAvroMapper.java create mode 100644 
samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaTask.java create mode 100644 samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleComponentFactory.java create mode 100644 samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleEngine.java create mode 100644 samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleEntranceProcessingItem.java create mode 100644 samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleProcessingItem.java create mode 100644 samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleStream.java create mode 100644 samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleTopology.java create mode 100644 samoa-api/src/main/resources/kafka.avsc create mode 100644 samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaTaskTest.java diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaAvroMapper.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaAvroMapper.java new file mode 100644 index 00000000..91902d06 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaAvroMapper.java @@ -0,0 +1,158 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.samoa.streams.kafka; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.io.BinaryEncoder; +import org.apache.avro.io.DatumWriter; +import org.apache.avro.io.Decoder; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.Encoder; +import org.apache.avro.io.EncoderFactory; +import org.apache.avro.reflect.ReflectData; +import org.apache.avro.reflect.ReflectDatumWriter; +import org.apache.avro.specific.SpecificDatumReader; +import org.apache.avro.specific.SpecificDatumWriter; +import org.apache.avro.specific.SpecificRecord; +import org.apache.samoa.learners.InstanceContentEvent; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2017 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ + +/** + * Sample class for serializing and deserializing {@link InstanceContentEvent} + * from/to Avro format + * + * @author Jakub Jankowski + * @version 0.5.0-incubating-SNAPSHOT + * @since 0.5.0-incubating + */ +public class KafkaAvroMapper implements KafkaDeserializer, KafkaSerializer { + + private static Logger logger = LoggerFactory.getLogger(KafkaAvroMapper.class); + + @Override + public byte[] serialize(InstanceContentEvent message) { + return toBytesGeneric(InstanceContentEvent.class, message); + } + + @Override + public InstanceContentEvent deserialize(byte[] message) { + return avroDeserialize(message, InstanceContentEvent.class, null); + } + + public static byte[] avroSerialize(Class clazz, Object object) { + byte[] ret = null; + try { + if (object == null || !(object instanceof SpecificRecord)) { + return null; + } + + T record = (T) object; + ByteArrayOutputStream out = new ByteArrayOutputStream(); + Encoder e = EncoderFactory.get().directBinaryEncoder(out, null); + SpecificDatumWriter w = new SpecificDatumWriter(clazz); + w.write(record, e); + e.flush(); + ret = out.toByteArray(); + } catch (IOException e) { + + } + + return ret; + } + + public static T avroDeserialize(byte[] avroBytes, Class clazz, Schema schema) { + T ret = null; + try { + ByteArrayInputStream in = new ByteArrayInputStream(avroBytes); + Decoder d = DecoderFactory.get().directBinaryDecoder(in, null); + SpecificDatumReader reader = new SpecificDatumReader(clazz); + ret = reader.read(null, d); + } catch (IOException e) { + + } + + return ret; + } + + public static byte[] toBytesGeneric(final Class cls, final V v) { + final ByteArrayOutputStream bout = new ByteArrayOutputStream(); + final Schema schema = ReflectData.AllowNull.get().getSchema(cls); + final DatumWriter writer = new ReflectDatumWriter(schema); + final BinaryEncoder binEncoder = EncoderFactory.get().binaryEncoder(bout, null); + try { + writer.write(v, binEncoder); + binEncoder.flush(); + } catch (final 
Exception e) { + throw new RuntimeException(e); + } + + return bout.toByteArray(); + } + + public static byte[] avroBurrSerialize(final Class cls, final V v) { + ByteArrayOutputStream bout = new ByteArrayOutputStream(); + try { + Schema schema = new Schema.Parser().parse(new File("C:/java/avro/kafka.avsc")); + DatumWriter writer; + + if (v instanceof SpecificRecord) { + writer = new SpecificDatumWriter<>(schema); + } else { + writer = new ReflectDatumWriter<>(schema); + } + + BinaryEncoder binEncoder = EncoderFactory.get().binaryEncoder(bout, null); + writer.write(v, binEncoder); + binEncoder.flush(); + + } catch (IOException e) { + e.printStackTrace(); + } catch (final Exception e) { + throw new RuntimeException(e); + } + + return bout.toByteArray(); + + } + +} diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaTask.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaTask.java new file mode 100644 index 00000000..26012f2f --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaTask.java @@ -0,0 +1,148 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.samoa.streams.kafka; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2017 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.Properties; + +import org.apache.samoa.tasks.Task; +import org.apache.samoa.topology.ComponentFactory; +import org.apache.samoa.topology.Stream; +import org.apache.samoa.topology.Topology; +import org.apache.samoa.topology.TopologyBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.github.javacliparser.Configurable; +import com.github.javacliparser.IntOption; +import com.github.javacliparser.StringOption; + +/** + * Kafka task + * + * @author Jakub Jankowski + * @version 0.5.0-incubating-SNAPSHOT + * @since 0.5.0-incubating + * + */ + +public class KafkaTask implements Task, Configurable { + + private static final long serialVersionUID = 3984474041982397855L; + private static Logger logger = LoggerFactory.getLogger(KafkaTask.class); + + //TODO: confirm whether these should be identical for entrance and destination? 
+ Properties producerProps; + Properties consumerProps; + int timeout; + private final KafkaDeserializer deserializer; + private final KafkaSerializer serializer; + private final String topic; + + private TopologyBuilder builder; + private Topology kafkaTopology; + + public IntOption kafkaParallelismOption = new IntOption("parallelismOption", 'p', + "Number of destination Processors", 1, 1, Integer.MAX_VALUE); + + public StringOption evaluationNameOption = new StringOption("evaluationName", 'n', "Identifier of the evaluation", + "KafkaTask" + new SimpleDateFormat("yyyyMMddHHmmss").format(new Date())); + + /** + * Class constructor + * @param producerProps Properties of Kafka Producer + * @param consumerProps Properties of Kafka Consumer + * @see Kafka Consumer configuration + * @param topic Topic to which the destination processor will write + * @param timeout Timeout used when polling Kafka for new messages + * @param serializer Implementation of KafkaSerializer that handles arriving data serialization + * @param deserializer Implementation of KafkaDeserializer that handles arriving data deserialization + */ + public KafkaTask(Properties producerProps, Properties consumerProps, String topic, int timeout, KafkaSerializer serializer, KafkaDeserializer deserializer) { + this.producerProps = producerProps; + this.consumerProps = consumerProps; + this.deserializer = deserializer; + this.serializer = serializer; + this.topic = topic; + this.timeout = timeout; + } + + @Override + public void init() { + logger.info("Invoking init"); + if (builder == null) { + builder = new TopologyBuilder(); + logger.info("Successfully instantiating TopologyBuilder"); + + builder.initTopology(evaluationNameOption.getValue()); + logger.info("Successfully initializing SAMOA topology with name {}", evaluationNameOption.getValue()); + } + + // create entrance processor + KafkaEntranceProcessor sourceProcessor = new KafkaEntranceProcessor(consumerProps, topic, timeout, deserializer); + 
builder.addEntranceProcessor(sourceProcessor); + + // create stream + Stream stream = builder.createStream(sourceProcessor); + + // create destination processor + KafkaDestinationProcessor destProcessor = new KafkaDestinationProcessor(producerProps, topic, serializer); + builder.addProcessor(destProcessor, kafkaParallelismOption.getValue()); + builder.connectInputShuffleStream(stream, destProcessor); + + // build topology + kafkaTopology = builder.build(); + logger.info("Successfully built the topology"); + } + + @Override + public Topology getTopology() { + return kafkaTopology; + } + + @Override + public void setFactory(ComponentFactory factory) { + logger.info("Invoking setFactory: "+factory.toString()); + builder = new TopologyBuilder(factory); + logger.info("Successfully instantiating TopologyBuilder"); + + builder.initTopology(evaluationNameOption.getValue()); + logger.info("Successfully initializing SAMOA topology with name {}", evaluationNameOption.getValue()); + + } + +} diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleComponentFactory.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleComponentFactory.java new file mode 100644 index 00000000..155ce1f4 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleComponentFactory.java @@ -0,0 +1,53 @@ +package org.apache.samoa.streams.kafka.topology; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.core.EntranceProcessor; +import org.apache.samoa.core.Processor; +import org.apache.samoa.topology.ComponentFactory; +import org.apache.samoa.topology.EntranceProcessingItem; +import org.apache.samoa.topology.IProcessingItem; +import org.apache.samoa.topology.ProcessingItem; +import org.apache.samoa.topology.Stream; +import org.apache.samoa.topology.Topology; + +public class SimpleComponentFactory implements ComponentFactory { + + public ProcessingItem createPi(Processor processor, int paralellism) { + return new SimpleProcessingItem(processor, paralellism); + } + + public ProcessingItem createPi(Processor processor) { + return this.createPi(processor, 1); + } + + public EntranceProcessingItem createEntrancePi(EntranceProcessor processor) { + return new SimpleEntranceProcessingItem(processor); + } + + public Stream createStream(IProcessingItem sourcePi) { + return new SimpleStream(sourcePi); + } + + public Topology createTopology(String topoName) { + return new SimpleTopology(topoName); + } +} diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleEngine.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleEngine.java new file mode 100644 index 00000000..d446018b --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleEngine.java @@ -0,0 +1,37 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. + */ +package org.apache.samoa.streams.kafka.topology; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.topology.Topology; + +public class SimpleEngine { + + public static void submitTopology(Topology topology) { + SimpleTopology simpleTopology = (SimpleTopology) topology; + simpleTopology.run(); + // runs until completion + } + +} diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleEntranceProcessingItem.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleEntranceProcessingItem.java new file mode 100644 index 00000000..4c626dc2 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleEntranceProcessingItem.java @@ -0,0 +1,33 @@ +package org.apache.samoa.streams.kafka.topology; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ + +import org.apache.samoa.core.EntranceProcessor; +import org.apache.samoa.topology.LocalEntranceProcessingItem; + +class SimpleEntranceProcessingItem extends LocalEntranceProcessingItem { + public SimpleEntranceProcessingItem(EntranceProcessor processor) { + super(processor); + } + + // The default waiting time when there is no available events is 100ms + // Override waitForNewEvents() to change it +} diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleProcessingItem.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleProcessingItem.java new file mode 100644 index 00000000..3549b856 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleProcessingItem.java @@ -0,0 +1,87 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. + */ +package org.apache.samoa.streams.kafka.topology; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ + +import org.apache.samoa.core.ContentEvent; +import org.apache.samoa.core.Processor; +import org.apache.samoa.topology.AbstractProcessingItem; +import org.apache.samoa.topology.IProcessingItem; +import org.apache.samoa.topology.ProcessingItem; +import org.apache.samoa.topology.Stream; +import org.apache.samoa.utils.PartitioningScheme; +import org.apache.samoa.utils.StreamDestination; + +/** + * + * @author abifet + */ +class SimpleProcessingItem extends AbstractProcessingItem { + private IProcessingItem[] arrayProcessingItem; + + SimpleProcessingItem(Processor processor) { + super(processor); + } + + SimpleProcessingItem(Processor processor, int parallelism) { + super(processor); + this.setParallelism(parallelism); + } + + public IProcessingItem getProcessingItem(int i) { + return arrayProcessingItem[i]; + } + + @Override + protected ProcessingItem addInputStream(Stream inputStream, PartitioningScheme scheme) { + StreamDestination destination = new StreamDestination(this, this.getParallelism(), scheme); + ((SimpleStream) inputStream).addDestination(destination); + return this; + } + + public SimpleProcessingItem copy() { + Processor processor = this.getProcessor(); + return new SimpleProcessingItem(processor.newProcessor(processor)); + } + + public void processEvent(ContentEvent event, int counter) { + + int parallelism = this.getParallelism(); + // System.out.println("Process event "+event+" (isLast="+event.isLastEvent()+") with counter="+counter+" while parallelism="+parallelism); + if (this.arrayProcessingItem == null && parallelism > 0) { + // Init processing elements, the first time they are needed + this.arrayProcessingItem = new IProcessingItem[parallelism]; + for (int j = 0; j < parallelism; j++) { + arrayProcessingItem[j] = this.copy(); + arrayProcessingItem[j].getProcessor().onCreate(j); + } + } + if (this.arrayProcessingItem != null) { + IProcessingItem pi = this.getProcessingItem(counter); + Processor p = pi.getProcessor(); + // 
System.out.println("PI="+pi+", p="+p); + this.getProcessingItem(counter).getProcessor().process(event); + } + } +} diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleStream.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleStream.java new file mode 100644 index 00000000..269e0cc4 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleStream.java @@ -0,0 +1,95 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. + */ +package org.apache.samoa.streams.kafka.topology; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ + +import java.util.LinkedList; +import java.util.List; + +import org.apache.commons.lang3.builder.HashCodeBuilder; +import org.apache.samoa.core.ContentEvent; +import org.apache.samoa.topology.AbstractStream; +import org.apache.samoa.topology.IProcessingItem; +import org.apache.samoa.utils.StreamDestination; + +/** + * + * @author abifet + */ +class SimpleStream extends AbstractStream { + private List destinations; + private int maxCounter; + private int eventCounter; + + SimpleStream(IProcessingItem sourcePi) { + super(sourcePi); + this.destinations = new LinkedList<>(); + this.eventCounter = 0; + this.maxCounter = 1; + } + + private int getNextCounter() { + if (maxCounter > 0 && eventCounter >= maxCounter) + eventCounter = 0; + this.eventCounter++; + return this.eventCounter; + } + + @Override + public void put(ContentEvent event) { + this.put(event, this.getNextCounter()); + } + + private void put(ContentEvent event, int counter) { + SimpleProcessingItem pi; + int parallelism; + for (StreamDestination destination : destinations) { + pi = (SimpleProcessingItem) destination.getProcessingItem(); + parallelism = destination.getParallelism(); + switch (destination.getPartitioningScheme()) { + case SHUFFLE: + pi.processEvent(event, counter % parallelism); + break; + case GROUP_BY_KEY: + HashCodeBuilder hb = new HashCodeBuilder(); + hb.append(event.getKey()); + int key = hb.build() % parallelism; + pi.processEvent(event, key); + break; + case BROADCAST: + for (int p = 0; p < parallelism; p++) { + pi.processEvent(event, p); + } + break; + } + } + } + + public void addDestination(StreamDestination destination) { + this.destinations.add(destination); + if (maxCounter <= 0) + maxCounter = 1; + maxCounter *= destination.getParallelism(); + } +} diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleTopology.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleTopology.java new file mode 100644 index 
00000000..98dd7a5d --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleTopology.java @@ -0,0 +1,46 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. + */ +package org.apache.samoa.streams.kafka.topology; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.topology.AbstractTopology; + +public class SimpleTopology extends AbstractTopology { + SimpleTopology(String name) { + super(name); + } + + public void run() { + if (this.getEntranceProcessingItems() == null) + throw new IllegalStateException("You need to set entrance PI before running the topology."); + if (this.getEntranceProcessingItems().size() != 1) + throw new IllegalStateException("SimpleTopology supports 1 entrance PI only. 
Number of entrance PIs is " + + this.getEntranceProcessingItems().size()); + + SimpleEntranceProcessingItem entrancePi = (SimpleEntranceProcessingItem) this.getEntranceProcessingItems() + .toArray()[0]; + entrancePi.getProcessor().onCreate(0); // id=0 as it is not used in simple mode + entrancePi.startSendingEvents(); + } +} diff --git a/samoa-api/src/main/resources/kafka.avsc b/samoa-api/src/main/resources/kafka.avsc new file mode 100644 index 00000000..c21e1534 --- /dev/null +++ b/samoa-api/src/main/resources/kafka.avsc @@ -0,0 +1,61 @@ +[ +{ + "type": "record", + "name": "InstanceData", + "fields": [ + ] +}, +{ + "type": "record", + "name": "SingleClassInstanceData", + "fields": [ + {"name":"classValue", "type": "double"} + ] +}, +{ + "type": "record", + "name": "DenseInstanceData", + "fields": [ + {"name":"attributeValues", "type": {"type": "array", "items": "double"}} + ] +}, +{ + "type": "record", + "name": "SparseInstanceData", + "fields": [ + {"name":"attributeValues", "type": {"type": "array", "items": "double"}}, + {"name":"indexValues", "type": {"type": "array", "items": "int"}}, + {"name":"numberAttributes", "type": "int"} + ] +}, +{ + "type": "record", + "name": "SerializableInstance", + "fields": [ + {"name": "weight", "type": "double"}, + {"name": "instanceData", "type": ["null", "InstanceData", "DenseInstanceData", "SparseInstanceData", "SingleClassInstanceData"]}, + {"name": "classData", "type": "InstanceData"} + ] +}, +{ + "type": "record", + "name": "InstanceContent", + "fields": [ + {"name": "instanceIndex", "type": "long"}, + {"name": "classifierIndex", "type": "int"}, + {"name": "evaluationIndex", "type": "int"}, + {"name":"instance", "type":"SerializableInstance"}, + {"name": "isTraining", "type": "boolean"}, + {"name": "isTesting", "type": "boolean"}, + {"name": "isLast", "type": "boolean"} + ] +}, +{ + "type": "record", + "name": "InstanceContentEvent", + "fields": [ + {"name": "instanceContent", "type": "InstanceContent"} + ] +} +] + diff 
--git a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorTest.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorTest.java index 2a92a31a..3da9d6f3 100644 --- a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorTest.java +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorTest.java @@ -56,6 +56,7 @@ import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; +import org.junit.Ignore; import org.junit.Test; import static org.junit.Assert.*; import kafka.admin.AdminUtils; @@ -84,15 +85,16 @@ * * @author pwawrzyniak */ +//@Ignore public class KafkaEntranceProcessorTest { // @Tested // private KafkaEntranceProcessor kep; - private static final String ZKHOST = "127.0.0.1"; - private static final String BROKERHOST = "127.0.0.1"; - private static final String BROKERPORT = "9092"; - private static final String TOPIC = "test"; - private static final int NUM_INSTANCES = 500; + private static final String ZKHOST = "10.255.251.202"; //10.255.251.202 + private static final String BROKERHOST = "10.255.251.214"; //10.255.251.214 + private static final String BROKERPORT = "6667"; //6667, local: 9092 + private static final String TOPIC = "samoa_test"; //samoa_test, local: test + private static final int NUM_INSTANCES = 50; private static KafkaServer kafkaServer; @@ -108,28 +110,28 @@ public KafkaEntranceProcessorTest() { public static void setUpClass() throws IOException { // setup Zookeeper zkServer = new EmbeddedZookeeper(); - zkConnect = ZKHOST + ":" + zkServer.port(); + zkConnect = ZKHOST + ":" + "2181"; //+ zkServer.port(); zkClient = new ZkClient(zkConnect, 30000, 30000, ZKStringSerializer$.MODULE$); ZkUtils zkUtils = ZkUtils.apply(zkClient, false); // setup Broker - Properties brokerProps = new Properties(); + /*Properties brokerProps = new Properties(); brokerProps.setProperty("zookeeper.connect", zkConnect); 
brokerProps.setProperty("broker.id", "0"); brokerProps.setProperty("log.dirs", Files.createTempDirectory("kafka-").toAbsolutePath().toString()); brokerProps.setProperty("listeners", "PLAINTEXT://" + BROKERHOST + ":" + BROKERPORT); KafkaConfig config = new KafkaConfig(brokerProps); Time mock = new MockTime(); - kafkaServer = TestUtils.createServer(config, mock); + kafkaServer = TestUtils.createServer(config, mock);*/ // create topic - AdminUtils.createTopic(zkUtils, TOPIC, 1, 1, new Properties(), RackAwareMode.Disabled$.MODULE$); + //AdminUtils.createTopic(zkUtils, TOPIC, 1, 1, new Properties(), RackAwareMode.Disabled$.MODULE$); } @AfterClass public static void tearDownClass() { - kafkaServer.shutdown(); + //kafkaServer.shutdown(); zkClient.close(); zkServer.shutdown(); } @@ -144,7 +146,7 @@ public void tearDown() { } - @Test + /*@Test public void testFetchingNewData() throws InterruptedException, ExecutionException, TimeoutException { Logger logger = Logger.getLogger(KafkaEntranceProcessorTest.class.getName()); @@ -187,6 +189,56 @@ public void run() { assertEquals("Number of sent and received instances", NUM_INSTANCES, z); + }*/ + + @Test + public void testFetchingNewDataWithAvro() throws InterruptedException, ExecutionException, TimeoutException { + Logger logger = Logger.getLogger(KafkaEntranceProcessorTest.class.getName()); + logger.log(Level.INFO, "AVRO"); + logger.log(Level.INFO, "testFetchingNewDataWithAvro"); + Properties props = TestUtilsForKafka.getConsumerProperties(); + props.setProperty("auto.offset.reset", "earliest"); + KafkaEntranceProcessor kep = new KafkaEntranceProcessor(props, TOPIC, 10000, new KafkaAvroMapper()); + kep.onCreate(1); + +// prepare new thread for data producing + Thread th = new Thread(new Runnable() { + @Override + public void run() { + KafkaProducer producer = new KafkaProducer<>(TestUtilsForKafka.getProducerProperties()); + + Random r = new Random(); + InstancesHeader header = TestUtilsForKafka.generateHeader(10); + 
KafkaAvroMapper avroMapper = new KafkaAvroMapper(); + int i = 0; + for (i = 0; i < NUM_INSTANCES; i++) { + try { + //byte[] data = avroMapper.serialize(TestUtilsForKafka.getData(r, 10, header)); + byte[] data = KafkaAvroMapper.avroBurrSerialize(InstanceContentEvent.class, TestUtilsForKafka.getData(r, 10, header)); + if(data == null) + Logger.getLogger(KafkaEntranceProcessorTest.class.getName()).log(Level.INFO, "Serialize result: null ("+i+")"); + ProducerRecord record = new ProducerRecord(TOPIC, data); + long stat = producer.send(record).get(10, TimeUnit.DAYS).offset(); + Thread.sleep(5); + Logger.getLogger(KafkaEntranceProcessorTest.class.getName()).log(Level.INFO, "Sent avro message with ID={0} to Kafka!, offset={1}", new Object[]{i, stat}); + } catch (InterruptedException | ExecutionException | TimeoutException ex) { + Logger.getLogger(KafkaEntranceProcessorTest.class.getName()).log(Level.SEVERE, null, ex); + } + } + producer.flush(); + producer.close(); + } + }); + th.start(); + + int z = 0; + while (kep.hasNext() && z < NUM_INSTANCES) { + logger.log(Level.INFO, "{0} {1}", new Object[]{z++, kep.nextEvent().toString()}); + } + + assertEquals("Number of sent and received instances", NUM_INSTANCES, z); + + } // private Properties getProducerProperties() { diff --git a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaTaskTest.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaTaskTest.java new file mode 100644 index 00000000..31f34fbb --- /dev/null +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaTaskTest.java @@ -0,0 +1,138 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.samoa.streams.kafka; + +import java.io.IOException; +import java.nio.charset.Charset; +import java.nio.file.Files; +import java.util.Properties; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeoutException; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.I0Itec.zkclient.ZkClient; +import org.apache.kafka.common.utils.Time; +import org.apache.samoa.streams.kafka.topology.SimpleComponentFactory; +import org.apache.samoa.streams.kafka.topology.SimpleEngine; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Ignore; +import org.junit.Test; + +import kafka.admin.AdminUtils; +import kafka.admin.RackAwareMode; +import kafka.server.KafkaConfig; +import kafka.server.KafkaServer; +import kafka.utils.MockTime; +import kafka.utils.TestUtils; +import kafka.utils.ZKStringSerializer$; +import kafka.utils.ZkUtils; +import kafka.zk.EmbeddedZookeeper; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2017 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +/** +* +* @author Jakub Jankowski +*/ +@Ignore +public class KafkaTaskTest { + + private static final String ZKHOST = "10.255.251.202"; //10.255.251.202 + private static final String BROKERHOST = "10.255.251.214"; //10.255.251.214 + private static final String BROKERPORT = "6667"; //6667, local: 9092 + private static final String TOPIC = "samoa_test"; //samoa_test, local: test + private static final int NUM_INSTANCES = 500; + + + private static KafkaServer kafkaServer; + private static EmbeddedZookeeper zkServer; + private static ZkClient zkClient; + private static String zkConnect; + + @BeforeClass + public static void setUpClass() throws IOException { + // setup Zookeeper + zkServer = new EmbeddedZookeeper(); + zkConnect = ZKHOST + ":" + "2181"; //+ zkServer.port(); + zkClient = new ZkClient(zkConnect, 30000, 30000, ZKStringSerializer$.MODULE$); + ZkUtils zkUtils = ZkUtils.apply(zkClient, false); + + // setup Broker + /*Properties brokerProps = new Properties(); + brokerProps.setProperty("zookeeper.connect", zkConnect); + brokerProps.setProperty("broker.id", "0"); + brokerProps.setProperty("log.dirs", Files.createTempDirectory("kafka-").toAbsolutePath().toString()); + brokerProps.setProperty("listeners", "PLAINTEXT://" + BROKERHOST + ":" + BROKERPORT); + KafkaConfig config = new KafkaConfig(brokerProps); + Time mock = new MockTime(); + kafkaServer = TestUtils.createServer(config, mock);*/ + + // create topic + //AdminUtils.createTopic(zkUtils, TOPIC, 1, 1, new Properties(), RackAwareMode.Disabled$.MODULE$); + } + + @AfterClass + public static void tearDownClass() { + //kafkaServer.shutdown(); + zkClient.close(); + zkServer.shutdown(); + } + + @Before + public void setUp() throws IOException { + + } + + @After + public void tearDown() { + + } + + @Test + public void testKafkaTask() throws InterruptedException, ExecutionException, 
TimeoutException { + Logger logger = Logger.getLogger(KafkaTaskTest.class.getName()); + logger.log(Level.INFO, "KafkaTask"); + Properties producerProps = TestUtilsForKafka.getProducerProperties(); + Properties consumerProps = TestUtilsForKafka.getConsumerProperties(); + + KafkaTask task = new KafkaTask(producerProps, consumerProps, "kafkaTaskTest", 10000, new KafkaJsonMapper(Charset.defaultCharset()), new KafkaJsonMapper(Charset.defaultCharset())); + task.setFactory(new SimpleComponentFactory()); + task.init(); + SimpleEngine.submitTopology(task.getTopology()); + } +} diff --git a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaUtilsTest.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaUtilsTest.java index 4cd51356..7c1c7c07 100644 --- a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaUtilsTest.java +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaUtilsTest.java @@ -72,6 +72,7 @@ import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; +import org.junit.Ignore; import org.junit.Test; import static org.junit.Assert.*; @@ -79,11 +80,12 @@ * * @author pwawrzyniak */ +@Ignore public class KafkaUtilsTest { - private static final String ZKHOST = "127.0.0.1"; - private static final String BROKERHOST = "127.0.0.1"; - private static final String BROKERPORT = "9092"; + private static final String ZKHOST = "10.255.251.202"; //10.255.251.202 + private static final String BROKERHOST = "10.255.251.214"; //10.255.251.214 + private static final String BROKERPORT = "6667"; //6667, local: 9092 private static final String TOPIC_R = "test-r"; private static final String TOPIC_S = "test-s"; @@ -102,29 +104,29 @@ public KafkaUtilsTest() { public static void setUpClass() throws IOException { // setup Zookeeper zkServer = new EmbeddedZookeeper(); - zkConnect = ZKHOST + ":" + zkServer.port(); + zkConnect = ZKHOST + ":" + "2181"; //+ zkServer.port(); zkClient = new ZkClient(zkConnect, 30000, 30000, 
ZKStringSerializer$.MODULE$); ZkUtils zkUtils = ZkUtils.apply(zkClient, false); // setup Broker - Properties brokerProps = new Properties(); + /*Properties brokerProps = new Properties(); brokerProps.setProperty("zookeeper.connect", zkConnect); brokerProps.setProperty("broker.id", "0"); brokerProps.setProperty("log.dirs", Files.createTempDirectory("kafkaUtils-").toAbsolutePath().toString()); brokerProps.setProperty("listeners", "PLAINTEXT://" + BROKERHOST + ":" + BROKERPORT); KafkaConfig config = new KafkaConfig(brokerProps); Time mock = new MockTime(); - kafkaServer = TestUtils.createServer(config, mock); + kafkaServer = TestUtils.createServer(config, mock);*/ // create topics - AdminUtils.createTopic(zkUtils, TOPIC_R, 1, 1, new Properties(), RackAwareMode.Disabled$.MODULE$); - AdminUtils.createTopic(zkUtils, TOPIC_S, 1, 1, new Properties(), RackAwareMode.Disabled$.MODULE$); + //AdminUtils.createTopic(zkUtils, TOPIC_R, 1, 1, new Properties(), RackAwareMode.Disabled$.MODULE$); + //AdminUtils.createTopic(zkUtils, TOPIC_S, 1, 1, new Properties(), RackAwareMode.Disabled$.MODULE$); } @AfterClass public static void tearDownClass() { - kafkaServer.shutdown(); + //kafkaServer.shutdown(); zkClient.close(); zkServer.shutdown(); } @@ -167,7 +169,7 @@ public void testGetKafkaMessages() throws Exception { instance.initializeConsumer(topics); logger.log(Level.INFO, "Produce data"); - List expResult = sendAndGetMessages(500); + List expResult = sendAndGetMessages(50); logger.log(Level.INFO, "Get results from Kafka"); List result = instance.getKafkaMessages(); @@ -214,7 +216,7 @@ public void testSendKafkaMessage() { Random r = new Random(); InstancesHeader header = TestUtilsForKafka.generateHeader(10); Gson gson = new Gson(); - for (int i = 0; i < 500; i++) { + for (int i = 0; i < 50; i++) { byte[] val = gson.toJson(TestUtilsForKafka.getData(r, 10, header)).getBytes(); sent.add(val); instance.sendKafkaMessage(TOPIC_S, val); diff --git 
a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/TestUtilsForKafka.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/TestUtilsForKafka.java index 0d30429e..8d85fd75 100644 --- a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/TestUtilsForKafka.java +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/TestUtilsForKafka.java @@ -52,10 +52,10 @@ */ public class TestUtilsForKafka { - private static final String ZKHOST = "127.0.0.1"; - private static final String BROKERHOST = "127.0.0.1"; - private static final String BROKERPORT = "9092"; - private static final String TOPIC = "test"; + private static final String ZKHOST = "10.255.251.202"; //10.255.251.202 + private static final String BROKERHOST = "10.255.251.214"; //10.255.251.214 + private static final String BROKERPORT = "6667"; //6667, local: 9092 + private static final String TOPIC = "samoa_test"; //samoa_test, local: test protected static InstanceContentEvent getData(Random instanceRandom, int numAtts, InstancesHeader header) { double[] attVals = new double[numAtts + 1]; @@ -126,7 +126,22 @@ protected static Properties getConsumerProperties() { consumerProps.setProperty("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer"); consumerProps.setProperty("group.id", "test"); consumerProps.setProperty("auto.offset.reset", "earliest"); -// consumerProps.setProperty("client.id", "consumer0"); + //consumerProps.setProperty("client.id", "consumer0"); return consumerProps; } + + protected static Properties getConsumerProducerProperties() { + Properties props = new Properties(); + props.setProperty("bootstrap.servers", BROKERHOST + ":" + BROKERPORT); + props.put("enable.auto.commit", "true"); + props.put("auto.commit.interval.ms", "1000"); + props.setProperty("key.serializer", "org.apache.kafka.common.serialization.StringSerializer"); + props.setProperty("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer"); + 
props.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); + props.setProperty("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer"); + props.setProperty("group.id", "burrito"); + props.setProperty("auto.offset.reset", "earliest"); + props.setProperty("client.id", "burrito"); + return props; + } } From ae7889fce1f52afdaa6d7e6080cff54dd1b01b9b Mon Sep 17 00:00:00 2001 From: Jakub Jankowski Date: Wed, 10 May 2017 14:37:25 +0200 Subject: [PATCH 09/17] Fixing avro deserialization --- .../samoa/streams/kafka/KafkaAvroMapper.java | 110 +++++++++--------- .../kafka/KafkaEntranceProcessorTest.java | 5 +- 2 files changed, 61 insertions(+), 54 deletions(-) diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaAvroMapper.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaAvroMapper.java index 91902d06..42d11bc3 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaAvroMapper.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaAvroMapper.java @@ -21,14 +21,17 @@ import java.io.IOException; import org.apache.avro.Schema; +import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.io.BinaryEncoder; +import org.apache.avro.io.DatumReader; import org.apache.avro.io.DatumWriter; import org.apache.avro.io.Decoder; import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; import org.apache.avro.reflect.ReflectData; +import org.apache.avro.reflect.ReflectDatumReader; import org.apache.avro.reflect.ReflectDatumWriter; import org.apache.avro.specific.SpecificDatumReader; import org.apache.avro.specific.SpecificDatumWriter; @@ -71,65 +74,22 @@ public class KafkaAvroMapper implements KafkaDeserializer, @Override public byte[] serialize(InstanceContentEvent message) { - return toBytesGeneric(InstanceContentEvent.class, 
message); + return avroSerialize(InstanceContentEvent.class, message); } @Override public InstanceContentEvent deserialize(byte[] message) { - return avroDeserialize(message, InstanceContentEvent.class, null); + return avroDeserialize(message, InstanceContentEvent.class); } - public static byte[] avroSerialize(Class clazz, Object object) { - byte[] ret = null; - try { - if (object == null || !(object instanceof SpecificRecord)) { - return null; - } - - T record = (T) object; - ByteArrayOutputStream out = new ByteArrayOutputStream(); - Encoder e = EncoderFactory.get().directBinaryEncoder(out, null); - SpecificDatumWriter w = new SpecificDatumWriter(clazz); - w.write(record, e); - e.flush(); - ret = out.toByteArray(); - } catch (IOException e) { - - } - - return ret; - } - - public static T avroDeserialize(byte[] avroBytes, Class clazz, Schema schema) { - T ret = null; - try { - ByteArrayInputStream in = new ByteArrayInputStream(avroBytes); - Decoder d = DecoderFactory.get().directBinaryDecoder(in, null); - SpecificDatumReader reader = new SpecificDatumReader(clazz); - ret = reader.read(null, d); - } catch (IOException e) { - - } - - return ret; - } - - public static byte[] toBytesGeneric(final Class cls, final V v) { - final ByteArrayOutputStream bout = new ByteArrayOutputStream(); - final Schema schema = ReflectData.AllowNull.get().getSchema(cls); - final DatumWriter writer = new ReflectDatumWriter(schema); - final BinaryEncoder binEncoder = EncoderFactory.get().binaryEncoder(bout, null); - try { - writer.write(v, binEncoder); - binEncoder.flush(); - } catch (final Exception e) { - throw new RuntimeException(e); - } - return bout.toByteArray(); - } - - public static byte[] avroBurrSerialize(final Class cls, final V v) { + /** + * Avro serialization based on specified schema + * @param cls + * @param v + * @return + */ + public static byte[] avroSerialize(final Class cls, final V v) { ByteArrayOutputStream bout = new ByteArrayOutputStream(); try { Schema schema = 
new Schema.Parser().parse(new File("C:/java/avro/kafka.avsc")); @@ -154,5 +114,51 @@ public static byte[] avroBurrSerialize(final Class cls, final V v) { return bout.toByteArray(); } + + /** + * Avro deserialization based on specified schema + * @param cls + * @param v + * @return + */ + public static V avroDeserialize(byte[] avroBytes, Class clazz) { + V ret = null; + try { + Schema schema = new Schema.Parser().parse(new File("C:/java/avro/kafka.avsc")); + ByteArrayInputStream in = new ByteArrayInputStream(avroBytes); + DatumReader reader = new GenericDatumReader<>(schema); + + Decoder decoder = DecoderFactory.get().directBinaryDecoder(in, null); + + ret = reader.read(null, decoder); + } catch (IOException e) { + e.printStackTrace(); + } catch (final Exception e) { + throw new RuntimeException(e); + } + + return ret; + } + + /** + * Avro serialization using reflection + * @param cls + * @param v + * @return + */ + public static byte[] toBytesGeneric(final Class cls, final V v) { + final ByteArrayOutputStream bout = new ByteArrayOutputStream(); + final Schema schema = ReflectData.AllowNull.get().getSchema(cls); + final DatumWriter writer = new ReflectDatumWriter(schema); + final BinaryEncoder binEncoder = EncoderFactory.get().binaryEncoder(bout, null); + try { + writer.write(v, binEncoder); + binEncoder.flush(); + } catch (final Exception e) { + throw new RuntimeException(e); + } + + return bout.toByteArray(); + } } diff --git a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorTest.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorTest.java index 3da9d6f3..bc2a11e0 100644 --- a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorTest.java +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorTest.java @@ -214,7 +214,7 @@ public void run() { for (i = 0; i < NUM_INSTANCES; i++) { try { //byte[] data = avroMapper.serialize(TestUtilsForKafka.getData(r, 
10, header)); - byte[] data = KafkaAvroMapper.avroBurrSerialize(InstanceContentEvent.class, TestUtilsForKafka.getData(r, 10, header)); + byte[] data = KafkaAvroMapper.avroSerialize(InstanceContentEvent.class, TestUtilsForKafka.getData(r, 10, header)); if(data == null) Logger.getLogger(KafkaEntranceProcessorTest.class.getName()).log(Level.INFO, "Serialize result: null ("+i+")"); ProducerRecord record = new ProducerRecord(TOPIC, data); @@ -233,7 +233,8 @@ public void run() { int z = 0; while (kep.hasNext() && z < NUM_INSTANCES) { - logger.log(Level.INFO, "{0} {1}", new Object[]{z++, kep.nextEvent().toString()}); + InstanceContentEvent event = (InstanceContentEvent)kep.nextEvent(); + logger.log(Level.INFO, "{0} {1}", new Object[]{z++, event.getInstance().toString()}); } assertEquals("Number of sent and received instances", NUM_INSTANCES, z); From 107c4c5f72fcacc38d02bfc7cd1552a1593cdd5a Mon Sep 17 00:00:00 2001 From: Jakub Jankowski Date: Wed, 10 May 2017 14:45:16 +0200 Subject: [PATCH 10/17] Fixing avro deserialization --- .../java/org/apache/samoa/streams/kafka/KafkaAvroMapper.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaAvroMapper.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaAvroMapper.java index 42d11bc3..afbc0022 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaAvroMapper.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaAvroMapper.java @@ -21,7 +21,6 @@ import java.io.IOException; import org.apache.avro.Schema; -import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.io.BinaryEncoder; import org.apache.avro.io.DatumReader; @@ -126,7 +125,7 @@ public static V avroDeserialize(byte[] avroBytes, Class clazz) { try { Schema schema = new Schema.Parser().parse(new File("C:/java/avro/kafka.avsc")); ByteArrayInputStream in = new 
ByteArrayInputStream(avroBytes); - DatumReader reader = new GenericDatumReader<>(schema); + DatumReader reader = new ReflectDatumReader<>(schema); Decoder decoder = DecoderFactory.get().directBinaryDecoder(in, null); From cde0642ccc768f7492e8c5811b2f1232e58acec4 Mon Sep 17 00:00:00 2001 From: pwawrzyniak Date: Fri, 12 May 2017 17:17:27 +0200 Subject: [PATCH 11/17] Rebuild of Consumer-related classes (added separate thread to read from Kafka, blocking until sth received) Major cleanup in tests --- .../streams/kafka/KafkaConsumerThread.java | 157 ++++++++++++++++++ .../streams/kafka/KafkaEntranceProcessor.java | 2 +- .../samoa/streams/kafka/KafkaUtils.java | 47 ++---- .../kafka/KafkaDestinationProcessorTest.java | 47 ++++-- .../kafka/KafkaEntranceProcessorTest.java | 151 +++++++---------- .../samoa/streams/kafka/KafkaTaskTest.java | 56 +++++-- .../samoa/streams/kafka/KafkaUtilsTest.java | 64 ++++--- .../streams/kafka/TestUtilsForKafka.java | 22 +-- 8 files changed, 349 insertions(+), 197 deletions(-) create mode 100644 samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaConsumerThread.java diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaConsumerThread.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaConsumerThread.java new file mode 100644 index 00000000..6522f67c --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaConsumerThread.java @@ -0,0 +1,157 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.samoa.streams.kafka; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.Properties; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.apache.kafka.clients.consumer.ConsumerRecords; +import org.apache.kafka.clients.consumer.KafkaConsumer; + +/** + * + * @author pwawrzyniak + */ +class KafkaConsumerThread extends Thread { + + // Consumer class for internal use to retrieve messages from Kafka + private transient KafkaConsumer consumer; + + private Logger log = Logger.getLogger(KafkaConsumerThread.class.getName()); + + private final Properties consumerProperties; + private final Collection topics; + private final long consumerTimeout; + private final List buffer; + // used to synchronize things + private final Object lock; + private boolean running; + + /** + * Class constructor + * + * @param consumerProperties Properties of Consumer + * @param topics Topics to fetch (subscribe) + * @param consumerTimeout Timeout for data polling + */ + KafkaConsumerThread(Properties consumerProperties, Collection topics, long consumerTimeout) { + this.running = false; + this.consumerProperties = consumerProperties; + this.topics = topics; + this.consumerTimeout = consumerTimeout; + this.buffer = new ArrayList<>(); + lock = new Object(); + } + + @Override + public void run() { + + initializeConsumer(); + + while (running) { + fetchDataFromKafka(); + } + + cleanUp(); + } + + /** + * Method for fetching data from Apache Kafka. 
It takes care of received + * data + */ + private void fetchDataFromKafka() { + if (consumer != null) { + if (!consumer.subscription().isEmpty()) { + try { + List kafkaMsg = getMessagesBytes(consumer.poll(consumerTimeout)); + fillBufferAndNotifyWaits(kafkaMsg); + } catch (Throwable t) { + Logger.getLogger(KafkaConsumerThread.class.getName()).log(Level.SEVERE, null, t); + } + } + } + } + + /** + * Copies received messages to class buffer and notifies Processor to grab + * the data. + * + * @param kafkaMsg Messages received from Kafka + */ + private void fillBufferAndNotifyWaits(List kafkaMsg) { + synchronized (lock) { + buffer.addAll(kafkaMsg); + if (buffer.size() > 0) { + lock.notifyAll(); + } + } + } + + private void cleanUp() { + // clean resources + if (consumer != null) { + consumer.unsubscribe(); + consumer.close(); + } + } + + private void initializeConsumer() { + // lazy instantiation + log.log(Level.INFO, "Instantiating Kafka consumer"); + if (consumer == null) { + consumer = new KafkaConsumer<>(consumerProperties); + running = true; + } + consumer.subscribe(topics); + } + + private List getMessagesBytes(ConsumerRecords poll) { + Iterator> iterator = poll.iterator(); + List ret = new ArrayList<>(); + while (iterator.hasNext()) { + ret.add(iterator.next().value()); + } + return ret; + } + + void close() { + running = false; + } + + List getKafkaMessages() { + synchronized (lock) { + if (buffer.isEmpty()) { + try { + // block the call until new messages are received + lock.wait(); + } catch (InterruptedException ex) { + Logger.getLogger(KafkaConsumerThread.class.getName()).log(Level.SEVERE, null, ex); + } + } + ArrayList ret = new ArrayList<>(); + // copy buffer to return list + ret.addAll(buffer); + // clear message buffer + buffer.clear(); + return ret; + } + } +} diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessor.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessor.java index 
2b0b808c..7079c588 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessor.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessor.java @@ -104,7 +104,7 @@ public boolean hasNext() { @Override public ContentEvent nextEvent() { - // assume this will never be called when buffer is empty! + // assume this will never be called when buffer is empty! return this.deserializer.deserialize(buffer.remove(buffer.size() - 1)); } diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaUtils.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaUtils.java index 06358776..75b54021 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaUtils.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaUtils.java @@ -61,8 +61,7 @@ */ class KafkaUtils { - // Consumer class for internal use to retrieve messages from Kafka - private transient KafkaConsumer consumer; + private transient KafkaConsumerThread kafkaConsumerThread; private transient KafkaProducer producer; @@ -72,6 +71,7 @@ class KafkaUtils { // Timeout for Kafka Consumer private long consumerTimeout; + /** * Class constructor @@ -86,6 +86,10 @@ public KafkaUtils(Properties consumerProperties, Properties producerProperties, this.consumerTimeout = consumerTimeout; } + /** + * Creates new KafkaUtils from existing instance + * @param kafkaUtils Instance of KafkaUtils + */ KafkaUtils(KafkaUtils kafkaUtils) { this.consumerProperties = kafkaUtils.consumerProperties; this.producerProperties = kafkaUtils.producerProperties; @@ -93,25 +97,18 @@ public KafkaUtils(Properties consumerProperties, Properties producerProperties, } /** - * Method used to initialize Kafka Consumer, i.e. instantiate it and + * Method used to initialize Kafka Consumer Thread, i.e. 
instantiate it and * subscribe to configured topic * * @param topics List of Kafka topics that consumer should subscribe to */ - public void initializeConsumer(Collection topics) { - // lazy instantiation - if (consumer == null) { - consumer = new KafkaConsumer<>(consumerProperties); - } - consumer.subscribe(topics); -// consumer.seekToBeginning(consumer.assignment()); + public void initializeConsumer(Collection topics) { + kafkaConsumerThread = new KafkaConsumerThread(consumerProperties, topics, consumerTimeout); + kafkaConsumerThread.start(); } public void closeConsumer() { - if (consumer != null) { - consumer.unsubscribe(); - consumer.close(); - } + kafkaConsumerThread.close(); } public void initializeProducer() { @@ -135,27 +132,7 @@ public void closeProducer(){ * or is not subscribed to any topic. */ public List getKafkaMessages() throws Exception { - - if (consumer != null) { - if (!consumer.subscription().isEmpty()) { - return getMessagesBytes(consumer.poll(consumerTimeout)); - } else { - // TODO: do it more elegant way - throw new Exception("Consumer subscribed to no topics!"); - } - } else { - // TODO: do more elegant way - throw new Exception("Consumer not initialised"); - } - } - - private List getMessagesBytes(ConsumerRecords poll) { - Iterator> iterator = poll.iterator(); - List ret = new ArrayList<>(); - while (iterator.hasNext()) { - ret.add(iterator.next().value()); - } - return ret; + return kafkaConsumerThread.getKafkaMessages(); } public long sendKafkaMessage(String topic, byte[] message) { diff --git a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessorTest.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessorTest.java index a138763c..bf45ffba 100644 --- a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessorTest.java +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessorTest.java @@ -15,6 +15,25 @@ */ package 
org.apache.samoa.streams.kafka; +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2017 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ import java.io.IOException; import java.nio.charset.Charset; import java.nio.file.Files; @@ -51,7 +70,7 @@ /** * - * @author pwawrzyniak + * @author pwawrzyniak */ public class KafkaDestinationProcessorTest { @@ -59,7 +78,7 @@ public class KafkaDestinationProcessorTest { private static final String BROKERHOST = "127.0.0.1"; private static final String BROKERPORT = "9092"; private static final String TOPIC = "test-kdp"; - private static final int NUM_INSTANCES = 500; + private static final int NUM_INSTANCES = 11111; private static final int CONSUMER_TIMEOUT = 1000; private static KafkaServer kafkaServer; @@ -94,7 +113,7 @@ public static void setUpClass() throws IOException { } @AfterClass - public static void tearDownClass() { + public static void tearDownClass() { kafkaServer.shutdown(); zkClient.close(); zkServer.shutdown(); @@ -114,34 +133,28 @@ public void tearDown() { public void testSendingData() throws InterruptedException, ExecutionException, TimeoutException { final Logger logger = Logger.getLogger(KafkaDestinationProcessorTest.class.getName()); - Properties props = TestUtilsForKafka.getProducerProperties(); + Properties props = TestUtilsForKafka.getProducerProperties(BROKERHOST,BROKERPORT); props.setProperty("auto.offset.reset", "earliest"); KafkaDestinationProcessor 
kdp = new KafkaDestinationProcessor(props, TOPIC, new KafkaJsonMapper(Charset.defaultCharset())); kdp.onCreate(1); final int[] i = {0}; - -// prepare new thread for data receiveing + + // prepare new thread for data receiveing Thread th = new Thread(new Runnable() { @Override public void run() { - KafkaConsumer consumer = new KafkaConsumer<>(TestUtilsForKafka.getConsumerProperties()); + KafkaConsumer consumer = new KafkaConsumer<>(TestUtilsForKafka.getConsumerProperties(BROKERHOST, BROKERPORT)); consumer.subscribe(Arrays.asList(TOPIC)); while (i[0] < NUM_INSTANCES) { try { ConsumerRecords cr = consumer.poll(CONSUMER_TIMEOUT); - Iterator> it = cr.iterator(); while (it.hasNext()) { ConsumerRecord record = it.next(); - logger.info(new String(record.value())); - logger.log(Level.INFO, "Current read offset is: {0}", record.offset()); i[0]++; } - - Thread.sleep(1); - - } catch (InterruptedException ex) { + } catch (Exception ex) { Logger.getLogger(KafkaDestinationProcessorTest.class.getName()).log(Level.SEVERE, null, ex); } } @@ -157,11 +170,11 @@ public void run() { for (z = 0; z < NUM_INSTANCES; z++) { InstanceContentEvent event = TestUtilsForKafka.getData(r, 10, header); kdp.process(event); - logger.log(Level.INFO, "{0} {1}", new Object[]{"Sent item with id: ", z}); - Thread.sleep(5); +// logger.log(Level.INFO, "{0} {1}", new Object[]{"Sent item with id: ", z}); } + // wait for all instances to be read - Thread.sleep(100); + Thread.sleep(2 * CONSUMER_TIMEOUT); assertEquals("Number of sent and received instances", z, i[0]); } } diff --git a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorTest.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorTest.java index bc2a11e0..009a6a77 100644 --- a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorTest.java +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorTest.java @@ -38,7 +38,6 @@ import 
java.io.IOException; import java.nio.charset.Charset; import java.nio.file.Files; -import java.util.ArrayList; import java.util.Properties; import java.util.Random; import java.util.concurrent.ExecutionException; @@ -46,17 +45,11 @@ import java.util.concurrent.TimeoutException; import java.util.logging.Level; import java.util.logging.Logger; -import mockit.Mocked; -import mockit.Tested; -import mockit.Expectations; -import org.apache.samoa.core.ContentEvent; -import org.apache.samoa.core.Processor; import org.apache.samoa.learners.InstanceContentEvent; import org.junit.After; import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; -import org.junit.Ignore; import org.junit.Test; import static org.junit.Assert.*; import kafka.admin.AdminUtils; @@ -72,36 +65,26 @@ import org.I0Itec.zkclient.ZkClient; import org.apache.kafka.clients.producer.KafkaProducer; import org.apache.kafka.clients.producer.ProducerRecord; -import org.apache.samoa.instances.Attribute; -import org.apache.samoa.instances.DenseInstance; -import org.apache.samoa.instances.Instance; -import org.apache.samoa.instances.Instances; import org.apache.samoa.instances.InstancesHeader; -import org.apache.samoa.moa.core.FastVector; -import org.apache.samoa.moa.core.InstanceExample; -import org.apache.samoa.streams.InstanceStream; /** * * @author pwawrzyniak */ -//@Ignore public class KafkaEntranceProcessorTest { -// @Tested -// private KafkaEntranceProcessor kep; - private static final String ZKHOST = "10.255.251.202"; //10.255.251.202 - private static final String BROKERHOST = "10.255.251.214"; //10.255.251.214 - private static final String BROKERPORT = "6667"; //6667, local: 9092 - private static final String TOPIC = "samoa_test"; //samoa_test, local: test - private static final int NUM_INSTANCES = 50; - - + private static final String ZKHOST = "127.0.0.1"; + private static final String BROKERHOST = "127.0.0.1"; + private static final String BROKERPORT = "9092"; + private static 
final String TOPIC_AVRO = "samoa_test-avro"; + private static final String TOPIC_JSON = "samoa_test-json"; + private static final int NUM_INSTANCES = 11111; + private static KafkaServer kafkaServer; private static EmbeddedZookeeper zkServer; private static ZkClient zkClient; private static String zkConnect; - + private static int TIMEOUT = 1000; public KafkaEntranceProcessorTest() { } @@ -110,30 +93,35 @@ public KafkaEntranceProcessorTest() { public static void setUpClass() throws IOException { // setup Zookeeper zkServer = new EmbeddedZookeeper(); - zkConnect = ZKHOST + ":" + "2181"; //+ zkServer.port(); + zkConnect = ZKHOST + ":" + zkServer.port(); zkClient = new ZkClient(zkConnect, 30000, 30000, ZKStringSerializer$.MODULE$); ZkUtils zkUtils = ZkUtils.apply(zkClient, false); // setup Broker - /*Properties brokerProps = new Properties(); + Properties brokerProps = new Properties(); brokerProps.setProperty("zookeeper.connect", zkConnect); brokerProps.setProperty("broker.id", "0"); brokerProps.setProperty("log.dirs", Files.createTempDirectory("kafka-").toAbsolutePath().toString()); brokerProps.setProperty("listeners", "PLAINTEXT://" + BROKERHOST + ":" + BROKERPORT); KafkaConfig config = new KafkaConfig(brokerProps); Time mock = new MockTime(); - kafkaServer = TestUtils.createServer(config, mock);*/ + kafkaServer = TestUtils.createServer(config, mock); - // create topic - //AdminUtils.createTopic(zkUtils, TOPIC, 1, 1, new Properties(), RackAwareMode.Disabled$.MODULE$); + // create topics + AdminUtils.createTopic(zkUtils, TOPIC_AVRO, 1, 1, new Properties(), RackAwareMode.Disabled$.MODULE$); + AdminUtils.createTopic(zkUtils, TOPIC_JSON, 1, 1, new Properties(), RackAwareMode.Disabled$.MODULE$); } @AfterClass public static void tearDownClass() { - //kafkaServer.shutdown(); - zkClient.close(); - zkServer.shutdown(); + try { + kafkaServer.shutdown(); + zkClient.close(); + zkServer.shutdown(); + } catch (Exception ex) { + 
Logger.getLogger(KafkaEntranceProcessorTest.class.getName()).log(Level.SEVERE, null, ex); + } } @Before @@ -146,20 +134,23 @@ public void tearDown() { } - /*@Test - public void testFetchingNewData() throws InterruptedException, ExecutionException, TimeoutException { + @Test + public void testFetchingNewDataWithJson() throws InterruptedException, ExecutionException, TimeoutException { Logger logger = Logger.getLogger(KafkaEntranceProcessorTest.class.getName()); - Properties props = TestUtilsForKafka.getConsumerProperties(); + logger.log(Level.INFO, "JSON"); + logger.log(Level.INFO, "testFetchingNewDataWithJson"); + Properties props = TestUtilsForKafka.getConsumerProperties(BROKERHOST, BROKERPORT); props.setProperty("auto.offset.reset", "earliest"); - KafkaEntranceProcessor kep = new KafkaEntranceProcessor(props, TOPIC, 10000, new KafkaJsonMapper(Charset.defaultCharset())); + KafkaEntranceProcessor kep = new KafkaEntranceProcessor(props, TOPIC_JSON, TIMEOUT, new KafkaJsonMapper(Charset.defaultCharset())); + kep.onCreate(1); -// prepare new thread for data producing + // prepare new thread for data producing Thread th = new Thread(new Runnable() { @Override public void run() { - KafkaProducer producer = new KafkaProducer<>(TestUtilsForKafka.getProducerProperties()); + KafkaProducer producer = new KafkaProducer<>(TestUtilsForKafka.getProducerProperties(BROKERHOST,BROKERPORT)); Random r = new Random(); InstancesHeader header = TestUtilsForKafka.generateHeader(10); @@ -167,10 +158,9 @@ public void run() { int i = 0; for (i = 0; i < NUM_INSTANCES; i++) { try { - ProducerRecord record = new ProducerRecord(TOPIC, gson.toJson(TestUtilsForKafka.getData(r, 10, header)).getBytes()); - long stat = producer.send(record).get(10, TimeUnit.DAYS).offset(); - Thread.sleep(5); - Logger.getLogger(KafkaEntranceProcessorTest.class.getName()).log(Level.INFO, "Sent message with ID={0} to Kafka!, offset={1}", new Object[]{i, stat}); + InstanceContentEvent event = TestUtilsForKafka.getData(r, 
10, header); + ProducerRecord record = new ProducerRecord(TOPIC_JSON, gson.toJson(event).getBytes()); + long stat = producer.send(record).get(10, TimeUnit.SECONDS).offset(); } catch (InterruptedException | ExecutionException | TimeoutException ex) { Logger.getLogger(KafkaEntranceProcessorTest.class.getName()).log(Level.SEVERE, null, ex); } @@ -182,45 +172,44 @@ public void run() { th.start(); int z = 0; - while (kep.hasNext() && z < NUM_INSTANCES) { - logger.log(Level.INFO, "{0} {1}", new Object[]{z++, kep.nextEvent().toString()}); - } + while (z < NUM_INSTANCES && kep.hasNext()) { + InstanceContentEvent event = (InstanceContentEvent) kep.nextEvent(); + z++; +// logger.log(Level.INFO, "{0} {1}", new Object[]{z, event.getInstance().toString()}); + } + + assertEquals("Number of sent and received instances", NUM_INSTANCES, z); + + } - assertEquals("Number of sent and received instances", NUM_INSTANCES, z); - - - }*/ - @Test public void testFetchingNewDataWithAvro() throws InterruptedException, ExecutionException, TimeoutException { Logger logger = Logger.getLogger(KafkaEntranceProcessorTest.class.getName()); logger.log(Level.INFO, "AVRO"); - logger.log(Level.INFO, "testFetchingNewDataWithAvro"); - Properties props = TestUtilsForKafka.getConsumerProperties(); + logger.log(Level.INFO, "testFetchingNewDataWithAvro"); + Properties props = TestUtilsForKafka.getConsumerProperties(BROKERHOST, BROKERPORT); props.setProperty("auto.offset.reset", "earliest"); - KafkaEntranceProcessor kep = new KafkaEntranceProcessor(props, TOPIC, 10000, new KafkaAvroMapper()); + KafkaEntranceProcessor kep = new KafkaEntranceProcessor(props, TOPIC_AVRO, TIMEOUT, new KafkaAvroMapper()); kep.onCreate(1); // prepare new thread for data producing Thread th = new Thread(new Runnable() { @Override public void run() { - KafkaProducer producer = new KafkaProducer<>(TestUtilsForKafka.getProducerProperties()); + KafkaProducer producer = new 
KafkaProducer<>(TestUtilsForKafka.getProducerProperties(BROKERHOST,BROKERPORT)); Random r = new Random(); InstancesHeader header = TestUtilsForKafka.generateHeader(10); - KafkaAvroMapper avroMapper = new KafkaAvroMapper(); + int i = 0; for (i = 0; i < NUM_INSTANCES; i++) { try { - //byte[] data = avroMapper.serialize(TestUtilsForKafka.getData(r, 10, header)); - byte[] data = KafkaAvroMapper.avroSerialize(InstanceContentEvent.class, TestUtilsForKafka.getData(r, 10, header)); - if(data == null) - Logger.getLogger(KafkaEntranceProcessorTest.class.getName()).log(Level.INFO, "Serialize result: null ("+i+")"); - ProducerRecord record = new ProducerRecord(TOPIC, data); - long stat = producer.send(record).get(10, TimeUnit.DAYS).offset(); - Thread.sleep(5); - Logger.getLogger(KafkaEntranceProcessorTest.class.getName()).log(Level.INFO, "Sent avro message with ID={0} to Kafka!, offset={1}", new Object[]{i, stat}); + byte[] data = KafkaAvroMapper.avroSerialize(InstanceContentEvent.class, TestUtilsForKafka.getData(r, 10, header)); + if (data == null) { + Logger.getLogger(KafkaEntranceProcessorTest.class.getName()).log(Level.INFO, "Serialize result: null ({0})", i); + } + ProducerRecord record = new ProducerRecord(TOPIC_AVRO, data); + long stat = producer.send(record).get(10, TimeUnit.SECONDS).offset(); } catch (InterruptedException | ExecutionException | TimeoutException ex) { Logger.getLogger(KafkaEntranceProcessorTest.class.getName()).log(Level.SEVERE, null, ex); } @@ -232,34 +221,12 @@ public void run() { th.start(); int z = 0; - while (kep.hasNext() && z < NUM_INSTANCES) { - InstanceContentEvent event = (InstanceContentEvent)kep.nextEvent(); - logger.log(Level.INFO, "{0} {1}", new Object[]{z++, event.getInstance().toString()}); - } + while (z < NUM_INSTANCES && kep.hasNext()) { + InstanceContentEvent event = (InstanceContentEvent) kep.nextEvent(); + z++; +// logger.log(Level.INFO, "{0} {1}", new Object[]{z, event.getInstance().toString()}); + } - assertEquals("Number of 
sent and received instances", NUM_INSTANCES, z); - - + assertEquals("Number of sent and received instances", NUM_INSTANCES, z); } - -// private Properties getProducerProperties() { -// Properties producerProps = new Properties(); -//// props.setProperty("zookeeper.connect", zkConnect); -// producerProps.setProperty("bootstrap.servers", BROKERHOST + ":" + BROKERPORT); -// producerProps.setProperty("key.serializer", "org.apache.kafka.common.serialization.StringSerializer"); -// producerProps.setProperty("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer"); -//// producerProps.setProperty("group.id", "test"); -// return producerProps; -// } -// -// private Properties getConsumerProperties() { -// Properties consumerProps = new Properties(); -// consumerProps.setProperty("bootstrap.servers", BROKERHOST + ":" + BROKERPORT); -// consumerProps.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); -// consumerProps.setProperty("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer"); -//// consumerProps.setProperty("group.id", "test"); -// consumerProps.setProperty("group.id", "group0"); -// consumerProps.setProperty("client.id", "consumer0"); -// return consumerProps; - } diff --git a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaTaskTest.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaTaskTest.java index 31f34fbb..08aae11c 100644 --- a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaTaskTest.java +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaTaskTest.java @@ -15,11 +15,14 @@ */ package org.apache.samoa.streams.kafka; +import com.google.gson.Gson; import java.io.IOException; import java.nio.charset.Charset; import java.nio.file.Files; import java.util.Properties; +import java.util.Random; import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; import 
java.util.concurrent.TimeoutException; import java.util.logging.Level; import java.util.logging.Logger; @@ -44,6 +47,9 @@ import kafka.utils.ZKStringSerializer$; import kafka.utils.ZkUtils; import kafka.zk.EmbeddedZookeeper; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.samoa.instances.InstancesHeader; /* * #%L @@ -72,11 +78,11 @@ @Ignore public class KafkaTaskTest { - private static final String ZKHOST = "10.255.251.202"; //10.255.251.202 - private static final String BROKERHOST = "10.255.251.214"; //10.255.251.214 - private static final String BROKERPORT = "6667"; //6667, local: 9092 + private static final String ZKHOST = "127.0.0.1";//10.255.251.202"; //10.255.251.202 + private static final String BROKERHOST = "127.0.0.1";//"10.255.251.214"; //10.255.251.214 + private static final String BROKERPORT = "9092"; //6667, local: 9092 private static final String TOPIC = "samoa_test"; //samoa_test, local: test - private static final int NUM_INSTANCES = 500; + private static final int NUM_INSTANCES = 125922; private static KafkaServer kafkaServer; @@ -87,10 +93,10 @@ public class KafkaTaskTest { @BeforeClass public static void setUpClass() throws IOException { // setup Zookeeper - zkServer = new EmbeddedZookeeper(); - zkConnect = ZKHOST + ":" + "2181"; //+ zkServer.port(); - zkClient = new ZkClient(zkConnect, 30000, 30000, ZKStringSerializer$.MODULE$); - ZkUtils zkUtils = ZkUtils.apply(zkClient, false); +// zkServer = new EmbeddedZookeeper(); +// zkConnect = ZKHOST + ":" + "2181"; //+ zkServer.port(); +// zkClient = new ZkClient(zkConnect, 30000, 30000, ZKStringSerializer$.MODULE$); +// ZkUtils zkUtils = ZkUtils.apply(zkClient, false); // setup Broker /*Properties brokerProps = new Properties(); @@ -109,8 +115,8 @@ public static void setUpClass() throws IOException { @AfterClass public static void tearDownClass() { //kafkaServer.shutdown(); - zkClient.close(); - zkServer.shutdown(); 
+// zkClient.close(); +// zkServer.shutdown(); } @Before @@ -127,12 +133,38 @@ public void tearDown() { public void testKafkaTask() throws InterruptedException, ExecutionException, TimeoutException { Logger logger = Logger.getLogger(KafkaTaskTest.class.getName()); logger.log(Level.INFO, "KafkaTask"); - Properties producerProps = TestUtilsForKafka.getProducerProperties(); - Properties consumerProps = TestUtilsForKafka.getConsumerProperties(); + Properties producerProps = TestUtilsForKafka.getProducerProperties(BROKERHOST,BROKERPORT); + Properties consumerProps = TestUtilsForKafka.getConsumerProperties(BROKERHOST,BROKERPORT); KafkaTask task = new KafkaTask(producerProps, consumerProps, "kafkaTaskTest", 10000, new KafkaJsonMapper(Charset.defaultCharset()), new KafkaJsonMapper(Charset.defaultCharset())); task.setFactory(new SimpleComponentFactory()); task.init(); SimpleEngine.submitTopology(task.getTopology()); + + Thread th = new Thread(new Runnable() { + @Override + public void run() { + KafkaProducer producer = new KafkaProducer<>(TestUtilsForKafka.getProducerProperties(BROKERHOST,BROKERPORT)); + + Random r = new Random(); + InstancesHeader header = TestUtilsForKafka.generateHeader(10); + Gson gson = new Gson(); + int i = 0; + for (i = 0; i < NUM_INSTANCES; i++) { + try { + ProducerRecord record = new ProducerRecord(TOPIC, gson.toJson(TestUtilsForKafka.getData(r, 10, header)).getBytes()); + long stat = producer.send(record).get(10, TimeUnit.DAYS).offset(); +// Thread.sleep(5); + Logger.getLogger(KafkaEntranceProcessorTest.class.getName()).log(Level.INFO, "Sent message with ID={0} to Kafka!, offset={1}", new Object[]{i, stat}); + } catch (InterruptedException | ExecutionException | TimeoutException ex) { + Logger.getLogger(KafkaEntranceProcessorTest.class.getName()).log(Level.SEVERE, null, ex); + } + } + producer.flush(); + producer.close(); + } + }); + th.start(); + } } diff --git a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaUtilsTest.java 
b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaUtilsTest.java index 7c1c7c07..e2b36fd7 100644 --- a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaUtilsTest.java +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaUtilsTest.java @@ -34,8 +34,6 @@ * limitations under the License. * #L% */ - - import com.google.gson.Gson; import java.io.IOException; import java.nio.file.Files; @@ -47,7 +45,6 @@ import java.util.Properties; import java.util.Random; import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.logging.Level; import java.util.logging.Logger; @@ -70,32 +67,31 @@ import org.apache.samoa.instances.InstancesHeader; import org.junit.After; import org.junit.AfterClass; +import static org.junit.Assert.*; import org.junit.Before; import org.junit.BeforeClass; -import org.junit.Ignore; import org.junit.Test; -import static org.junit.Assert.*; /** * * @author pwawrzyniak */ -@Ignore public class KafkaUtilsTest { - private static final String ZKHOST = "10.255.251.202"; //10.255.251.202 - private static final String BROKERHOST = "10.255.251.214"; //10.255.251.214 - private static final String BROKERPORT = "6667"; //6667, local: 9092 + private static final String ZKHOST = "127.0.0.1"; + private static final String BROKERHOST = "127.0.0.1"; + private static final String BROKERPORT = "9092"; private static final String TOPIC_R = "test-r"; private static final String TOPIC_S = "test-s"; + private static final int NUM_INSTANCES = 50; private static KafkaServer kafkaServer; private static EmbeddedZookeeper zkServer; private static ZkClient zkClient; private static String zkConnect; - private Logger logger = Logger.getLogger(KafkaUtilsTest.class.getCanonicalName()); - private long CONSUMER_TIMEOUT = 1000; + private static final Logger logger = Logger.getLogger(KafkaUtilsTest.class.getCanonicalName()); + private final long CONSUMER_TIMEOUT = 
1000; public KafkaUtilsTest() { } @@ -104,29 +100,29 @@ public KafkaUtilsTest() { public static void setUpClass() throws IOException { // setup Zookeeper zkServer = new EmbeddedZookeeper(); - zkConnect = ZKHOST + ":" + "2181"; //+ zkServer.port(); + zkConnect = ZKHOST + ":" + zkServer.port(); zkClient = new ZkClient(zkConnect, 30000, 30000, ZKStringSerializer$.MODULE$); ZkUtils zkUtils = ZkUtils.apply(zkClient, false); // setup Broker - /*Properties brokerProps = new Properties(); + Properties brokerProps = new Properties(); brokerProps.setProperty("zookeeper.connect", zkConnect); brokerProps.setProperty("broker.id", "0"); brokerProps.setProperty("log.dirs", Files.createTempDirectory("kafkaUtils-").toAbsolutePath().toString()); brokerProps.setProperty("listeners", "PLAINTEXT://" + BROKERHOST + ":" + BROKERPORT); KafkaConfig config = new KafkaConfig(brokerProps); Time mock = new MockTime(); - kafkaServer = TestUtils.createServer(config, mock);*/ + kafkaServer = TestUtils.createServer(config, mock); // create topics - //AdminUtils.createTopic(zkUtils, TOPIC_R, 1, 1, new Properties(), RackAwareMode.Disabled$.MODULE$); - //AdminUtils.createTopic(zkUtils, TOPIC_S, 1, 1, new Properties(), RackAwareMode.Disabled$.MODULE$); + AdminUtils.createTopic(zkUtils, TOPIC_R, 1, 1, new Properties(), RackAwareMode.Disabled$.MODULE$); + AdminUtils.createTopic(zkUtils, TOPIC_S, 1, 1, new Properties(), RackAwareMode.Disabled$.MODULE$); } @AfterClass public static void tearDownClass() { - //kafkaServer.shutdown(); + kafkaServer.shutdown(); zkClient.close(); zkServer.shutdown(); } @@ -146,13 +142,19 @@ public void tearDown() { public void testInitializeConsumer() throws Exception { logger.log(Level.INFO, "initializeConsumer"); Collection topics = Arrays.asList(TOPIC_R); - KafkaUtils instance = new KafkaUtils(TestUtilsForKafka.getConsumerProperties(), TestUtilsForKafka.getProducerProperties(), CONSUMER_TIMEOUT); + KafkaUtils instance = new 
KafkaUtils(TestUtilsForKafka.getConsumerProperties(BROKERHOST,BROKERPORT), TestUtilsForKafka.getProducerProperties(BROKERHOST,BROKERPORT), CONSUMER_TIMEOUT); assertNotNull(instance); instance.initializeConsumer(topics); + Thread.sleep(1000); + instance.closeConsumer(); + + Thread.sleep(CONSUMER_TIMEOUT); - assertNotNull(instance.getKafkaMessages()); + instance.initializeConsumer(topics); + Thread.sleep(1000); instance.closeConsumer(); + assertTrue(true); } /** @@ -162,14 +164,17 @@ public void testInitializeConsumer() throws Exception { public void testGetKafkaMessages() throws Exception { logger.log(Level.INFO, "getKafkaMessages"); Collection topics = Arrays.asList(TOPIC_R); - KafkaUtils instance = new KafkaUtils(TestUtilsForKafka.getConsumerProperties(), TestUtilsForKafka.getProducerProperties(), CONSUMER_TIMEOUT); + KafkaUtils instance = new KafkaUtils(TestUtilsForKafka.getConsumerProperties(BROKERHOST,BROKERPORT), TestUtilsForKafka.getProducerProperties(BROKERHOST,BROKERPORT), CONSUMER_TIMEOUT); assertNotNull(instance); logger.log(Level.INFO, "Initialising consumer"); instance.initializeConsumer(topics); logger.log(Level.INFO, "Produce data"); - List expResult = sendAndGetMessages(50); + List expResult = sendAndGetMessages(NUM_INSTANCES); + + logger.log(Level.INFO, "Wait a moment"); + Thread.sleep(CONSUMER_TIMEOUT); logger.log(Level.INFO, "Get results from Kafka"); List result = instance.getKafkaMessages(); @@ -180,7 +185,7 @@ public void testGetKafkaMessages() throws Exception { private List sendAndGetMessages(int maxNum) throws InterruptedException, ExecutionException, TimeoutException { List ret; - try (KafkaProducer producer = new KafkaProducer<>(TestUtilsForKafka.getProducerProperties("sendM-test"))) { + try (KafkaProducer producer = new KafkaProducer<>(TestUtilsForKafka.getProducerProperties("sendM-test",BROKERHOST,BROKERPORT))) { ret = new ArrayList<>(); Random r = new Random(); InstancesHeader header = TestUtilsForKafka.generateHeader(10); @@ -190,25 
+195,28 @@ private List sendAndGetMessages(int maxNum) throws InterruptedException, ProducerRecord record = new ProducerRecord(TOPIC_R, gson.toJson(TestUtilsForKafka.getData(r, 10, header)).getBytes()); ret.add(record.value()); producer.send(record); - } producer.flush(); + } + producer.flush(); } return ret; } /** * Test of sendKafkaMessage method, of class KafkaUtils. + * + * @throws java.lang.InterruptedException */ @Test - public void testSendKafkaMessage() { + public void testSendKafkaMessage() throws InterruptedException { logger.log(Level.INFO, "sendKafkaMessage"); logger.log(Level.INFO, "Initialising producer"); - KafkaUtils instance = new KafkaUtils(TestUtilsForKafka.getConsumerProperties(), TestUtilsForKafka.getProducerProperties("rcv-test"), CONSUMER_TIMEOUT); + KafkaUtils instance = new KafkaUtils(TestUtilsForKafka.getConsumerProperties(BROKERHOST,BROKERPORT), TestUtilsForKafka.getProducerProperties("rcv-test", BROKERHOST,BROKERPORT), CONSUMER_TIMEOUT); instance.initializeProducer(); logger.log(Level.INFO, "Initialising consumer"); KafkaConsumer consumer; - consumer = new KafkaConsumer<>(TestUtilsForKafka.getConsumerProperties()); + consumer = new KafkaConsumer<>(TestUtilsForKafka.getConsumerProperties(BROKERHOST,BROKERPORT)); consumer.subscribe(Arrays.asList(TOPIC_S)); logger.log(Level.INFO, "Produce data"); @@ -216,11 +224,13 @@ public void testSendKafkaMessage() { Random r = new Random(); InstancesHeader header = TestUtilsForKafka.generateHeader(10); Gson gson = new Gson(); - for (int i = 0; i < 50; i++) { + for (int i = 0; i < NUM_INSTANCES; i++) { byte[] val = gson.toJson(TestUtilsForKafka.getData(r, 10, header)).getBytes(); sent.add(val); instance.sendKafkaMessage(TOPIC_S, val); } + // wait for Kafka a bit :) + Thread.sleep(CONSUMER_TIMEOUT); logger.log(Level.INFO, "Get results from Kafka"); ConsumerRecords records = consumer.poll(CONSUMER_TIMEOUT); diff --git a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/TestUtilsForKafka.java 
b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/TestUtilsForKafka.java index 8d85fd75..87ab16c9 100644 --- a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/TestUtilsForKafka.java +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/TestUtilsForKafka.java @@ -48,14 +48,12 @@ /** * - * @author pwawrzyniak + * @author pwawrzyniak */ public class TestUtilsForKafka { - private static final String ZKHOST = "10.255.251.202"; //10.255.251.202 - private static final String BROKERHOST = "10.255.251.214"; //10.255.251.214 - private static final String BROKERPORT = "6667"; //6667, local: 9092 - private static final String TOPIC = "samoa_test"; //samoa_test, local: test +// private static final String BROKERHOST = "127.0.0.1"; +// private static final String BROKERPORT = "9092"; protected static InstanceContentEvent getData(Random instanceRandom, int numAtts, InstancesHeader header) { double[] attVals = new double[numAtts + 1]; @@ -63,8 +61,7 @@ protected static InstanceContentEvent getData(Random instanceRandom, int numAtts double sumWeights = 0.0; for (int i = 0; i < numAtts; i++) { attVals[i] = instanceRandom.nextDouble(); -// sum += this.weights[i] * attVals[i]; -// sumWeights += this.weights[i]; + } int classLabel; if (sum >= sumWeights * 0.5) { @@ -98,8 +95,8 @@ protected static InstancesHeader generateHeader(int numAttributes) { } - protected static Properties getProducerProperties() { - return getProducerProperties("test"); + protected static Properties getProducerProperties(String BROKERHOST, String BROKERPORT) { + return getProducerProperties("test", BROKERHOST, BROKERPORT); } /** @@ -107,7 +104,7 @@ protected static Properties getProducerProperties() { * @param clientId * @return */ - protected static Properties getProducerProperties(String clientId) { + protected static Properties getProducerProperties(String clientId, String BROKERHOST, String BROKERPORT) { Properties producerProps = new Properties(); 
producerProps.setProperty("bootstrap.servers", BROKERHOST + ":" + BROKERPORT); producerProps.setProperty("key.serializer", "org.apache.kafka.common.serialization.StringSerializer"); @@ -117,7 +114,7 @@ protected static Properties getProducerProperties(String clientId) { return producerProps; } - protected static Properties getConsumerProperties() { + protected static Properties getConsumerProperties(String BROKERHOST, String BROKERPORT) { Properties consumerProps = new Properties(); consumerProps.setProperty("bootstrap.servers", BROKERHOST + ":" + BROKERPORT); consumerProps.put("enable.auto.commit", "true"); @@ -126,11 +123,10 @@ protected static Properties getConsumerProperties() { consumerProps.setProperty("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer"); consumerProps.setProperty("group.id", "test"); consumerProps.setProperty("auto.offset.reset", "earliest"); - //consumerProps.setProperty("client.id", "consumer0"); return consumerProps; } - protected static Properties getConsumerProducerProperties() { + protected static Properties getConsumerProducerProperties(String BROKERHOST, String BROKERPORT) { Properties props = new Properties(); props.setProperty("bootstrap.servers", BROKERHOST + ":" + BROKERPORT); props.put("enable.auto.commit", "true"); From f20301a4c64c9487fdbcff17829b6a8018e9a00b Mon Sep 17 00:00:00 2001 From: pwawrzyniak Date: Fri, 12 May 2017 17:22:54 +0200 Subject: [PATCH 12/17] Minor change in tests --- .../java/org/apache/samoa/streams/kafka/KafkaUtilsTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaUtilsTest.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaUtilsTest.java index e2b36fd7..8f77504b 100644 --- a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaUtilsTest.java +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaUtilsTest.java @@ -91,7 +91,7 @@ public class 
KafkaUtilsTest { private static String zkConnect; private static final Logger logger = Logger.getLogger(KafkaUtilsTest.class.getCanonicalName()); - private final long CONSUMER_TIMEOUT = 1000; + private final long CONSUMER_TIMEOUT = 1500; public KafkaUtilsTest() { } @@ -230,7 +230,7 @@ public void testSendKafkaMessage() throws InterruptedException { instance.sendKafkaMessage(TOPIC_S, val); } // wait for Kafka a bit :) - Thread.sleep(CONSUMER_TIMEOUT); + Thread.sleep(2*CONSUMER_TIMEOUT); logger.log(Level.INFO, "Get results from Kafka"); ConsumerRecords records = consumer.poll(CONSUMER_TIMEOUT); From b4ee754c54ac5be51f9846704947e7c19c186e73 Mon Sep 17 00:00:00 2001 From: Jakub Jankowski Date: Tue, 16 May 2017 14:11:36 +0200 Subject: [PATCH 13/17] Fixing avro deserializer (using ReflectDatumReader) to be able to read arrays in InstanceData objects --- .../samoa/streams/kafka/KafkaAvroMapper.java | 11 +- .../streams/kafka/avro/SamoaDatumReader.java | 115 ++++++++++++++++++ samoa-api/src/main/resources/kafka.avsc | 53 +++++++- .../kafka/AvroSerializerDeserializerTest.java | 70 +++++++++++ 4 files changed, 238 insertions(+), 11 deletions(-) create mode 100644 samoa-api/src/main/java/org/apache/samoa/streams/kafka/avro/SamoaDatumReader.java create mode 100644 samoa-api/src/test/java/org/apache/samoa/streams/kafka/AvroSerializerDeserializerTest.java diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaAvroMapper.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaAvroMapper.java index afbc0022..a045bed5 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaAvroMapper.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaAvroMapper.java @@ -21,21 +21,18 @@ import java.io.IOException; import org.apache.avro.Schema; -import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.io.BinaryEncoder; import org.apache.avro.io.DatumReader; import org.apache.avro.io.DatumWriter; import 
org.apache.avro.io.Decoder; import org.apache.avro.io.DecoderFactory; -import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; import org.apache.avro.reflect.ReflectData; -import org.apache.avro.reflect.ReflectDatumReader; import org.apache.avro.reflect.ReflectDatumWriter; -import org.apache.avro.specific.SpecificDatumReader; import org.apache.avro.specific.SpecificDatumWriter; import org.apache.avro.specific.SpecificRecord; import org.apache.samoa.learners.InstanceContentEvent; +import org.apache.samoa.streams.kafka.avro.SamoaDatumReader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -91,7 +88,7 @@ public InstanceContentEvent deserialize(byte[] message) { public static byte[] avroSerialize(final Class cls, final V v) { ByteArrayOutputStream bout = new ByteArrayOutputStream(); try { - Schema schema = new Schema.Parser().parse(new File("C:/java/avro/kafka.avsc")); + Schema schema = new Schema.Parser().parse(KafkaAvroMapper.class.getResourceAsStream("/kafka.avsc")); DatumWriter writer; if (v instanceof SpecificRecord) { @@ -123,9 +120,9 @@ public static byte[] avroSerialize(final Class cls, final V v) { public static V avroDeserialize(byte[] avroBytes, Class clazz) { V ret = null; try { - Schema schema = new Schema.Parser().parse(new File("C:/java/avro/kafka.avsc")); + Schema schema = new Schema.Parser().parse(KafkaAvroMapper.class.getResourceAsStream("/kafka.avsc")); ByteArrayInputStream in = new ByteArrayInputStream(avroBytes); - DatumReader reader = new ReflectDatumReader<>(schema); + DatumReader reader = new SamoaDatumReader<>(schema); Decoder decoder = DecoderFactory.get().directBinaryDecoder(in, null); diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/avro/SamoaDatumReader.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/avro/SamoaDatumReader.java new file mode 100644 index 00000000..b7a18aa6 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/avro/SamoaDatumReader.java @@ 
-0,0 +1,115 @@ +package org.apache.samoa.streams.kafka.avro; + +import java.io.IOException; + +import org.apache.avro.AvroRuntimeException; +import org.apache.avro.Schema; +import org.apache.avro.Schema.Field; +import org.apache.avro.generic.GenericData.Array; +import org.apache.avro.generic.IndexedRecord; +import org.apache.avro.io.ResolvingDecoder; +import org.apache.avro.reflect.ReflectData; +import org.apache.avro.reflect.ReflectDatumReader; +import org.apache.avro.specific.SpecificRecordBase; +import org.apache.samoa.instances.DenseInstanceData; +import org.apache.samoa.instances.SingleClassInstanceData; +import org.apache.samoa.instances.SparseInstanceData; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * DatumReader used to read objects built with InstanceData classes + * @author Jakub Jankowski + * + * @param + */ +public class SamoaDatumReader extends ReflectDatumReader { + + private static Logger logger = LoggerFactory.getLogger(SamoaDatumReader.class); + + public SamoaDatumReader() { + super(); + } + + /** Construct for reading instances of a class. */ + public SamoaDatumReader(Class c) { + super(c); + } + + /** Construct where the writer's and reader's schemas are the same. */ + public SamoaDatumReader(Schema root) { + super(root); + } + + /** Construct given writer's and reader's schema. */ + public SamoaDatumReader(Schema writer, Schema reader) { + super(writer, reader); + } + + /** Construct given writer's and reader's schema and the data model. */ + public SamoaDatumReader(Schema writer, Schema reader, ReflectData data) { + super(writer, reader, data); + } + + /** Construct given a {@link ReflectData}. */ + public SamoaDatumReader(ReflectData data) { + super(data); + } + + @Override + /** + * Called to read a record instance. Overridden to read InstanceData. 
+ */ + protected Object readRecord(Object old, Schema expected, ResolvingDecoder in) throws IOException { + Object r = getData().newRecord(old, expected); + Object state = null; + + for (Field f : in.readFieldOrder()) { + int pos = f.pos(); + String name = f.name(); + Object oldDatum = null; + if (r instanceof DenseInstanceData) { + r = readDenseInstanceData(r, f, oldDatum, in, state); + } else if (r instanceof SparseInstanceData) { + r = readSparseInstanceData(r, f, oldDatum, in, state); + } else + readField(r, f, oldDatum, in, state); + } + + return r; + } + + private Object readDenseInstanceData(Object record, Field f, Object oldDatum, ResolvingDecoder in, Object state) + throws IOException { + if (f.name().equals("attributeValues")) { + Array atributes = (Array) read(oldDatum, f.schema(), in); + double[] atributesArr = new double[atributes.size()]; + for (int i = 0; i < atributes.size(); i++) { + atributesArr[i] = (double) atributes.get(i); + } + return new DenseInstanceData(atributesArr); + } + return null; + } + + private Object readSparseInstanceData(Object record, Field f, Object oldDatum, ResolvingDecoder in, Object state) + throws IOException { + if(f.name().equals("attributeValues")) { + Array atributes = (Array) read(oldDatum, f.schema(), in); + double[] atributesArr = new double[atributes.size()]; + for (int i = 0; i < atributes.size(); i++) + atributesArr[i] = (double) atributes.get(i); + ((SparseInstanceData)record).setAttributeValues(atributesArr); + } + if(f.name().equals("indexValues")) { + Array indexValues = (Array) read(oldDatum, f.schema(), in); + int[] indexValuesArr = new int[indexValues.size()]; + for (int i = 0; i < indexValues.size(); i++) { + indexValuesArr[i] = (int) indexValues.get(i); + } + ((SparseInstanceData)record).setIndexValues(indexValuesArr); + } + return record; + } + +} diff --git a/samoa-api/src/main/resources/kafka.avsc b/samoa-api/src/main/resources/kafka.avsc index c21e1534..f5f12cf2 100644 --- 
a/samoa-api/src/main/resources/kafka.avsc +++ b/samoa-api/src/main/resources/kafka.avsc @@ -1,11 +1,31 @@ [ { + "namespace": "org.apache.samoa.streams.kafka.temp", + "type": "record", + "name": "BurrTest", + "fields": [ + {"name":"name", "type": "string"}, + {"name":"atrs", "type": {"type": "array", "items": "string"}}, + {"name":"nums", "type": {"type": "array", "items": "int"}}, + {"name":"list", "type": {"type": "array", "items": "string"}} + ] +}, +{ + "namespace": "org.apache.samoa.instances", + "type": "record", + "name": "Instance", + "fields": [ + ] +}, +{ + "namespace": "org.apache.samoa.instances", "type": "record", "name": "InstanceData", "fields": [ ] }, { + "namespace": "org.apache.samoa.instances", "type": "record", "name": "SingleClassInstanceData", "fields": [ @@ -13,6 +33,7 @@ ] }, { + "namespace": "org.apache.samoa.instances", "type": "record", "name": "DenseInstanceData", "fields": [ @@ -20,6 +41,7 @@ ] }, { + "namespace": "org.apache.samoa.instances", "type": "record", "name": "SparseInstanceData", "fields": [ @@ -29,32 +51,55 @@ ] }, { + "namespace": "org.apache.samoa.instances", + "type": "record", + "name": "SingleLabelInstance", + "fields": [ + {"name": "weight", "type": "double"}, + {"name": "instanceData", "type": ["null", "org.apache.samoa.instances.InstanceData", "org.apache.samoa.instances.DenseInstanceData", "org.apache.samoa.instances.SparseInstanceData", "org.apache.samoa.instances.SingleClassInstanceData"]}, + {"name": "classData", "type": ["null", "org.apache.samoa.instances.InstanceData", "org.apache.samoa.instances.DenseInstanceData", "org.apache.samoa.instances.SparseInstanceData", "org.apache.samoa.instances.SingleClassInstanceData"]} + ] +}, +{ + "namespace": "org.apache.samoa.instances", + "type": "record", + "name": "DenseInstance", + "fields": [ + {"name": "weight", "type": "double"}, + {"name": "instanceData", "type": ["null", "org.apache.samoa.instances.InstanceData", "org.apache.samoa.instances.DenseInstanceData", 
"org.apache.samoa.instances.SparseInstanceData", "org.apache.samoa.instances.SingleClassInstanceData"]}, + {"name": "classData", "type": ["null", "org.apache.samoa.instances.InstanceData", "org.apache.samoa.instances.DenseInstanceData", "org.apache.samoa.instances.SparseInstanceData", "org.apache.samoa.instances.SingleClassInstanceData"]} + ] +}, +{ + "namespace": "org.apache.samoa.core", "type": "record", "name": "SerializableInstance", "fields": [ {"name": "weight", "type": "double"}, - {"name": "instanceData", "type": ["null", "InstanceData", "DenseInstanceData", "SparseInstanceData", "SingleClassInstanceData"]}, - {"name": "classData", "type": "InstanceData"} + {"name": "instanceData", "type": ["null", "org.apache.samoa.instances.InstanceData", "org.apache.samoa.instances.DenseInstanceData", "org.apache.samoa.instances.SparseInstanceData", "org.apache.samoa.instances.SingleClassInstanceData"]}, + {"name": "classData", "type": ["null", "org.apache.samoa.instances.InstanceData", "org.apache.samoa.instances.DenseInstanceData", "org.apache.samoa.instances.SparseInstanceData", "org.apache.samoa.instances.SingleClassInstanceData"]} ] }, { + "namespace": "org.apache.samoa.learners", "type": "record", "name": "InstanceContent", "fields": [ {"name": "instanceIndex", "type": "long"}, {"name": "classifierIndex", "type": "int"}, {"name": "evaluationIndex", "type": "int"}, - {"name":"instance", "type":"SerializableInstance"}, + {"name":"instance", "type":"org.apache.samoa.core.SerializableInstance"}, {"name": "isTraining", "type": "boolean"}, {"name": "isTesting", "type": "boolean"}, {"name": "isLast", "type": "boolean"} ] }, { + "namespace": "org.apache.samoa.learners", "type": "record", "name": "InstanceContentEvent", "fields": [ - {"name": "instanceContent", "type": "InstanceContent"} + {"name": "instanceContent", "type": "org.apache.samoa.learners.InstanceContent"} ] } ] diff --git 
a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/AvroSerializerDeserializerTest.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/AvroSerializerDeserializerTest.java new file mode 100644 index 00000000..1a1a7180 --- /dev/null +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/AvroSerializerDeserializerTest.java @@ -0,0 +1,70 @@ +package org.apache.samoa.streams.kafka; + +import static org.junit.Assert.assertTrue; + +import java.util.Random; +import java.util.logging.Logger; + +import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.learners.InstanceContentEvent; +import org.apache.samoa.streams.kafka.KafkaAvroMapper; +import org.junit.Test; + +public class AvroSerializerDeserializerTest { + + private Logger logger = Logger.getLogger(AvroSerializerDeserializerTest.class.getName()); + public AvroSerializerDeserializerTest() {} + + @Test + public void testAvroSerialize() { + Random r = new Random(); + InstancesHeader header = TestUtilsForKafka.generateHeader(10); + InstanceContentEvent eventToSerialize = TestUtilsForKafka.getData(r, 10, header); + byte[] data = KafkaAvroMapper.avroSerialize(InstanceContentEvent.class, eventToSerialize); + + InstanceContentEvent eventDeserialized = KafkaAvroMapper.avroDeserialize(data, InstanceContentEvent.class); + + assertTrue("Serialized and deserialized event", isEqual(eventToSerialize, eventDeserialized)); + + } + + public boolean isEqual(InstanceContentEvent a, InstanceContentEvent b) { + if(a.getClassId() != b.getClassId()) { + logger.info("a.getClassId() != b.getClassId(): " + (a.getClassId() != b.getClassId())); + return false; + } + if(a.isLastEvent() != b.isLastEvent()) { + logger.info("a.isLastEvent() != b.isLastEvent(): " + (a.isLastEvent() != b.isLastEvent())); + return false; + } + if(a.isTesting() != b.isTesting()) { + logger.info("a.isTesting() != b.isTesting(): " + (a.isTesting() != b.isTesting())); + return false; + } + if(a.isTraining() != b.isTraining()) { + 
logger.info("a.isTraining() != b.isTraining(): " + (a.isTraining() != b.isTraining())); + return false; + } + if(a.getClassifierIndex() != b.getClassifierIndex()) { + logger.info("a.getClassifierIndex() != b.getClassifierIndex(): " + (a.getClassifierIndex() != b.getClassifierIndex())); + return false; + } + if(a.getEvaluationIndex() != b.getEvaluationIndex()) { + logger.info("a.getEvaluationIndex() != b.getEvaluationIndex(): " + (a.getEvaluationIndex() != b.getEvaluationIndex())); + return false; + } + if(a.getInstanceIndex() != b.getInstanceIndex()) { + logger.info("a.getInstanceIndex() != b.getInstanceIndex(): " + (a.getInstanceIndex() != b.getInstanceIndex())); + return false; + } + if(!a.getInstance().toString().equals(b.getInstance().toString())) { + logger.info("a.getInstance().toString()!= b.getInstance().toString(): " + (a.getInstance().toString()!= b.getInstance().toString())); + logger.info("a.toString(): " + a.getInstance().toString()); + logger.info("b.toString(): " + b.getInstance().toString()); + return false; + } + + return true; + } + +} From d636ba90851d8424a6d3e651d80c87d0a8d7394e Mon Sep 17 00:00:00 2001 From: pwawrzyniak Date: Tue, 16 May 2017 15:00:33 +0200 Subject: [PATCH 14/17] Changes in JSON mapper --- .../streams/kafka/KafkaConsumerThread.java | 23 ++++++++- .../samoa/streams/kafka/KafkaJsonMapper.java | 49 +++++++++++++------ .../kafka/KafkaEntranceProcessorTest.java | 13 +++-- 3 files changed, 63 insertions(+), 22 deletions(-) diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaConsumerThread.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaConsumerThread.java index 6522f67c..a93986e5 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaConsumerThread.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaConsumerThread.java @@ -15,6 +15,27 @@ */ package org.apache.samoa.streams.kafka; +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2017 Apache Software 
Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + + import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; @@ -28,7 +49,7 @@ /** * - * @author pwawrzyniak + * @author pwawrzyniak */ class KafkaConsumerThread extends Thread { diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaJsonMapper.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaJsonMapper.java index 1996b40a..2ac3e04f 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaJsonMapper.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaJsonMapper.java @@ -34,39 +34,43 @@ * limitations under the License. 
* #L% */ - - import com.google.gson.Gson; import com.google.gson.GsonBuilder; import com.google.gson.InstanceCreator; +import com.google.gson.JsonDeserializationContext; +import com.google.gson.JsonDeserializer; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import com.google.gson.JsonParseException; import java.lang.reflect.Type; import java.nio.charset.Charset; -import java.util.logging.Level; -import java.util.logging.Logger; +import org.apache.samoa.instances.DenseInstanceData; import org.apache.samoa.instances.InstanceData; -import org.apache.samoa.instances.SingleClassInstanceData; import org.apache.samoa.learners.InstanceContentEvent; /** - * Sample class for serializing and deserializing {@link InstanceContentEvent} from/to JSON format + * Sample class for serializing and deserializing {@link InstanceContentEvent} + * from/to JSON format + * * @author pwawrzyniak * @version 0.5.0-incubating-SNAPSHOT * @since 0.5.0-incubating */ -public class KafkaJsonMapper implements KafkaDeserializer, KafkaSerializer{ +public class KafkaJsonMapper implements KafkaDeserializer, KafkaSerializer { private final transient Gson gson; private final Charset charset; /** * Class constructor + * * @param charset Charset to be used for bytes parsing */ - public KafkaJsonMapper(Charset charset){ - this.gson = new GsonBuilder().registerTypeAdapter(InstanceData.class, new InstanceDataCreator()).create(); + public KafkaJsonMapper(Charset charset) { + this.gson = new GsonBuilder().registerTypeAdapter(InstanceData.class, new InstanceDataCustomDeserializer()).create(); this.charset = charset; } - + @Override public InstanceContentEvent deserialize(byte[] message) { return gson.fromJson(new String(message, this.charset), InstanceContentEvent.class); @@ -76,14 +80,27 @@ public InstanceContentEvent deserialize(byte[] message) { public byte[] serialize(InstanceContentEvent message) { return gson.toJson(message).getBytes(this.charset); } - - public class 
InstanceDataCreator implements InstanceCreator{ + + //Unused + public class InstanceDataCreator implements InstanceCreator { + + @Override + public InstanceData createInstance(Type type) { + return new DenseInstanceData(); + } + } + + public class InstanceDataCustomDeserializer implements JsonDeserializer { @Override - public InstanceData createInstance(Type type) { - return new SingleClassInstanceData(); + public DenseInstanceData deserialize(JsonElement je, Type type, JsonDeserializationContext jdc) throws JsonParseException { + double[] attributeValues = null; + JsonObject obj = (JsonObject) je; + attributeValues = jdc.deserialize(obj.get("attributeValues"), double[].class); + DenseInstanceData did = new DenseInstanceData(attributeValues); + return did; } - + } - + } diff --git a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorTest.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorTest.java index 009a6a77..933ba2a1 100644 --- a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorTest.java +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorTest.java @@ -38,6 +38,8 @@ import java.io.IOException; import java.nio.charset.Charset; import java.nio.file.Files; +import java.util.ArrayList; +import java.util.List; import java.util.Properties; import java.util.Random; import java.util.concurrent.ExecutionException; @@ -70,6 +72,7 @@ /** * * @author pwawrzyniak + * @author Jakub Jankowski */ public class KafkaEntranceProcessorTest { @@ -137,7 +140,7 @@ public void tearDown() { @Test public void testFetchingNewDataWithJson() throws InterruptedException, ExecutionException, TimeoutException { - Logger logger = Logger.getLogger(KafkaEntranceProcessorTest.class.getName()); + final Logger logger = Logger.getLogger(KafkaEntranceProcessorTest.class.getName()); logger.log(Level.INFO, "JSON"); logger.log(Level.INFO, "testFetchingNewDataWithJson"); Properties props 
= TestUtilsForKafka.getConsumerProperties(BROKERHOST, BROKERPORT); @@ -145,7 +148,7 @@ public void testFetchingNewDataWithJson() throws InterruptedException, Execution KafkaEntranceProcessor kep = new KafkaEntranceProcessor(props, TOPIC_JSON, TIMEOUT, new KafkaJsonMapper(Charset.defaultCharset())); kep.onCreate(1); - + // prepare new thread for data producing Thread th = new Thread(new Runnable() { @Override @@ -159,6 +162,7 @@ public void run() { for (i = 0; i < NUM_INSTANCES; i++) { try { InstanceContentEvent event = TestUtilsForKafka.getData(r, 10, header); + ProducerRecord record = new ProducerRecord(TOPIC_JSON, gson.toJson(event).getBytes()); long stat = producer.send(record).get(10, TimeUnit.SECONDS).offset(); } catch (InterruptedException | ExecutionException | TimeoutException ex) { @@ -173,11 +177,10 @@ public void run() { int z = 0; while (z < NUM_INSTANCES && kep.hasNext()) { - InstanceContentEvent event = (InstanceContentEvent) kep.nextEvent(); + InstanceContentEvent event = (InstanceContentEvent) kep.nextEvent(); z++; -// logger.log(Level.INFO, "{0} {1}", new Object[]{z, event.getInstance().toString()}); } - + assertEquals("Number of sent and received instances", NUM_INSTANCES, z); } From 57f80a5bf7d83c31d517a7d98741dd7457a4c84c Mon Sep 17 00:00:00 2001 From: pwawrzyniak Date: Wed, 7 Jun 2017 12:21:08 +0200 Subject: [PATCH 15/17] repo cleanup, split code into 3 branches --- .../samoa/streams/kafka/KafkaAvroMapper.java | 160 --------- .../samoa/streams/kafka/KafkaJsonMapper.java | 106 ------ .../streams/kafka/avro/SamoaDatumReader.java | 115 ------ .../kafka/AvroSerializerDeserializerTest.java | 70 ---- .../kafka/KafkaDestinationProcessorTest.java | 2 +- .../kafka/KafkaEntranceProcessorTest.java | 66 +--- .../samoa/streams/kafka/KafkaTaskTest.java | 327 +++++++++--------- .../streams/kafka/OosTestSerializer.java | 60 ++++ .../topology/SimpleComponentFactory.java | 106 +++--- .../streams/kafka/topology/SimpleEngine.java | 74 ++-- 
.../SimpleEntranceProcessingItem.java | 66 ++-- .../kafka/topology/SimpleProcessingItem.java | 174 +++++----- .../streams/kafka/topology/SimpleStream.java | 190 +++++----- .../kafka/topology/SimpleTopology.java | 92 ++--- 14 files changed, 578 insertions(+), 1030 deletions(-) delete mode 100644 samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaAvroMapper.java delete mode 100644 samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaJsonMapper.java delete mode 100644 samoa-api/src/main/java/org/apache/samoa/streams/kafka/avro/SamoaDatumReader.java delete mode 100644 samoa-api/src/test/java/org/apache/samoa/streams/kafka/AvroSerializerDeserializerTest.java create mode 100644 samoa-api/src/test/java/org/apache/samoa/streams/kafka/OosTestSerializer.java rename samoa-api/src/{main => test}/java/org/apache/samoa/streams/kafka/topology/SimpleComponentFactory.java (97%) rename samoa-api/src/{main => test}/java/org/apache/samoa/streams/kafka/topology/SimpleEngine.java (96%) rename samoa-api/src/{main => test}/java/org/apache/samoa/streams/kafka/topology/SimpleEntranceProcessingItem.java (97%) rename samoa-api/src/{main => test}/java/org/apache/samoa/streams/kafka/topology/SimpleProcessingItem.java (97%) rename samoa-api/src/{main => test}/java/org/apache/samoa/streams/kafka/topology/SimpleStream.java (96%) rename samoa-api/src/{main => test}/java/org/apache/samoa/streams/kafka/topology/SimpleTopology.java (97%) diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaAvroMapper.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaAvroMapper.java deleted file mode 100644 index a045bed5..00000000 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaAvroMapper.java +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Copyright 2017 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.samoa.streams.kafka; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.IOException; - -import org.apache.avro.Schema; -import org.apache.avro.io.BinaryEncoder; -import org.apache.avro.io.DatumReader; -import org.apache.avro.io.DatumWriter; -import org.apache.avro.io.Decoder; -import org.apache.avro.io.DecoderFactory; -import org.apache.avro.io.EncoderFactory; -import org.apache.avro.reflect.ReflectData; -import org.apache.avro.reflect.ReflectDatumWriter; -import org.apache.avro.specific.SpecificDatumWriter; -import org.apache.avro.specific.SpecificRecord; -import org.apache.samoa.learners.InstanceContentEvent; -import org.apache.samoa.streams.kafka.avro.SamoaDatumReader; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2017 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * #L% - */ - -/** - * Sample class for serializing and deserializing {@link InstanceContentEvent} - * from/to Avro format - * - * @author Jakub Jankowski - * @version 0.5.0-incubating-SNAPSHOT - * @since 0.5.0-incubating - */ -public class KafkaAvroMapper implements KafkaDeserializer, KafkaSerializer { - - private static Logger logger = LoggerFactory.getLogger(KafkaAvroMapper.class); - - @Override - public byte[] serialize(InstanceContentEvent message) { - return avroSerialize(InstanceContentEvent.class, message); - } - - @Override - public InstanceContentEvent deserialize(byte[] message) { - return avroDeserialize(message, InstanceContentEvent.class); - } - - - /** - * Avro serialization based on specified schema - * @param cls - * @param v - * @return - */ - public static byte[] avroSerialize(final Class cls, final V v) { - ByteArrayOutputStream bout = new ByteArrayOutputStream(); - try { - Schema schema = new Schema.Parser().parse(KafkaAvroMapper.class.getResourceAsStream("/kafka.avsc")); - DatumWriter writer; - - if (v instanceof SpecificRecord) { - writer = new SpecificDatumWriter<>(schema); - } else { - writer = new ReflectDatumWriter<>(schema); - } - - BinaryEncoder binEncoder = EncoderFactory.get().binaryEncoder(bout, null); - writer.write(v, binEncoder); - binEncoder.flush(); - - } catch (IOException e) { - e.printStackTrace(); - } catch (final Exception e) { - throw new RuntimeException(e); - } - - return bout.toByteArray(); - - } - - /** - * Avro deserialization based on specified schema - * @param cls - * @param v - * @return - */ - public static V avroDeserialize(byte[] avroBytes, Class clazz) { - V ret = null; - try { - Schema schema = new Schema.Parser().parse(KafkaAvroMapper.class.getResourceAsStream("/kafka.avsc")); - ByteArrayInputStream in = new ByteArrayInputStream(avroBytes); - DatumReader reader = new SamoaDatumReader<>(schema); - - Decoder decoder = DecoderFactory.get().directBinaryDecoder(in, null); - - ret = reader.read(null, decoder); - 
} catch (IOException e) { - e.printStackTrace(); - } catch (final Exception e) { - throw new RuntimeException(e); - } - - return ret; - } - - /** - * Avro serialization using reflection - * @param cls - * @param v - * @return - */ - public static byte[] toBytesGeneric(final Class cls, final V v) { - final ByteArrayOutputStream bout = new ByteArrayOutputStream(); - final Schema schema = ReflectData.AllowNull.get().getSchema(cls); - final DatumWriter writer = new ReflectDatumWriter(schema); - final BinaryEncoder binEncoder = EncoderFactory.get().binaryEncoder(bout, null); - try { - writer.write(v, binEncoder); - binEncoder.flush(); - } catch (final Exception e) { - throw new RuntimeException(e); - } - - return bout.toByteArray(); - } - -} diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaJsonMapper.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaJsonMapper.java deleted file mode 100644 index 2ac3e04f..00000000 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaJsonMapper.java +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright 2017 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.samoa.streams.kafka; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2017 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ -import com.google.gson.Gson; -import com.google.gson.GsonBuilder; -import com.google.gson.InstanceCreator; -import com.google.gson.JsonDeserializationContext; -import com.google.gson.JsonDeserializer; -import com.google.gson.JsonElement; -import com.google.gson.JsonObject; -import com.google.gson.JsonParseException; -import java.lang.reflect.Type; -import java.nio.charset.Charset; -import org.apache.samoa.instances.DenseInstanceData; -import org.apache.samoa.instances.InstanceData; -import org.apache.samoa.learners.InstanceContentEvent; - -/** - * Sample class for serializing and deserializing {@link InstanceContentEvent} - * from/to JSON format - * - * @author pwawrzyniak - * @version 0.5.0-incubating-SNAPSHOT - * @since 0.5.0-incubating - */ -public class KafkaJsonMapper implements KafkaDeserializer, KafkaSerializer { - - private final transient Gson gson; - private final Charset charset; - - /** - * Class constructor - * - * @param charset Charset to be used for bytes parsing - */ - public KafkaJsonMapper(Charset charset) { - this.gson = new GsonBuilder().registerTypeAdapter(InstanceData.class, new InstanceDataCustomDeserializer()).create(); - this.charset = charset; - } - - @Override - public InstanceContentEvent deserialize(byte[] message) { - return gson.fromJson(new String(message, this.charset), InstanceContentEvent.class); - } - - @Override - public byte[] serialize(InstanceContentEvent message) { - return gson.toJson(message).getBytes(this.charset); - } - - //Unused - public class InstanceDataCreator implements 
InstanceCreator { - - @Override - public InstanceData createInstance(Type type) { - return new DenseInstanceData(); - } - } - - public class InstanceDataCustomDeserializer implements JsonDeserializer { - - @Override - public DenseInstanceData deserialize(JsonElement je, Type type, JsonDeserializationContext jdc) throws JsonParseException { - double[] attributeValues = null; - JsonObject obj = (JsonObject) je; - attributeValues = jdc.deserialize(obj.get("attributeValues"), double[].class); - DenseInstanceData did = new DenseInstanceData(attributeValues); - return did; - } - - } - -} diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/avro/SamoaDatumReader.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/avro/SamoaDatumReader.java deleted file mode 100644 index b7a18aa6..00000000 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/avro/SamoaDatumReader.java +++ /dev/null @@ -1,115 +0,0 @@ -package org.apache.samoa.streams.kafka.avro; - -import java.io.IOException; - -import org.apache.avro.AvroRuntimeException; -import org.apache.avro.Schema; -import org.apache.avro.Schema.Field; -import org.apache.avro.generic.GenericData.Array; -import org.apache.avro.generic.IndexedRecord; -import org.apache.avro.io.ResolvingDecoder; -import org.apache.avro.reflect.ReflectData; -import org.apache.avro.reflect.ReflectDatumReader; -import org.apache.avro.specific.SpecificRecordBase; -import org.apache.samoa.instances.DenseInstanceData; -import org.apache.samoa.instances.SingleClassInstanceData; -import org.apache.samoa.instances.SparseInstanceData; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * DatumReader used to read objects built with InstanceData classes - * @author Jakub Jankowski - * - * @param - */ -public class SamoaDatumReader extends ReflectDatumReader { - - private static Logger logger = LoggerFactory.getLogger(SamoaDatumReader.class); - - public SamoaDatumReader() { - super(); - } - - /** Construct for reading 
instances of a class. */ - public SamoaDatumReader(Class c) { - super(c); - } - - /** Construct where the writer's and reader's schemas are the same. */ - public SamoaDatumReader(Schema root) { - super(root); - } - - /** Construct given writer's and reader's schema. */ - public SamoaDatumReader(Schema writer, Schema reader) { - super(writer, reader); - } - - /** Construct given writer's and reader's schema and the data model. */ - public SamoaDatumReader(Schema writer, Schema reader, ReflectData data) { - super(writer, reader, data); - } - - /** Construct given a {@link ReflectData}. */ - public SamoaDatumReader(ReflectData data) { - super(data); - } - - @Override - /** - * Called to read a record instance. Overridden to read InstanceData. - */ - protected Object readRecord(Object old, Schema expected, ResolvingDecoder in) throws IOException { - Object r = getData().newRecord(old, expected); - Object state = null; - - for (Field f : in.readFieldOrder()) { - int pos = f.pos(); - String name = f.name(); - Object oldDatum = null; - if (r instanceof DenseInstanceData) { - r = readDenseInstanceData(r, f, oldDatum, in, state); - } else if (r instanceof SparseInstanceData) { - r = readSparseInstanceData(r, f, oldDatum, in, state); - } else - readField(r, f, oldDatum, in, state); - } - - return r; - } - - private Object readDenseInstanceData(Object record, Field f, Object oldDatum, ResolvingDecoder in, Object state) - throws IOException { - if (f.name().equals("attributeValues")) { - Array atributes = (Array) read(oldDatum, f.schema(), in); - double[] atributesArr = new double[atributes.size()]; - for (int i = 0; i < atributes.size(); i++) { - atributesArr[i] = (double) atributes.get(i); - } - return new DenseInstanceData(atributesArr); - } - return null; - } - - private Object readSparseInstanceData(Object record, Field f, Object oldDatum, ResolvingDecoder in, Object state) - throws IOException { - if(f.name().equals("attributeValues")) { - Array atributes = (Array) 
read(oldDatum, f.schema(), in); - double[] atributesArr = new double[atributes.size()]; - for (int i = 0; i < atributes.size(); i++) - atributesArr[i] = (double) atributes.get(i); - ((SparseInstanceData)record).setAttributeValues(atributesArr); - } - if(f.name().equals("indexValues")) { - Array indexValues = (Array) read(oldDatum, f.schema(), in); - int[] indexValuesArr = new int[indexValues.size()]; - for (int i = 0; i < indexValues.size(); i++) { - indexValuesArr[i] = (int) indexValues.get(i); - } - ((SparseInstanceData)record).setIndexValues(indexValuesArr); - } - return record; - } - -} diff --git a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/AvroSerializerDeserializerTest.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/AvroSerializerDeserializerTest.java deleted file mode 100644 index 1a1a7180..00000000 --- a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/AvroSerializerDeserializerTest.java +++ /dev/null @@ -1,70 +0,0 @@ -package org.apache.samoa.streams.kafka; - -import static org.junit.Assert.assertTrue; - -import java.util.Random; -import java.util.logging.Logger; - -import org.apache.samoa.instances.InstancesHeader; -import org.apache.samoa.learners.InstanceContentEvent; -import org.apache.samoa.streams.kafka.KafkaAvroMapper; -import org.junit.Test; - -public class AvroSerializerDeserializerTest { - - private Logger logger = Logger.getLogger(AvroSerializerDeserializerTest.class.getName()); - public AvroSerializerDeserializerTest() {} - - @Test - public void testAvroSerialize() { - Random r = new Random(); - InstancesHeader header = TestUtilsForKafka.generateHeader(10); - InstanceContentEvent eventToSerialize = TestUtilsForKafka.getData(r, 10, header); - byte[] data = KafkaAvroMapper.avroSerialize(InstanceContentEvent.class, eventToSerialize); - - InstanceContentEvent eventDeserialized = KafkaAvroMapper.avroDeserialize(data, InstanceContentEvent.class); - - assertTrue("Serialized and deserialized event", 
isEqual(eventToSerialize, eventDeserialized)); - - } - - public boolean isEqual(InstanceContentEvent a, InstanceContentEvent b) { - if(a.getClassId() != b.getClassId()) { - logger.info("a.getClassId() != b.getClassId(): " + (a.getClassId() != b.getClassId())); - return false; - } - if(a.isLastEvent() != b.isLastEvent()) { - logger.info("a.isLastEvent() != b.isLastEvent(): " + (a.isLastEvent() != b.isLastEvent())); - return false; - } - if(a.isTesting() != b.isTesting()) { - logger.info("a.isTesting() != b.isTesting(): " + (a.isTesting() != b.isTesting())); - return false; - } - if(a.isTraining() != b.isTraining()) { - logger.info("a.isTraining() != b.isTraining(): " + (a.isTraining() != b.isTraining())); - return false; - } - if(a.getClassifierIndex() != b.getClassifierIndex()) { - logger.info("a.getClassifierIndex() != b.getClassifierIndex(): " + (a.getClassifierIndex() != b.getClassifierIndex())); - return false; - } - if(a.getEvaluationIndex() != b.getEvaluationIndex()) { - logger.info("a.getEvaluationIndex() != b.getEvaluationIndex(): " + (a.getEvaluationIndex() != b.getEvaluationIndex())); - return false; - } - if(a.getInstanceIndex() != b.getInstanceIndex()) { - logger.info("a.getInstanceIndex() != b.getInstanceIndex(): " + (a.getInstanceIndex() != b.getInstanceIndex())); - return false; - } - if(!a.getInstance().toString().equals(b.getInstance().toString())) { - logger.info("a.getInstance().toString()!= b.getInstance().toString(): " + (a.getInstance().toString()!= b.getInstance().toString())); - logger.info("a.toString(): " + a.getInstance().toString()); - logger.info("b.toString(): " + b.getInstance().toString()); - return false; - } - - return true; - } - -} diff --git a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessorTest.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessorTest.java index bf45ffba..2d594569 100644 --- 
a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessorTest.java +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaDestinationProcessorTest.java @@ -135,7 +135,7 @@ public void testSendingData() throws InterruptedException, ExecutionException, T final Logger logger = Logger.getLogger(KafkaDestinationProcessorTest.class.getName()); Properties props = TestUtilsForKafka.getProducerProperties(BROKERHOST,BROKERPORT); props.setProperty("auto.offset.reset", "earliest"); - KafkaDestinationProcessor kdp = new KafkaDestinationProcessor(props, TOPIC, new KafkaJsonMapper(Charset.defaultCharset())); + KafkaDestinationProcessor kdp = new KafkaDestinationProcessor(props, TOPIC, new OosTestSerializer()); kdp.onCreate(1); final int[] i = {0}; diff --git a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorTest.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorTest.java index 933ba2a1..b8b5c72f 100644 --- a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorTest.java +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorTest.java @@ -138,80 +138,32 @@ public void tearDown() { } @Test - public void testFetchingNewDataWithJson() throws InterruptedException, ExecutionException, TimeoutException { + public void testFetchingNewData() throws InterruptedException, ExecutionException, TimeoutException { final Logger logger = Logger.getLogger(KafkaEntranceProcessorTest.class.getName()); - logger.log(Level.INFO, "JSON"); - logger.log(Level.INFO, "testFetchingNewDataWithJson"); + logger.log(Level.INFO, "OOS"); + logger.log(Level.INFO, "testFetchingNewData"); Properties props = TestUtilsForKafka.getConsumerProperties(BROKERHOST, BROKERPORT); props.setProperty("auto.offset.reset", "earliest"); - KafkaEntranceProcessor kep = new KafkaEntranceProcessor(props, TOPIC_JSON, TIMEOUT, new KafkaJsonMapper(Charset.defaultCharset())); + 
KafkaEntranceProcessor kep = new KafkaEntranceProcessor(props, TOPIC_JSON, TIMEOUT, new OosTestSerializer()); kep.onCreate(1); - + // prepare new thread for data producing Thread th = new Thread(new Runnable() { @Override public void run() { - KafkaProducer producer = new KafkaProducer<>(TestUtilsForKafka.getProducerProperties(BROKERHOST,BROKERPORT)); + KafkaProducer producer = new KafkaProducer<>(TestUtilsForKafka.getProducerProperties(BROKERHOST, BROKERPORT)); Random r = new Random(); InstancesHeader header = TestUtilsForKafka.generateHeader(10); - Gson gson = new Gson(); + OosTestSerializer serializer = new OosTestSerializer(); int i = 0; for (i = 0; i < NUM_INSTANCES; i++) { try { InstanceContentEvent event = TestUtilsForKafka.getData(r, 10, header); - - ProducerRecord record = new ProducerRecord(TOPIC_JSON, gson.toJson(event).getBytes()); - long stat = producer.send(record).get(10, TimeUnit.SECONDS).offset(); - } catch (InterruptedException | ExecutionException | TimeoutException ex) { - Logger.getLogger(KafkaEntranceProcessorTest.class.getName()).log(Level.SEVERE, null, ex); - } - } - producer.flush(); - producer.close(); - } - }); - th.start(); - int z = 0; - while (z < NUM_INSTANCES && kep.hasNext()) { - InstanceContentEvent event = (InstanceContentEvent) kep.nextEvent(); - z++; - } - - assertEquals("Number of sent and received instances", NUM_INSTANCES, z); - - } - - @Test - public void testFetchingNewDataWithAvro() throws InterruptedException, ExecutionException, TimeoutException { - Logger logger = Logger.getLogger(KafkaEntranceProcessorTest.class.getName()); - logger.log(Level.INFO, "AVRO"); - logger.log(Level.INFO, "testFetchingNewDataWithAvro"); - Properties props = TestUtilsForKafka.getConsumerProperties(BROKERHOST, BROKERPORT); - props.setProperty("auto.offset.reset", "earliest"); - KafkaEntranceProcessor kep = new KafkaEntranceProcessor(props, TOPIC_AVRO, TIMEOUT, new KafkaAvroMapper()); - kep.onCreate(1); - -// prepare new thread for data 
producing - Thread th = new Thread(new Runnable() { - @Override - public void run() { - KafkaProducer producer = new KafkaProducer<>(TestUtilsForKafka.getProducerProperties(BROKERHOST,BROKERPORT)); - - Random r = new Random(); - InstancesHeader header = TestUtilsForKafka.generateHeader(10); - - int i = 0; - for (i = 0; i < NUM_INSTANCES; i++) { - try { - byte[] data = KafkaAvroMapper.avroSerialize(InstanceContentEvent.class, TestUtilsForKafka.getData(r, 10, header)); - if (data == null) { - Logger.getLogger(KafkaEntranceProcessorTest.class.getName()).log(Level.INFO, "Serialize result: null ({0})", i); - } - ProducerRecord record = new ProducerRecord(TOPIC_AVRO, data); + ProducerRecord record = new ProducerRecord(TOPIC_JSON, serializer.serialize(event)); long stat = producer.send(record).get(10, TimeUnit.SECONDS).offset(); } catch (InterruptedException | ExecutionException | TimeoutException ex) { Logger.getLogger(KafkaEntranceProcessorTest.class.getName()).log(Level.SEVERE, null, ex); @@ -227,9 +179,9 @@ public void run() { while (z < NUM_INSTANCES && kep.hasNext()) { InstanceContentEvent event = (InstanceContentEvent) kep.nextEvent(); z++; -// logger.log(Level.INFO, "{0} {1}", new Object[]{z, event.getInstance().toString()}); } assertEquals("Number of sent and received instances", NUM_INSTANCES, z); + } } diff --git a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaTaskTest.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaTaskTest.java index 08aae11c..4215b086 100644 --- a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaTaskTest.java +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaTaskTest.java @@ -1,170 +1,157 @@ -/* - * Copyright 2017 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.samoa.streams.kafka; - -import com.google.gson.Gson; -import java.io.IOException; -import java.nio.charset.Charset; -import java.nio.file.Files; -import java.util.Properties; -import java.util.Random; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; -import java.util.logging.Level; -import java.util.logging.Logger; - -import org.I0Itec.zkclient.ZkClient; -import org.apache.kafka.common.utils.Time; -import org.apache.samoa.streams.kafka.topology.SimpleComponentFactory; -import org.apache.samoa.streams.kafka.topology.SimpleEngine; -import org.junit.After; -import org.junit.AfterClass; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Ignore; -import org.junit.Test; - -import kafka.admin.AdminUtils; -import kafka.admin.RackAwareMode; -import kafka.server.KafkaConfig; -import kafka.server.KafkaServer; -import kafka.utils.MockTime; -import kafka.utils.TestUtils; -import kafka.utils.ZKStringSerializer$; -import kafka.utils.ZkUtils; -import kafka.zk.EmbeddedZookeeper; -import org.apache.kafka.clients.producer.KafkaProducer; -import org.apache.kafka.clients.producer.ProducerRecord; -import org.apache.samoa.instances.InstancesHeader; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2017 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -/** -* -* @author Jakub Jankowski -*/ -@Ignore -public class KafkaTaskTest { - - private static final String ZKHOST = "127.0.0.1";//10.255.251.202"; //10.255.251.202 - private static final String BROKERHOST = "127.0.0.1";//"10.255.251.214"; //10.255.251.214 - private static final String BROKERPORT = "9092"; //6667, local: 9092 - private static final String TOPIC = "samoa_test"; //samoa_test, local: test - private static final int NUM_INSTANCES = 125922; - - - private static KafkaServer kafkaServer; - private static EmbeddedZookeeper zkServer; - private static ZkClient zkClient; - private static String zkConnect; - - @BeforeClass - public static void setUpClass() throws IOException { - // setup Zookeeper -// zkServer = new EmbeddedZookeeper(); -// zkConnect = ZKHOST + ":" + "2181"; //+ zkServer.port(); -// zkClient = new ZkClient(zkConnect, 30000, 30000, ZKStringSerializer$.MODULE$); -// ZkUtils zkUtils = ZkUtils.apply(zkClient, false); - - // setup Broker - /*Properties brokerProps = new Properties(); - brokerProps.setProperty("zookeeper.connect", zkConnect); - brokerProps.setProperty("broker.id", "0"); - brokerProps.setProperty("log.dirs", Files.createTempDirectory("kafka-").toAbsolutePath().toString()); - brokerProps.setProperty("listeners", "PLAINTEXT://" + BROKERHOST + ":" + BROKERPORT); - KafkaConfig config = new KafkaConfig(brokerProps); - Time mock = new MockTime(); - kafkaServer = TestUtils.createServer(config, mock);*/ - - // create topic - //AdminUtils.createTopic(zkUtils, TOPIC, 1, 1, new Properties(), 
RackAwareMode.Disabled$.MODULE$); - } - - @AfterClass - public static void tearDownClass() { - //kafkaServer.shutdown(); -// zkClient.close(); -// zkServer.shutdown(); - } - - @Before - public void setUp() throws IOException { - - } - - @After - public void tearDown() { - - } - - @Test - public void testKafkaTask() throws InterruptedException, ExecutionException, TimeoutException { - Logger logger = Logger.getLogger(KafkaTaskTest.class.getName()); - logger.log(Level.INFO, "KafkaTask"); - Properties producerProps = TestUtilsForKafka.getProducerProperties(BROKERHOST,BROKERPORT); - Properties consumerProps = TestUtilsForKafka.getConsumerProperties(BROKERHOST,BROKERPORT); - - KafkaTask task = new KafkaTask(producerProps, consumerProps, "kafkaTaskTest", 10000, new KafkaJsonMapper(Charset.defaultCharset()), new KafkaJsonMapper(Charset.defaultCharset())); - task.setFactory(new SimpleComponentFactory()); - task.init(); - SimpleEngine.submitTopology(task.getTopology()); - - Thread th = new Thread(new Runnable() { - @Override - public void run() { - KafkaProducer producer = new KafkaProducer<>(TestUtilsForKafka.getProducerProperties(BROKERHOST,BROKERPORT)); - - Random r = new Random(); - InstancesHeader header = TestUtilsForKafka.generateHeader(10); - Gson gson = new Gson(); - int i = 0; - for (i = 0; i < NUM_INSTANCES; i++) { - try { - ProducerRecord record = new ProducerRecord(TOPIC, gson.toJson(TestUtilsForKafka.getData(r, 10, header)).getBytes()); - long stat = producer.send(record).get(10, TimeUnit.DAYS).offset(); -// Thread.sleep(5); - Logger.getLogger(KafkaEntranceProcessorTest.class.getName()).log(Level.INFO, "Sent message with ID={0} to Kafka!, offset={1}", new Object[]{i, stat}); - } catch (InterruptedException | ExecutionException | TimeoutException ex) { - Logger.getLogger(KafkaEntranceProcessorTest.class.getName()).log(Level.SEVERE, null, ex); - } - } - producer.flush(); - producer.close(); - } - }); - th.start(); - - } -} +/* + * Copyright 2017 The Apache 
Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.samoa.streams.kafka; + +import java.io.IOException; +import java.util.Properties; +import java.util.Random; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.logging.Level; +import java.util.logging.Logger; + + +import org.I0Itec.zkclient.ZkClient; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Ignore; +import org.junit.Test; + +import kafka.server.KafkaServer; +import kafka.zk.EmbeddedZookeeper; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.streams.kafka.topology.SimpleComponentFactory; +import org.apache.samoa.streams.kafka.topology.SimpleEngine; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2017 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ +/** + * + * @author Jakub Jankowski + */ +@Ignore +public class KafkaTaskTest { + + private static final String ZKHOST = "127.0.0.1";//10.255.251.202"; //10.255.251.202 + private static final String BROKERHOST = "127.0.0.1";//"10.255.251.214"; //10.255.251.214 + private static final String BROKERPORT = "9092"; //6667, local: 9092 + private static final String TOPIC = "samoa_test"; //samoa_test, local: test + private static final int NUM_INSTANCES = 125922; + + private static KafkaServer kafkaServer; + private static EmbeddedZookeeper zkServer; + private static ZkClient zkClient; + private static String zkConnect; + + @BeforeClass + public static void setUpClass() throws IOException { + // setup Zookeeper +// zkServer = new EmbeddedZookeeper(); +// zkConnect = ZKHOST + ":" + "2181"; //+ zkServer.port(); +// zkClient = new ZkClient(zkConnect, 30000, 30000, ZKStringSerializer$.MODULE$); +// ZkUtils zkUtils = ZkUtils.apply(zkClient, false); + + // setup Broker + /*Properties brokerProps = new Properties(); + brokerProps.setProperty("zookeeper.connect", zkConnect); + brokerProps.setProperty("broker.id", "0"); + brokerProps.setProperty("log.dirs", Files.createTempDirectory("kafka-").toAbsolutePath().toString()); + brokerProps.setProperty("listeners", "PLAINTEXT://" + BROKERHOST + ":" + BROKERPORT); + KafkaConfig config = new KafkaConfig(brokerProps); + Time mock = new MockTime(); + kafkaServer = TestUtils.createServer(config, mock);*/ + // create topic + //AdminUtils.createTopic(zkUtils, TOPIC, 1, 1, new Properties(), 
RackAwareMode.Disabled$.MODULE$); + } + + @AfterClass + public static void tearDownClass() { + //kafkaServer.shutdown(); +// zkClient.close(); +// zkServer.shutdown(); + } + + @Before + public void setUp() throws IOException { + + } + + @After + public void tearDown() { + + } + + @Test + public void testKafkaTask() throws InterruptedException, ExecutionException, TimeoutException { + Logger logger = Logger.getLogger(KafkaTaskTest.class.getName()); + logger.log(Level.INFO, "KafkaTask"); + Properties producerProps = TestUtilsForKafka.getProducerProperties(BROKERHOST, BROKERPORT); + Properties consumerProps = TestUtilsForKafka.getConsumerProperties(BROKERHOST, BROKERPORT); + + KafkaTask task = new KafkaTask(producerProps, consumerProps, "kafkaTaskTest", 10000, new OosTestSerializer(), new OosTestSerializer()); + task.setFactory(new SimpleComponentFactory()); + task.init(); + SimpleEngine.submitTopology(task.getTopology()); + + Thread th = new Thread(new Runnable() { + @Override + public void run() { + KafkaProducer producer = new KafkaProducer<>(TestUtilsForKafka.getProducerProperties(BROKERHOST, BROKERPORT)); + + Random r = new Random(); + InstancesHeader header = TestUtilsForKafka.generateHeader(10); + OosTestSerializer serializer = new OosTestSerializer(); + int i = 0; + for (i = 0; i < NUM_INSTANCES; i++) { + try { + ProducerRecord record = new ProducerRecord(TOPIC, serializer.serialize(TestUtilsForKafka.getData(r, 10, header))); + long stat = producer.send(record).get(10, TimeUnit.DAYS).offset(); +// Thread.sleep(5); + Logger.getLogger(KafkaEntranceProcessorTest.class.getName()).log(Level.INFO, "Sent message with ID={0} to Kafka!, offset={1}", new Object[]{i, stat}); + } catch (InterruptedException | ExecutionException | TimeoutException ex) { + Logger.getLogger(KafkaEntranceProcessorTest.class.getName()).log(Level.SEVERE, null, ex); + } + } + producer.flush(); + producer.close(); + } + }); + th.start(); + + } +} diff --git 
a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/OosTestSerializer.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/OosTestSerializer.java new file mode 100644 index 00000000..649d3e01 --- /dev/null +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/OosTestSerializer.java @@ -0,0 +1,60 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.samoa.streams.kafka; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.apache.samoa.learners.InstanceContentEvent; + +/** + * + * @author Piotr Wawrzyniak + */ +public class OosTestSerializer implements KafkaDeserializer, KafkaSerializer { + + @Override + public InstanceContentEvent deserialize(byte[] message) { + try { + ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(message)); + InstanceContentEvent ice = (InstanceContentEvent)ois.readObject(); + return ice; + } catch (IOException | ClassNotFoundException ex) { + Logger.getLogger(OosTestSerializer.class.getName()).log(Level.SEVERE, null, ex); + } + return null; + } + + @Override + public byte[] serialize(InstanceContentEvent message) { + try { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + ObjectOutputStream oos = new 
ObjectOutputStream(baos); + oos.writeObject(message); + oos.flush(); + return baos.toByteArray(); + } catch (IOException ex) { + Logger.getLogger(OosTestSerializer.class.getName()).log(Level.SEVERE, null, ex); + } + return null; + } + + +} diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleComponentFactory.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/topology/SimpleComponentFactory.java similarity index 97% rename from samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleComponentFactory.java rename to samoa-api/src/test/java/org/apache/samoa/streams/kafka/topology/SimpleComponentFactory.java index 155ce1f4..202833ea 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleComponentFactory.java +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/topology/SimpleComponentFactory.java @@ -1,53 +1,53 @@ -package org.apache.samoa.streams.kafka.topology; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * #L% - */ - -import org.apache.samoa.core.EntranceProcessor; -import org.apache.samoa.core.Processor; -import org.apache.samoa.topology.ComponentFactory; -import org.apache.samoa.topology.EntranceProcessingItem; -import org.apache.samoa.topology.IProcessingItem; -import org.apache.samoa.topology.ProcessingItem; -import org.apache.samoa.topology.Stream; -import org.apache.samoa.topology.Topology; - -public class SimpleComponentFactory implements ComponentFactory { - - public ProcessingItem createPi(Processor processor, int paralellism) { - return new SimpleProcessingItem(processor, paralellism); - } - - public ProcessingItem createPi(Processor processor) { - return this.createPi(processor, 1); - } - - public EntranceProcessingItem createEntrancePi(EntranceProcessor processor) { - return new SimpleEntranceProcessingItem(processor); - } - - public Stream createStream(IProcessingItem sourcePi) { - return new SimpleStream(sourcePi); - } - - public Topology createTopology(String topoName) { - return new SimpleTopology(topoName); - } -} +package org.apache.samoa.streams.kafka.topology; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ + +import org.apache.samoa.core.EntranceProcessor; +import org.apache.samoa.core.Processor; +import org.apache.samoa.topology.ComponentFactory; +import org.apache.samoa.topology.EntranceProcessingItem; +import org.apache.samoa.topology.IProcessingItem; +import org.apache.samoa.topology.ProcessingItem; +import org.apache.samoa.topology.Stream; +import org.apache.samoa.topology.Topology; + +public class SimpleComponentFactory implements ComponentFactory { + + public ProcessingItem createPi(Processor processor, int paralellism) { + return new SimpleProcessingItem(processor, paralellism); + } + + public ProcessingItem createPi(Processor processor) { + return this.createPi(processor, 1); + } + + public EntranceProcessingItem createEntrancePi(EntranceProcessor processor) { + return new SimpleEntranceProcessingItem(processor); + } + + public Stream createStream(IProcessingItem sourcePi) { + return new SimpleStream(sourcePi); + } + + public Topology createTopology(String topoName) { + return new SimpleTopology(topoName); + } +} diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleEngine.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/topology/SimpleEngine.java similarity index 96% rename from samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleEngine.java rename to samoa-api/src/test/java/org/apache/samoa/streams/kafka/topology/SimpleEngine.java index d446018b..338444b7 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleEngine.java +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/topology/SimpleEngine.java @@ -1,37 +1,37 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. 
- */ -package org.apache.samoa.streams.kafka.topology; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import org.apache.samoa.topology.Topology; - -public class SimpleEngine { - - public static void submitTopology(Topology topology) { - SimpleTopology simpleTopology = (SimpleTopology) topology; - simpleTopology.run(); - // runs until completion - } - -} +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. + */ +package org.apache.samoa.streams.kafka.topology; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ + +import org.apache.samoa.topology.Topology; + +public class SimpleEngine { + + public static void submitTopology(Topology topology) { + SimpleTopology simpleTopology = (SimpleTopology) topology; + simpleTopology.run(); + // runs until completion + } + +} diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleEntranceProcessingItem.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/topology/SimpleEntranceProcessingItem.java similarity index 97% rename from samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleEntranceProcessingItem.java rename to samoa-api/src/test/java/org/apache/samoa/streams/kafka/topology/SimpleEntranceProcessingItem.java index 4c626dc2..26ed4710 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleEntranceProcessingItem.java +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/topology/SimpleEntranceProcessingItem.java @@ -1,33 +1,33 @@ -package org.apache.samoa.streams.kafka.topology; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * #L% - */ - -import org.apache.samoa.core.EntranceProcessor; -import org.apache.samoa.topology.LocalEntranceProcessingItem; - -class SimpleEntranceProcessingItem extends LocalEntranceProcessingItem { - public SimpleEntranceProcessingItem(EntranceProcessor processor) { - super(processor); - } - - // The default waiting time when there is no available events is 100ms - // Override waitForNewEvents() to change it -} +package org.apache.samoa.streams.kafka.topology; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ + +import org.apache.samoa.core.EntranceProcessor; +import org.apache.samoa.topology.LocalEntranceProcessingItem; + +class SimpleEntranceProcessingItem extends LocalEntranceProcessingItem { + public SimpleEntranceProcessingItem(EntranceProcessor processor) { + super(processor); + } + + // The default waiting time when there is no available events is 100ms + // Override waitForNewEvents() to change it +} diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleProcessingItem.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/topology/SimpleProcessingItem.java similarity index 97% rename from samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleProcessingItem.java rename to samoa-api/src/test/java/org/apache/samoa/streams/kafka/topology/SimpleProcessingItem.java index 3549b856..bac03981 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleProcessingItem.java +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/topology/SimpleProcessingItem.java @@ -1,87 +1,87 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ -package org.apache.samoa.streams.kafka.topology; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * #L% - */ - -import org.apache.samoa.core.ContentEvent; -import org.apache.samoa.core.Processor; -import org.apache.samoa.topology.AbstractProcessingItem; -import org.apache.samoa.topology.IProcessingItem; -import org.apache.samoa.topology.ProcessingItem; -import org.apache.samoa.topology.Stream; -import org.apache.samoa.utils.PartitioningScheme; -import org.apache.samoa.utils.StreamDestination; - -/** - * - * @author abifet - */ -class SimpleProcessingItem extends AbstractProcessingItem { - private IProcessingItem[] arrayProcessingItem; - - SimpleProcessingItem(Processor processor) { - super(processor); - } - - SimpleProcessingItem(Processor processor, int parallelism) { - super(processor); - this.setParallelism(parallelism); - } - - public IProcessingItem getProcessingItem(int i) { - return arrayProcessingItem[i]; - } - - @Override - protected ProcessingItem addInputStream(Stream inputStream, PartitioningScheme scheme) { - StreamDestination destination = new StreamDestination(this, this.getParallelism(), scheme); - ((SimpleStream) inputStream).addDestination(destination); - return this; - } - - public SimpleProcessingItem copy() { - Processor processor = this.getProcessor(); - return new SimpleProcessingItem(processor.newProcessor(processor)); - } - - public void processEvent(ContentEvent event, int counter) { - - int parallelism = this.getParallelism(); - // System.out.println("Process event "+event+" (isLast="+event.isLastEvent()+") with counter="+counter+" while parallelism="+parallelism); - if (this.arrayProcessingItem == null && parallelism > 0) { - // Init processing elements, the first time they are needed - this.arrayProcessingItem = new IProcessingItem[parallelism]; - for (int j = 0; j < parallelism; j++) { - arrayProcessingItem[j] = this.copy(); - arrayProcessingItem[j].getProcessor().onCreate(j); - } - } - if (this.arrayProcessingItem != null) { - IProcessingItem pi = this.getProcessingItem(counter); - Processor p = pi.getProcessor(); - // 
System.out.println("PI="+pi+", p="+p); - this.getProcessingItem(counter).getProcessor().process(event); - } - } -} +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. + */ +package org.apache.samoa.streams.kafka.topology; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.core.ContentEvent; +import org.apache.samoa.core.Processor; +import org.apache.samoa.topology.AbstractProcessingItem; +import org.apache.samoa.topology.IProcessingItem; +import org.apache.samoa.topology.ProcessingItem; +import org.apache.samoa.topology.Stream; +import org.apache.samoa.utils.PartitioningScheme; +import org.apache.samoa.utils.StreamDestination; + +/** + * + * @author abifet + */ +class SimpleProcessingItem extends AbstractProcessingItem { + private IProcessingItem[] arrayProcessingItem; + + SimpleProcessingItem(Processor processor) { + super(processor); + } + + SimpleProcessingItem(Processor processor, int parallelism) { + super(processor); + this.setParallelism(parallelism); + } + + public IProcessingItem getProcessingItem(int i) { + return arrayProcessingItem[i]; + } + + @Override + protected ProcessingItem addInputStream(Stream inputStream, PartitioningScheme scheme) { + StreamDestination destination = new StreamDestination(this, this.getParallelism(), scheme); + ((SimpleStream) 
inputStream).addDestination(destination); + return this; + } + + public SimpleProcessingItem copy() { + Processor processor = this.getProcessor(); + return new SimpleProcessingItem(processor.newProcessor(processor)); + } + + public void processEvent(ContentEvent event, int counter) { + + int parallelism = this.getParallelism(); + // System.out.println("Process event "+event+" (isLast="+event.isLastEvent()+") with counter="+counter+" while parallelism="+parallelism); + if (this.arrayProcessingItem == null && parallelism > 0) { + // Init processing elements, the first time they are needed + this.arrayProcessingItem = new IProcessingItem[parallelism]; + for (int j = 0; j < parallelism; j++) { + arrayProcessingItem[j] = this.copy(); + arrayProcessingItem[j].getProcessor().onCreate(j); + } + } + if (this.arrayProcessingItem != null) { + IProcessingItem pi = this.getProcessingItem(counter); + Processor p = pi.getProcessor(); + // System.out.println("PI="+pi+", p="+p); + this.getProcessingItem(counter).getProcessor().process(event); + } + } +} diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleStream.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/topology/SimpleStream.java similarity index 96% rename from samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleStream.java rename to samoa-api/src/test/java/org/apache/samoa/streams/kafka/topology/SimpleStream.java index 269e0cc4..8405463c 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleStream.java +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/topology/SimpleStream.java @@ -1,95 +1,95 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. 
- */ -package org.apache.samoa.streams.kafka.topology; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import java.util.LinkedList; -import java.util.List; - -import org.apache.commons.lang3.builder.HashCodeBuilder; -import org.apache.samoa.core.ContentEvent; -import org.apache.samoa.topology.AbstractStream; -import org.apache.samoa.topology.IProcessingItem; -import org.apache.samoa.utils.StreamDestination; - -/** - * - * @author abifet - */ -class SimpleStream extends AbstractStream { - private List destinations; - private int maxCounter; - private int eventCounter; - - SimpleStream(IProcessingItem sourcePi) { - super(sourcePi); - this.destinations = new LinkedList<>(); - this.eventCounter = 0; - this.maxCounter = 1; - } - - private int getNextCounter() { - if (maxCounter > 0 && eventCounter >= maxCounter) - eventCounter = 0; - this.eventCounter++; - return this.eventCounter; - } - - @Override - public void put(ContentEvent event) { - this.put(event, this.getNextCounter()); - } - - private void put(ContentEvent event, int counter) { - SimpleProcessingItem pi; - int parallelism; - for (StreamDestination destination : destinations) { - pi = (SimpleProcessingItem) destination.getProcessingItem(); - parallelism = destination.getParallelism(); - switch (destination.getPartitioningScheme()) { - case SHUFFLE: - pi.processEvent(event, counter % parallelism); 
- break; - case GROUP_BY_KEY: - HashCodeBuilder hb = new HashCodeBuilder(); - hb.append(event.getKey()); - int key = hb.build() % parallelism; - pi.processEvent(event, key); - break; - case BROADCAST: - for (int p = 0; p < parallelism; p++) { - pi.processEvent(event, p); - } - break; - } - } - } - - public void addDestination(StreamDestination destination) { - this.destinations.add(destination); - if (maxCounter <= 0) - maxCounter = 1; - maxCounter *= destination.getParallelism(); - } -} +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. + */ +package org.apache.samoa.streams.kafka.topology; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ + +import java.util.LinkedList; +import java.util.List; + +import org.apache.commons.lang3.builder.HashCodeBuilder; +import org.apache.samoa.core.ContentEvent; +import org.apache.samoa.topology.AbstractStream; +import org.apache.samoa.topology.IProcessingItem; +import org.apache.samoa.utils.StreamDestination; + +/** + * + * @author abifet + */ +class SimpleStream extends AbstractStream { + private List destinations; + private int maxCounter; + private int eventCounter; + + SimpleStream(IProcessingItem sourcePi) { + super(sourcePi); + this.destinations = new LinkedList<>(); + this.eventCounter = 0; + this.maxCounter = 1; + } + + private int getNextCounter() { + if (maxCounter > 0 && eventCounter >= maxCounter) + eventCounter = 0; + this.eventCounter++; + return this.eventCounter; + } + + @Override + public void put(ContentEvent event) { + this.put(event, this.getNextCounter()); + } + + private void put(ContentEvent event, int counter) { + SimpleProcessingItem pi; + int parallelism; + for (StreamDestination destination : destinations) { + pi = (SimpleProcessingItem) destination.getProcessingItem(); + parallelism = destination.getParallelism(); + switch (destination.getPartitioningScheme()) { + case SHUFFLE: + pi.processEvent(event, counter % parallelism); + break; + case GROUP_BY_KEY: + HashCodeBuilder hb = new HashCodeBuilder(); + hb.append(event.getKey()); + int key = hb.build() % parallelism; + pi.processEvent(event, key); + break; + case BROADCAST: + for (int p = 0; p < parallelism; p++) { + pi.processEvent(event, p); + } + break; + } + } + } + + public void addDestination(StreamDestination destination) { + this.destinations.add(destination); + if (maxCounter <= 0) + maxCounter = 1; + maxCounter *= destination.getParallelism(); + } +} diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleTopology.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/topology/SimpleTopology.java similarity index 97% rename from 
samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleTopology.java rename to samoa-api/src/test/java/org/apache/samoa/streams/kafka/topology/SimpleTopology.java index 98dd7a5d..d298b695 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/topology/SimpleTopology.java +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/topology/SimpleTopology.java @@ -1,46 +1,46 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ -package org.apache.samoa.streams.kafka.topology; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import org.apache.samoa.topology.AbstractTopology; - -public class SimpleTopology extends AbstractTopology { - SimpleTopology(String name) { - super(name); - } - - public void run() { - if (this.getEntranceProcessingItems() == null) - throw new IllegalStateException("You need to set entrance PI before running the topology."); - if (this.getEntranceProcessingItems().size() != 1) - throw new IllegalStateException("SimpleTopology supports 1 entrance PI only. 
Number of entrance PIs is " - + this.getEntranceProcessingItems().size()); - - SimpleEntranceProcessingItem entrancePi = (SimpleEntranceProcessingItem) this.getEntranceProcessingItems() - .toArray()[0]; - entrancePi.getProcessor().onCreate(0); // id=0 as it is not used in simple mode - entrancePi.startSendingEvents(); - } -} +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. + */ +package org.apache.samoa.streams.kafka.topology; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.topology.AbstractTopology; + +public class SimpleTopology extends AbstractTopology { + SimpleTopology(String name) { + super(name); + } + + public void run() { + if (this.getEntranceProcessingItems() == null) + throw new IllegalStateException("You need to set entrance PI before running the topology."); + if (this.getEntranceProcessingItems().size() != 1) + throw new IllegalStateException("SimpleTopology supports 1 entrance PI only. 
Number of entrance PIs is " + + this.getEntranceProcessingItems().size()); + + SimpleEntranceProcessingItem entrancePi = (SimpleEntranceProcessingItem) this.getEntranceProcessingItems() + .toArray()[0]; + entrancePi.getProcessor().onCreate(0); // id=0 as it is not used in simple mode + entrancePi.startSendingEvents(); + } +} From 04a28bb5ecbc039f07c17a91d57859c4651f56d9 Mon Sep 17 00:00:00 2001 From: pwawrzyniak Date: Wed, 7 Jun 2017 12:50:15 +0200 Subject: [PATCH 16/17] JSON mapper with test --- .../samoa/streams/kafka/KafkaJsonMapper.java | 106 ++++++++++ .../KafkaEntranceProcessorWithJsonTest.java | 187 ++++++++++++++++++ 2 files changed, 293 insertions(+) create mode 100644 samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaJsonMapper.java create mode 100644 samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorWithJsonTest.java diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaJsonMapper.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaJsonMapper.java new file mode 100644 index 00000000..2ac3e04f --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaJsonMapper.java @@ -0,0 +1,106 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.samoa.streams.kafka; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2017 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.gson.InstanceCreator; +import com.google.gson.JsonDeserializationContext; +import com.google.gson.JsonDeserializer; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import com.google.gson.JsonParseException; +import java.lang.reflect.Type; +import java.nio.charset.Charset; +import org.apache.samoa.instances.DenseInstanceData; +import org.apache.samoa.instances.InstanceData; +import org.apache.samoa.learners.InstanceContentEvent; + +/** + * Sample class for serializing and deserializing {@link InstanceContentEvent} + * from/to JSON format + * + * @author pwawrzyniak + * @version 0.5.0-incubating-SNAPSHOT + * @since 0.5.0-incubating + */ +public class KafkaJsonMapper implements KafkaDeserializer, KafkaSerializer { + + private final transient Gson gson; + private final Charset charset; + + /** + * Class constructor + * + * @param charset Charset to be used for bytes parsing + */ + public KafkaJsonMapper(Charset charset) { + this.gson = new GsonBuilder().registerTypeAdapter(InstanceData.class, new InstanceDataCustomDeserializer()).create(); + this.charset = charset; + } + + @Override + public InstanceContentEvent deserialize(byte[] message) { + return 
gson.fromJson(new String(message, this.charset), InstanceContentEvent.class); + } + + @Override + public byte[] serialize(InstanceContentEvent message) { + return gson.toJson(message).getBytes(this.charset); + } + + //Unused + public class InstanceDataCreator implements InstanceCreator { + + @Override + public InstanceData createInstance(Type type) { + return new DenseInstanceData(); + } + } + + public class InstanceDataCustomDeserializer implements JsonDeserializer { + + @Override + public DenseInstanceData deserialize(JsonElement je, Type type, JsonDeserializationContext jdc) throws JsonParseException { + double[] attributeValues = null; + JsonObject obj = (JsonObject) je; + attributeValues = jdc.deserialize(obj.get("attributeValues"), double[].class); + DenseInstanceData did = new DenseInstanceData(attributeValues); + return did; + } + + } + +} diff --git a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorWithJsonTest.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorWithJsonTest.java new file mode 100644 index 00000000..e18028bb --- /dev/null +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorWithJsonTest.java @@ -0,0 +1,187 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.samoa.streams.kafka; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2017 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ +import com.google.gson.Gson; +import java.io.IOException; +import java.nio.charset.Charset; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; +import java.util.Random; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.apache.samoa.learners.InstanceContentEvent; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import static org.junit.Assert.*; +import kafka.admin.AdminUtils; +import kafka.admin.RackAwareMode; +import kafka.server.KafkaConfig; +import kafka.server.KafkaServer; +import kafka.utils.MockTime; +import kafka.utils.TestUtils; +import org.apache.kafka.common.utils.Time; +import kafka.utils.ZKStringSerializer$; +import kafka.utils.ZkUtils; +import kafka.zk.EmbeddedZookeeper; +import org.I0Itec.zkclient.ZkClient; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.samoa.instances.InstancesHeader; + +/** + * + * @author pwawrzyniak + * @author Jakub Jankowski + */ 
+public class KafkaEntranceProcessorWithJsonTest { + + private static final String ZKHOST = "127.0.0.1"; + private static final String BROKERHOST = "127.0.0.1"; + private static final String BROKERPORT = "9092"; + private static final String TOPIC_AVRO = "samoa_test-avro"; + private static final String TOPIC_JSON = "samoa_test-json"; + private static final int NUM_INSTANCES = 11111; + + private static KafkaServer kafkaServer; + private static EmbeddedZookeeper zkServer; + private static ZkClient zkClient; + private static String zkConnect; + private static int TIMEOUT = 1000; + + public KafkaEntranceProcessorWithJsonTest() { + } + + @BeforeClass + public static void setUpClass() throws IOException { + // setup Zookeeper + zkServer = new EmbeddedZookeeper(); + zkConnect = ZKHOST + ":" + zkServer.port(); + zkClient = new ZkClient(zkConnect, 30000, 30000, ZKStringSerializer$.MODULE$); + ZkUtils zkUtils = ZkUtils.apply(zkClient, false); + + // setup Broker + Properties brokerProps = new Properties(); + brokerProps.setProperty("zookeeper.connect", zkConnect); + brokerProps.setProperty("broker.id", "0"); + brokerProps.setProperty("log.dirs", Files.createTempDirectory("kafka-").toAbsolutePath().toString()); + brokerProps.setProperty("listeners", "PLAINTEXT://" + BROKERHOST + ":" + BROKERPORT); + KafkaConfig config = new KafkaConfig(brokerProps); + Time mock = new MockTime(); + kafkaServer = TestUtils.createServer(config, mock); + + // create topics + AdminUtils.createTopic(zkUtils, TOPIC_AVRO, 1, 1, new Properties(), RackAwareMode.Disabled$.MODULE$); + AdminUtils.createTopic(zkUtils, TOPIC_JSON, 1, 1, new Properties(), RackAwareMode.Disabled$.MODULE$); + + } + + @AfterClass + public static void tearDownClass() { + try { + kafkaServer.shutdown(); + zkClient.close(); + zkServer.shutdown(); + } catch (Exception ex) { + Logger.getLogger(KafkaEntranceProcessorWithJsonTest.class.getName()).log(Level.SEVERE, null, ex); + } + } + + @Before + public void setUp() throws IOException 
{ + + } + + @After + public void tearDown() { + + } + +@Test + public void testFetchingNewDataWithJson() throws InterruptedException, ExecutionException, TimeoutException { + + final Logger logger = Logger.getLogger(KafkaEntranceProcessorTest.class.getName()); + logger.log(Level.INFO, "JSON"); + logger.log(Level.INFO, "testFetchingNewDataWithJson"); + Properties props = TestUtilsForKafka.getConsumerProperties(BROKERHOST, BROKERPORT); + props.setProperty("auto.offset.reset", "earliest"); + KafkaEntranceProcessor kep = new KafkaEntranceProcessor(props, TOPIC_JSON, TIMEOUT, new KafkaJsonMapper(Charset.defaultCharset())); + + kep.onCreate(1); + + // prepare new thread for data producing + Thread th = new Thread(new Runnable() { + @Override + public void run() { + KafkaProducer producer = new KafkaProducer<>(TestUtilsForKafka.getProducerProperties(BROKERHOST,BROKERPORT)); + + Random r = new Random(); + InstancesHeader header = TestUtilsForKafka.generateHeader(10); + Gson gson = new Gson(); + int i = 0; + for (i = 0; i < NUM_INSTANCES; i++) { + try { + InstanceContentEvent event = TestUtilsForKafka.getData(r, 10, header); + + ProducerRecord record = new ProducerRecord(TOPIC_JSON, gson.toJson(event).getBytes()); + long stat = producer.send(record).get(10, TimeUnit.SECONDS).offset(); + } catch (InterruptedException | ExecutionException | TimeoutException ex) { + Logger.getLogger(KafkaEntranceProcessorTest.class.getName()).log(Level.SEVERE, null, ex); + } + } + producer.flush(); + producer.close(); + } + }); + th.start(); + + int z = 0; + while (z < NUM_INSTANCES && kep.hasNext()) { + InstanceContentEvent event = (InstanceContentEvent) kep.nextEvent(); + z++; + } + + assertEquals("Number of sent and received instances", NUM_INSTANCES, z); + + } +} From 1a2d170d8d7753f74e71a36d3f90f73a33c6c363 Mon Sep 17 00:00:00 2001 From: pwawrzyniak Date: Mon, 19 Jun 2017 13:25:13 +0200 Subject: [PATCH 17/17] Minor changes, code cleanup etc. 
--- .../samoa/streams/kafka/KafkaJsonMapper.java | 3 --- .../KafkaEntranceProcessorWithJsonTest.java | 25 +++++++------------ 2 files changed, 9 insertions(+), 19 deletions(-) diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaJsonMapper.java b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaJsonMapper.java index 2ac3e04f..c514ac07 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaJsonMapper.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaJsonMapper.java @@ -1,5 +1,4 @@ /* - * Copyright 2017 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,8 +18,6 @@ * #%L * SAMOA * %% - * Copyright (C) 2014 - 2017 Apache Software Foundation - * %% * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at diff --git a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorWithJsonTest.java b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorWithJsonTest.java index e18028bb..061bbf4e 100644 --- a/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorWithJsonTest.java +++ b/samoa-api/src/test/java/org/apache/samoa/streams/kafka/KafkaEntranceProcessorWithJsonTest.java @@ -1,5 +1,4 @@ /* - * Copyright 2017 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,8 +18,6 @@ * #%L * SAMOA * %% - * Copyright (C) 2014 - 2017 Apache Software Foundation - * %% * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at @@ -38,8 +35,6 @@ import java.io.IOException; import java.nio.charset.Charset; import java.nio.file.Files; -import java.util.ArrayList; -import java.util.List; import java.util.Properties; import java.util.Random; import java.util.concurrent.ExecutionException; @@ -47,27 +42,27 @@ import java.util.concurrent.TimeoutException; import java.util.logging.Level; import java.util.logging.Logger; -import org.apache.samoa.learners.InstanceContentEvent; -import org.junit.After; -import org.junit.AfterClass; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Test; -import static org.junit.Assert.*; import kafka.admin.AdminUtils; import kafka.admin.RackAwareMode; import kafka.server.KafkaConfig; import kafka.server.KafkaServer; import kafka.utils.MockTime; import kafka.utils.TestUtils; -import org.apache.kafka.common.utils.Time; import kafka.utils.ZKStringSerializer$; import kafka.utils.ZkUtils; import kafka.zk.EmbeddedZookeeper; import org.I0Itec.zkclient.ZkClient; import org.apache.kafka.clients.producer.KafkaProducer; import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.common.utils.Time; import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.learners.InstanceContentEvent; +import org.junit.After; +import org.junit.AfterClass; +import static org.junit.Assert.*; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; /** * @@ -79,7 +74,6 @@ public class KafkaEntranceProcessorWithJsonTest { private static final String ZKHOST = "127.0.0.1"; private static final String BROKERHOST = "127.0.0.1"; private static final String BROKERPORT = "9092"; - private static final String TOPIC_AVRO = "samoa_test-avro"; private static final String TOPIC_JSON = "samoa_test-json"; private static final int NUM_INSTANCES = 11111; @@ -110,8 +104,7 @@ public static void setUpClass() throws IOException { Time mock = new MockTime(); kafkaServer = 
TestUtils.createServer(config, mock); - // create topics - AdminUtils.createTopic(zkUtils, TOPIC_AVRO, 1, 1, new Properties(), RackAwareMode.Disabled$.MODULE$); + // create topics AdminUtils.createTopic(zkUtils, TOPIC_JSON, 1, 1, new Properties(), RackAwareMode.Disabled$.MODULE$); }