Commit 6449430: Added Kafka Producer

1 parent: ae4cdd4
12 files changed: +121 −74 lines

.gitignore

+1 −1

@@ -4,4 +4,4 @@
 
 /main/src/HueLogging.ServiceV2/Properties/launchSettings.json
 
-*.env
+.env
docker-compose.yml

+3 −3

@@ -9,7 +9,7 @@ services:
     ports:
       - "9092:9092"
     environment:
-      KAFKA_ADVERTISED_HOST_NAME: ${HueLogging:MachineHostIp}
+      KAFKA_ADVERTISED_HOST_NAME: ${ADVERTISED_HOST_NAME}
       KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
     volumes:
       - /var/run/docker.sock:/var/run/docker.sock
@@ -19,9 +19,9 @@ services:
       - "9042:9042"
       - "7000:7000"
     environment:
-      CASSANDRA_BROADCAST_ADDRESS: ${HueLogging:MachineHostIp}
+      CASSANDRA_BROADCAST_ADDRESS: ${ADVERTISED_HOST_NAME}
     volumes:
-      - ${HueLogging:CassandraVolumePath}:/var/lib/cassandra
+      - ${HueLogging_CassandraVolumePath}:/var/lib/cassandra
   polling-service:
     build:
       context: ./main/src

run.sh

+28

@@ -0,0 +1,28 @@
+#!/bin/bash
+
+streamName="hue-logging-stream"
+streamVersion="1.0-SNAPSHOT"
+streamBasePath="stream/${streamName}"
+streamJarPath="${streamBasePath}/target/${streamName}-${streamVersion}.jar"
+
+# Before running this, make sure that you have a .env file and that the expected variables exist. See sample.env for the expected variables.
+# TODO: fix the hostname issue. Services should be able to run and communicate using a hostname/alias.
+
+# First, build the stream job with Maven.
+echo "Building Stream Job"
+mvn -f "${streamBasePath}/pom.xml" clean package
+if [ $? -eq 0 ]
+then
+  # Now spawn all services.
+  echo "Spawning up all services"
+  docker-compose up -d
+fi
+if [ $? -eq 0 ]
+then
+  # Then start the stream job.
+  echo "Starting Stream Job"
+  JOBMANAGER_CONTAINER=$(docker ps --filter name=jobmanager --format '{{.ID}}')
+  docker cp "${streamJarPath}" "${JOBMANAGER_CONTAINER}":/job.jar
+  CASSANDRA_HOST_IP=$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$(docker ps --filter name=cassandra --format '{{.ID}}')")
+  docker exec -it -d "$JOBMANAGER_CONTAINER" flink run /job.jar --cassandraHost "$CASSANDRA_HOST_IP"
+fi
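
The script hands the job a --cassandraHost argument; inside the job this is presumably read with Flink's ParameterTool, which LightEvents.scala imports below. A minimal sketch of that parsing (the object name and IP value here are illustrative, not from the commit):

    import org.apache.flink.api.java.utils.ParameterTool

    object CassandraHostArgSketch {
      def main(args: Array[String]): Unit = {
        // Same flag shape that run.sh passes: flink run /job.jar --cassandraHost <ip>
        val params = ParameterTool.fromArgs(Array("--cassandraHost", "172.17.0.3"))
        println(params.get("cassandraHost")) // 172.17.0.3
      }
    }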

sample.env

+3

@@ -0,0 +1,3 @@
+HueLogging:ApiKey= # Hue API key that can be generated using the CLI
+ADVERTISED_HOST_NAME= # Currently an IP address has been tested and works; still need to investigate why a hostname does not
+HueLogging_CassandraVolumePath= # File path to the Cassandra volume where data is stored

stop.sh

+2

@@ -0,0 +1,2 @@
+#!/bin/bash
+docker-compose down

stream/hue-logging-stream/pom.xml

+7

@@ -111,6 +111,13 @@ under the License.
       <version>1.2.17</version>
       <scope>runtime</scope>
     </dependency>
+
+    <!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.module/jackson-module-scala -->
+    <dependency>
+      <groupId>com.fasterxml.jackson.module</groupId>
+      <artifactId>jackson-module-scala_${scala.binary.version}</artifactId>
+      <version>2.9.8</version>
+    </dependency>
   </dependencies>

   <build>

stream/hue-logging-stream/src/main/scala/com/github/novaonline/LightEvents.scala

+30 −14

@@ -18,19 +18,19 @@
 
 package com.github.novaonline
 
+import java.time.OffsetDateTime
 import java.util.Properties
 
 import com.github.novaonline.cassandra.HueLoggingLocalCluster
-import com.github.novaonline.map.JsonToLightEvent
 import com.github.novaonline.model.light._
+import com.github.novaonline.serialization.SimpleJsonSchema
 import org.apache.flink.api.java.utils.ParameterTool
 import org.apache.flink.streaming.api.TimeCharacteristic
 import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
 import org.apache.flink.streaming.api.scala._
 import org.apache.flink.streaming.api.windowing.time.Time
 import org.apache.flink.streaming.connectors.cassandra.CassandraSink
-import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
-import org.apache.flink.streaming.util.serialization.JSONKeyValueDeserializationSchema
+import org.apache.flink.streaming.connectors.kafka.{FlinkKafkaConsumer010, FlinkKafkaProducer010}
 import org.slf4j.LoggerFactory
 
 
@@ -50,32 +50,39 @@ object LightEvents {
     val env = StreamExecutionEnvironment.getExecutionEnvironment
     env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
 
+    val lightEventSchema = new SimpleJsonSchema[LightEvent]
+    val lightSessionSchema = new SimpleJsonSchema[LightSession]
+    val lightAccumlatedSchema = new SimpleJsonSchema[LightAccumulated]
+
     val properties = new Properties()
     properties.setProperty("bootstrap.servers", s"$kafkaHost:9092")
     properties.setProperty("group.id", "Hue-Logging")
-    val lightEventSource = new FlinkKafkaConsumer010("hue-logging-light-event", new JSONKeyValueDeserializationSchema(false), properties)
+    val lightEventSource = new FlinkKafkaConsumer010("hue-logging-light-event", lightEventSchema, properties)
     lightEventSource.setStartFromEarliest()
 
     val cassandraCluster = new HueLoggingLocalCluster(cassandraHost)
 
     // Start Map Reduce
-    val lightEventJsonStream = env.addSource(lightEventSource)
-    val lightEventStream = lightEventJsonStream.map(JsonToLightEvent)
+    val lightEventStream = env.addSource(lightEventSource).map(x => {
+      // enrich anything else missed from serializing
+      x.copy(state = x.state.copy(addDate = OffsetDateTime.parse(x.addDate).toEpochSecond))
+    })
 
     val watermarkedLightEventsStream = lightEventStream.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor[LightEvent](Time.seconds(10)) {
-      override def extractTimestamp(t: LightEvent): Long = t.state.addDate
+      override def extractTimestamp(t: LightEvent): Long = OffsetDateTime.parse(t.addDate).toEpochSecond
     })
 
     val lightSessionsStream = watermarkedLightEventsStream
       .keyBy(x => x.light.id)
-      .flatMapWithState[LightSession, LightState]((light: LightEvent, state: Option[LightState]) => {
-        if (state.isEmpty && light.state.on) {
+      .flatMapWithState[LightSession, LightState]((light: LightEvent, prevState: Option[LightState]) => {
+        val currState = light.state
+        if (prevState.isEmpty && currState.on) {
           LOG.info("Opened Light Session")
-          (Iterator(LightSession(light.light, light.state, None, 0)), Some(light.state))
-        } else if (state.isDefined && state.get.on && !light.state.on) {
+          (Iterator(LightSession(light.light, currState, None, 0)), Some(currState))
+        } else if (prevState.isDefined && prevState.get.on && !currState.on) {
           LOG.info("Closed Light Session")
-          val duration = light.state.addDate - state.get.addDate
-          (Iterator(LightSession(light.light, light.state, Some(light.state), duration)), None)
+          val duration = currState.addDate - prevState.get.addDate
+          (Iterator(LightSession(light.light, currState, Some(currState), duration)), None)
         } else {
           //LOG.info("Unknown State for Light Session")
           (Iterator.empty, None)
@@ -89,8 +96,17 @@ object LightEvents {
       .map(x => LightAccumulated(x.light, x.durationSeconds))
 
 
+    // Publish Processed Streams to Kafka
+    val kafkaLightSessionProducer = new FlinkKafkaProducer010("hue-logging-light-session", lightSessionSchema, properties)
+
+    val kafkaLightAccumulatedProducer = new FlinkKafkaProducer010("hue-logging-light-accumulated", lightAccumlatedSchema, properties)
+
+    lightSessionsStream.addSink(kafkaLightSessionProducer)
+    lightAccumulated.addSink(kafkaLightAccumulatedProducer)
+
+
     CassandraSink.addSink(lightEventStream.map(x => {
-      val r = x.toCassandraTuple()
+      val r = x.toCassandraTuple
       LOG.info(s"mapped light event stream ${r.toString()}")
       r
     }))
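
The flatMapWithState block above is the heart of the session tracking: an off-to-on transition opens a session, an on-to-off transition closes it and emits the duration, and any other event clears the stored state. A self-contained sketch of that transition function, with simplified stand-in types (Session and its fields are illustrative, not the project's model):

    object SessionLogicSketch {
      final case class LightState(on: Boolean, addDate: Long)
      final case class Session(openedAt: Long, closedAt: Option[Long], durationSeconds: Long)

      // Mirrors the keyed-state transitions above: off->on opens a session,
      // on->off closes it; anything else clears the state, as in the original.
      def transition(prev: Option[LightState], curr: LightState): (Iterator[Session], Option[LightState]) =
        if (prev.isEmpty && curr.on)
          (Iterator(Session(curr.addDate, None, 0)), Some(curr))
        else if (prev.exists(_.on) && !curr.on)
          (Iterator(Session(prev.get.addDate, Some(curr.addDate), curr.addDate - prev.get.addDate)), None)
        else
          (Iterator.empty, None)

      def main(args: Array[String]): Unit = {
        val (opened, state) = transition(None, LightState(on = true, addDate = 100))
        val (closed, _) = transition(state, LightState(on = false, addDate = 160))
        println(opened.toList) // List(Session(100,None,0))
        println(closed.toList) // List(Session(100,Some(160),60))
      }
    }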

stream/hue-logging-stream/src/main/scala/com/github/novaonline/map/JsonToLightEvent.scala

−46

This file was deleted.
stream/hue-logging-stream/src/main/scala/com/github/novaonline/model/light/Light.scala

+1 −4

@@ -1,12 +1,9 @@
 package com.github.novaonline.model.light
 
-import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode
-
 case class Light(
   id: String,
   hueType: String,
   name: String,
   modelId: String,
-  sWVersion: String,
-  raw: Option[JsonNode] = None
+  sWVersion: String
 )
stream/hue-logging-stream/src/main/scala/com/github/novaonline/model/light/LightEvent.scala

+2 −2

@@ -1,6 +1,6 @@
 package com.github.novaonline.model.light
 
-case class LightEvent(light: Light, state: LightState)
+case class LightEvent(light: Light, state: LightState, addDate: String)
 {
-  def toCassandraTuple(): (String, String, Light, LightState) = (light.name, light.id, light, state)
+  def toCassandraTuple: (String, String, Light, LightState) = (light.name, light.id, light, state)
 }
stream/hue-logging-stream/src/main/scala/com/github/novaonline/model/light/LightState.scala

+1 −4

@@ -1,13 +1,10 @@
 package com.github.novaonline.model.light
 
-import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode
-
 case class LightState(
   on: Boolean,
   brightness: Int,
   saturation: Int,
   hue: Int,
   reachable: Boolean,
-  addDate: Long,
-  raw: Option[JsonNode] = None
+  addDate: Long
 )
stream/hue-logging-stream/src/main/scala/com/github/novaonline/serialization/SimpleJsonSchema.scala

+43

@@ -0,0 +1,43 @@
+package com.github.novaonline.serialization
+
+import com.fasterxml.jackson.databind.{DeserializationFeature, MapperFeature, ObjectMapper}
+import com.fasterxml.jackson.module.scala.DefaultScalaModule
+import org.apache.flink.api.common.typeinfo.TypeInformation
+import org.apache.flink.api.java.typeutils.TypeExtractor
+import org.apache.flink.streaming.util.serialization.{KeyedDeserializationSchema, KeyedSerializationSchema}
+
+import scala.reflect.ClassTag
+
+class SimpleJsonSchema[T](implicit ct: ClassTag[T]) extends KeyedSerializationSchema[T] with KeyedDeserializationSchema[T] {
+
+  private var mapper: ObjectMapper = _
+
+  override def serializeKey(t: T): Array[Byte] = null
+
+  override def serializeValue(t: T): Array[Byte] = {
+    if (this.mapper == null) {
+      this.mapper = new ObjectMapper
+      mapper.registerModule(DefaultScalaModule)
+      mapper.configure(MapperFeature.ACCEPT_CASE_INSENSITIVE_PROPERTIES, true)
+      mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
+
+    }
+    mapper.writeValueAsBytes(t)
+  }
+
+  override def getTargetTopic(t: T): String = null
+
+  override def deserialize(k: Array[Byte], v: Array[Byte], s: String, i: Int, l: Long): T = {
+    if (this.mapper == null) {
+      this.mapper = new ObjectMapper
+      mapper.registerModule(DefaultScalaModule)
+      mapper.configure(MapperFeature.ACCEPT_CASE_INSENSITIVE_PROPERTIES, true)
+      mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
+    }
+    mapper.readValue(v, ct.runtimeClass.asInstanceOf[Class[T]])
+  }
+
+  override def isEndOfStream(t: T): Boolean = false
+
+  override def getProducedType: TypeInformation[T] = TypeExtractor.getForClass(ct.runtimeClass.asInstanceOf[Class[T]])
+}
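
A quick round-trip through the new schema is a handy sanity check of the Jackson configuration. The Demo case class and topic string below are hypothetical, used only for illustration:

    import com.github.novaonline.serialization.SimpleJsonSchema

    // Hypothetical payload type for the demo; any top-level case class works.
    case class Demo(id: String, on: Boolean)

    object SimpleJsonSchemaRoundTrip {
      def main(args: Array[String]): Unit = {
        val schema = new SimpleJsonSchema[Demo]
        val bytes = schema.serializeValue(Demo("light-1", on = true))
        // deserialize(key, value, topic, partition, offset); key and topic are unused here
        val event = schema.deserialize(null, bytes, "hue-logging-light-event", 0, 0L)
        println(event) // Demo(light-1,true)
      }
    }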
