Skip to content

Commit 5cd957d

Browse files
feat: modify kafka extractor (#255)
* feat(kafka): extract number of partitions and allow urn prefixing * feat(kafka): skip default topics * fix(enrich): kafka custom properties not populated
1 parent c961884 commit 5cd957d

File tree

5 files changed

+114
-15
lines changed

5 files changed

+114
-15
lines changed

plugins/extractors/kafka/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ source:
1414
| Key | Value | Example | Description | |
1515
| :-- | :---- | :------ | :---------- | :- |
1616
| `broker` | `string` | `localhost:9092` | Kafka broker's host | *required* |
17+
| `urn_prefix` | `string` | `samplePrefix-` | Prefix to be prepended to urn field | *optional* |
1718

1819
## Outputs
1920

plugins/extractors/kafka/kafka.go

Lines changed: 42 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package kafka
33
import (
44
"context"
55
_ "embed" // used to print the embedded assets
6+
67
"github.com/pkg/errors"
78

89
"github.com/odpf/meteor/models"
@@ -19,9 +20,16 @@ import (
1920
//go:embed README.md
2021
var summary string
2122

23+
// default topics map to skip
24+
var defaultTopics = map[string]byte{
25+
"__consumer_offsets": 0,
26+
"_schemas": 0,
27+
}
28+
2229
// Config hold the set of configuration for the kafka extractor
2330
type Config struct {
24-
Broker string `mapstructure:"broker" validate:"required"`
31+
Broker string `mapstructure:"broker" validate:"required"`
32+
UrnPrefix string `mapstructure:"urn_prefix"`
2533
}
2634

2735
var sampleConfig = `
@@ -85,30 +93,54 @@ func (e *Extractor) Extract(ctx context.Context, emit plugins.Emit) (err error)
8593
}
8694

8795
// collect topic list from partition list
88-
topics := map[string]bool{}
96+
topics := map[string]int{}
8997
for _, p := range partitions {
90-
topics[p.Topic] = true
98+
_, ok := topics[p.Topic]
99+
if !ok {
100+
topics[p.Topic] = 0
101+
}
102+
103+
topics[p.Topic]++
91104
}
92105

93-
// process topics
94-
for topicName := range topics {
95-
emit(models.NewRecord(e.buildTopic(topicName)))
106+
// build and push topics
107+
for topic, numOfPartitions := range topics {
108+
// skip if topic is a default topic
109+
_, isDefaultTopic := defaultTopics[topic]
110+
if isDefaultTopic {
111+
continue
112+
}
113+
114+
record := models.NewRecord(e.buildTopic(topic, numOfPartitions))
115+
emit(record)
96116
}
97117

98118
return
99119
}
100120

101-
// Build topic metadata model using a topic name
102-
func (e *Extractor) buildTopic(topicName string) *assets.Topic {
121+
// Build topic metadata model using a topic and number of partitions
122+
func (e *Extractor) buildTopic(topic string, numOfPartitions int) *assets.Topic {
103123
return &assets.Topic{
104124
Resource: &common.Resource{
105-
Urn: topicName,
106-
Name: topicName,
125+
Urn: e.buildUrn(topic),
126+
Name: topic,
107127
Service: "kafka",
108128
},
129+
Profile: &assets.TopicProfile{
130+
NumberOfPartitions: int64(numOfPartitions),
131+
},
109132
}
110133
}
111134

135+
// Build urn using prefixes from config
136+
func (e *Extractor) buildUrn(topic string) string {
137+
if e.config.UrnPrefix != "" {
138+
topic = e.config.UrnPrefix + topic
139+
}
140+
141+
return topic
142+
}
143+
112144
func init() {
113145
if err := registry.Extractors.Register("kafka", func() plugins.Extractor {
114146
return New(plugins.GetLog())

plugins/extractors/kafka/kafka_test.go

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@ package kafka_test
44

55
import (
66
"context"
7-
"github.com/odpf/meteor/test/utils"
87
"log"
98
"net"
109

10+
"github.com/odpf/meteor/test/utils"
11+
1112
"os"
1213
"strconv"
1314
"testing"
@@ -93,7 +94,7 @@ func TestInit(t *testing.T) {
9394
}
9495

9596
func TestExtract(t *testing.T) {
96-
t.Run("should return list of topic metadata", func(t *testing.T) {
97+
t.Run("should emit list of topic metadata", func(t *testing.T) {
9798
ctx := context.TODO()
9899
extr := newExtractor()
99100
err := extr.Init(ctx, map[string]interface{}{
@@ -115,20 +116,82 @@ func TestExtract(t *testing.T) {
115116
Name: "meteor-test-topic-1",
116117
Service: "kafka",
117118
},
119+
Profile: &assets.TopicProfile{
120+
NumberOfPartitions: 1,
121+
},
118122
}),
119123
models.NewRecord(&assets.Topic{
120124
Resource: &common.Resource{
121125
Urn: "meteor-test-topic-2",
122126
Name: "meteor-test-topic-2",
123127
Service: "kafka",
124128
},
129+
Profile: &assets.TopicProfile{
130+
NumberOfPartitions: 1,
131+
},
125132
}),
126133
models.NewRecord(&assets.Topic{
127134
Resource: &common.Resource{
128135
Urn: "meteor-test-topic-3",
129136
Name: "meteor-test-topic-3",
130137
Service: "kafka",
131138
},
139+
Profile: &assets.TopicProfile{
140+
NumberOfPartitions: 1,
141+
},
142+
}),
143+
}
144+
// We need this function because the extractor cannot guarantee order
145+
// so comparing expected slice and result slice will not be consistent
146+
assertResults(t, expected, emitter.Get())
147+
})
148+
149+
t.Run("should add prefix to urn if urn_prefix is defined", func(t *testing.T) {
150+
ctx := context.TODO()
151+
extr := newExtractor()
152+
err := extr.Init(ctx, map[string]interface{}{
153+
"broker": brokerHost,
154+
"urn_prefix": "samplePrefix-",
155+
})
156+
if err != nil {
157+
t.Fatal(err)
158+
}
159+
160+
emitter := mocks.NewEmitter()
161+
err = extr.Extract(ctx, emitter.Push)
162+
assert.NoError(t, err)
163+
164+
// assert results with expected data
165+
expected := []models.Record{
166+
models.NewRecord(&assets.Topic{
167+
Resource: &common.Resource{
168+
Urn: "samplePrefix-meteor-test-topic-1",
169+
Name: "meteor-test-topic-1",
170+
Service: "kafka",
171+
},
172+
Profile: &assets.TopicProfile{
173+
NumberOfPartitions: 1,
174+
},
175+
}),
176+
models.NewRecord(&assets.Topic{
177+
Resource: &common.Resource{
178+
Urn: "samplePrefix-meteor-test-topic-2",
179+
Name: "meteor-test-topic-2",
180+
Service: "kafka",
181+
},
182+
Profile: &assets.TopicProfile{
183+
NumberOfPartitions: 1,
184+
},
185+
}),
186+
models.NewRecord(&assets.Topic{
187+
Resource: &common.Resource{
188+
Urn: "samplePrefix-meteor-test-topic-3",
189+
Name: "meteor-test-topic-3",
190+
Service: "kafka",
191+
},
192+
Profile: &assets.TopicProfile{
193+
NumberOfPartitions: 1,
194+
},
132195
}),
133196
}
134197
// We need this function because the extractor cannot guarantee order

plugins/processors/enrich/processor.go

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,9 +67,6 @@ func (p *Processor) process(record models.Record) (models.Metadata, error) {
6767
data := record.Data()
6868
p.logger.Debug("enriching record", "record", data.GetResource().Urn)
6969
customProps := utils.GetCustomProperties(data)
70-
if customProps == nil {
71-
return data, nil
72-
}
7370

7471
// update custom properties using value from config
7572
for key, value := range p.config {

utils/custom_properties.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,17 @@ func SetCustomProperties(metadata models.Metadata, customFields map[string]inter
3030

3131
switch metadata := metadata.(type) {
3232
case *assets.Table:
33+
metadata.Properties = properties
3334
case *assets.Topic:
35+
metadata.Properties = properties
3436
case *assets.Dashboard:
37+
metadata.Properties = properties
3538
case *assets.Bucket:
39+
metadata.Properties = properties
3640
case *assets.Group:
41+
metadata.Properties = properties
3742
case *assets.Job:
43+
metadata.Properties = properties
3844
case *assets.User:
3945
metadata.Properties = properties
4046
}

0 commit comments

Comments
 (0)