15
15
*/
16
16
package com .datastax .oss .pulsar .functions .transforms ;
17
17
18
+ import com .fasterxml .jackson .core .type .TypeReference ;
19
+ import com .fasterxml .jackson .databind .ObjectMapper ;
20
+ import com .fasterxml .jackson .databind .node .ObjectNode ;
18
21
import java .util .List ;
19
22
import java .util .Map ;
20
23
import java .util .concurrent .ConcurrentHashMap ;
25
28
import org .apache .pulsar .common .schema .SchemaType ;
26
29
27
30
public class MergeKeyValueStep implements TransformStep {
28
-
31
+ public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper ();
29
32
private final Map <org .apache .avro .Schema , Map <org .apache .avro .Schema , org .apache .avro .Schema >>
30
33
schemaCache = new ConcurrentHashMap <>();
31
34
@@ -35,61 +38,77 @@ public void process(TransformContext transformContext) {
35
38
if (keySchema == null ) {
36
39
return ;
37
40
}
38
- if (keySchema .getSchemaInfo ().getType () == SchemaType .AVRO
41
+ Object keyObject = transformContext .getKeyObject ();
42
+ Object valueObject = transformContext .getValueObject ();
43
+ if (keyObject instanceof Map && valueObject instanceof Map ) {
44
+ Map <Object , Object > value = (Map <Object , Object >) valueObject ;
45
+ Map <String , Object > keyCopy =
46
+ OBJECT_MAPPER .convertValue (keyObject , new TypeReference <>() {});
47
+ keyCopy .forEach (value ::putIfAbsent );
48
+ } else if (keySchema .getSchemaInfo ().getType () == SchemaType .AVRO
39
49
&& transformContext .getValueSchema ().getSchemaInfo ().getType () == SchemaType .AVRO ) {
40
- GenericRecord avroKeyRecord = (GenericRecord ) transformContext . getKeyObject () ;
50
+ GenericRecord avroKeyRecord = (GenericRecord ) keyObject ;
41
51
org .apache .avro .Schema avroKeySchema = avroKeyRecord .getSchema ();
42
52
43
- GenericRecord avroValueRecord = (GenericRecord ) transformContext . getValueObject () ;
53
+ GenericRecord avroValueRecord = (GenericRecord ) valueObject ;
44
54
org .apache .avro .Schema avroValueSchema = avroValueRecord .getSchema ();
45
55
46
- List <String > valueSchemaFieldNames =
47
- avroValueSchema
48
- .getFields ()
49
- .stream ()
50
- .map (org .apache .avro .Schema .Field ::name )
51
- .collect (Collectors .toList ());
52
- List <org .apache .avro .Schema .Field > fields =
53
- avroKeySchema
54
- .getFields ()
55
- .stream ()
56
- .filter (field -> !valueSchemaFieldNames .contains (field .name ()))
57
- .map (
58
- f ->
59
- new org .apache .avro .Schema .Field (
60
- f .name (), f .schema (), f .doc (), f .defaultVal (), f .order ()))
61
- .collect (Collectors .toList ());
62
- fields .addAll (
63
- avroValueSchema
64
- .getFields ()
65
- .stream ()
66
- .map (
67
- f ->
68
- new org .apache .avro .Schema .Field (
69
- f .name (), f .schema (), f .doc (), f .defaultVal (), f .order ()))
70
- .collect (Collectors .toList ()));
71
-
72
- Map <org .apache .avro .Schema , org .apache .avro .Schema > schemaCacheKey =
73
- schemaCache .computeIfAbsent (avroKeySchema , s -> new ConcurrentHashMap <>());
74
- org .apache .avro .Schema modified =
75
- schemaCacheKey .computeIfAbsent (
76
- avroValueSchema ,
77
- schema ->
78
- org .apache .avro .Schema .createRecord (
79
- avroValueSchema .getName (),
80
- null ,
81
- avroValueSchema .getNamespace (),
82
- false ,
83
- fields ));
84
- GenericRecord newRecord = new GenericData .Record (modified );
85
- for (String fieldName : valueSchemaFieldNames ) {
86
- newRecord .put (fieldName , avroValueRecord .get (fieldName ));
87
- }
56
+ org .apache .avro .Schema mergedSchema = getMergedSchema (avroKeySchema , avroValueSchema );
57
+ GenericRecord newRecord = new GenericData .Record (mergedSchema );
58
+ avroValueSchema
59
+ .getFields ()
60
+ .forEach (field -> newRecord .put (field .name (), avroValueRecord .get (field .name ())));
88
61
for (org .apache .avro .Schema .Field field : avroKeySchema .getFields ()) {
89
- newRecord .put (field .name (), avroKeyRecord .get (field .name ()));
62
+ if (avroValueSchema .getField (field .name ()) == null ) {
63
+ newRecord .put (field .name (), avroKeyRecord .get (field .name ()));
64
+ }
90
65
}
91
66
transformContext .setValueObject (newRecord );
92
67
transformContext .setValueModified (true );
68
+ } else if (keySchema .getSchemaInfo ().getType () == SchemaType .JSON
69
+ && transformContext .getValueSchema ().getSchemaInfo ().getType () == SchemaType .JSON ) {
70
+ org .apache .avro .Schema avroKeySchema =
71
+ (org .apache .avro .Schema ) keySchema .getNativeSchema ().orElseThrow ();
72
+ org .apache .avro .Schema avroValueSchema =
73
+ (org .apache .avro .Schema )
74
+ transformContext .getValueSchema ().getNativeSchema ().orElseThrow ();
75
+ org .apache .avro .Schema mergedSchema = getMergedSchema (avroKeySchema , avroValueSchema );
76
+ transformContext .setValueSchema (new JsonNodeSchema (mergedSchema ));
77
+ ObjectNode newValue = ((ObjectNode ) keyObject ).deepCopy ();
78
+ newValue .setAll (((ObjectNode ) valueObject ).deepCopy ());
79
+ transformContext .setValueObject (newValue );
80
+ transformContext .setValueModified (true );
93
81
}
94
82
}
83
+
84
+ private org .apache .avro .Schema getMergedSchema (
85
+ org .apache .avro .Schema avroKeySchema , org .apache .avro .Schema avroValueSchema ) {
86
+ List <org .apache .avro .Schema .Field > fields =
87
+ avroKeySchema
88
+ .getFields ()
89
+ .stream ()
90
+ .filter (field -> avroValueSchema .getField (field .name ()) == null )
91
+ .map (
92
+ f ->
93
+ new org .apache .avro .Schema .Field (
94
+ f .name (), f .schema (), f .doc (), f .defaultVal (), f .order ()))
95
+ .collect (Collectors .toList ());
96
+ fields .addAll (
97
+ avroValueSchema
98
+ .getFields ()
99
+ .stream ()
100
+ .map (
101
+ f ->
102
+ new org .apache .avro .Schema .Field (
103
+ f .name (), f .schema (), f .doc (), f .defaultVal (), f .order ()))
104
+ .collect (Collectors .toList ()));
105
+
106
+ Map <org .apache .avro .Schema , org .apache .avro .Schema > schemaCacheKey =
107
+ schemaCache .computeIfAbsent (avroKeySchema , s -> new ConcurrentHashMap <>());
108
+ return schemaCacheKey .computeIfAbsent (
109
+ avroValueSchema ,
110
+ schema ->
111
+ org .apache .avro .Schema .createRecord (
112
+ avroValueSchema .getName (), null , avroValueSchema .getNamespace (), false , fields ));
113
+ }
95
114
}
0 commit comments