-
Notifications
You must be signed in to change notification settings - Fork 547
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adding translated.from
annotation when using SchemaTranslator
#970
base: master
Are you sure you want to change the base?
Changes from all commits
ab54560
9ee9dd0
6a69749
3117297
10c6434
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,24 +18,29 @@ | |
|
||
|
||
import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper; | ||
import com.linkedin.avroutil1.compatibility.ConfigurableSchemaComparator; | ||
import com.linkedin.avroutil1.compatibility.SchemaComparisonConfiguration; | ||
import com.linkedin.avroutil1.compatibility.SchemaParseConfiguration; | ||
import com.linkedin.data.DataMap; | ||
import com.linkedin.data.DataMapBuilder; | ||
import com.linkedin.data.schema.DataSchema; | ||
import com.linkedin.data.schema.DataSchemaResolver; | ||
import com.linkedin.data.schema.DataSchemaTraverse; | ||
import com.linkedin.data.schema.NamedDataSchema; | ||
import com.linkedin.data.schema.RecordDataSchema; | ||
import com.linkedin.data.schema.SchemaFormatType; | ||
import com.linkedin.data.schema.SchemaParser; | ||
import com.linkedin.data.schema.SchemaParserFactory; | ||
import com.linkedin.data.schema.PegasusSchemaParser; | ||
import com.linkedin.data.schema.SchemaToPdlEncoder; | ||
import com.linkedin.data.schema.TyperefDataSchema; | ||
import com.linkedin.data.schema.resolver.DefaultDataSchemaResolver; | ||
import com.linkedin.data.schema.resolver.FileDataSchemaResolver; | ||
import com.linkedin.data.schema.validation.ValidationOptions; | ||
import com.linkedin.data.template.DataTemplateUtil; | ||
|
||
import java.util.Arrays; | ||
import java.util.Collections; | ||
import java.util.HashMap; | ||
import java.util.IdentityHashMap; | ||
import java.util.Map; | ||
|
@@ -53,6 +58,7 @@ public class SchemaTranslator | |
private static final Logger log = LoggerFactory.getLogger(SchemaTranslator.class); | ||
|
||
public static final String DATA_PROPERTY = "com.linkedin.data"; | ||
public static final String TRANSLATED_FROM_SOURCE_OPTION = "translated.from"; | ||
public static final String SCHEMA_PROPERTY = "schema"; | ||
public static final String OPTIONAL_DEFAULT_MODE_PROPERTY = "optionalDefaultMode"; | ||
public static final String AVRO_FILE_EXTENSION = ".avsc"; | ||
|
@@ -166,9 +172,12 @@ public static DataSchema avroToDataSchema(String avroSchemaInJson, AvroToDataSch | |
{ | ||
avroSchemaFromEmbedded.addProp(DATA_PROPERTY, embededSchemaPropertyVal); | ||
} | ||
if (!avroSchemaFromEmbedded.equals(avroSchemaFromJson)) | ||
{ | ||
throw new IllegalArgumentException("Embedded schema does not translate to input Avro schema: " + avroSchemaInJson); | ||
// Compare using configuration equivalent to STRICT, except ignore TRANSLATED_FROM_SOURCE_OPTION | ||
if (!ConfigurableSchemaComparator.equals(avroSchemaFromEmbedded, avroSchemaFromJson, | ||
SchemaComparisonConfiguration.STRICT.jsonPropNamesToIgnore( | ||
Collections.singleton(SchemaTranslator.TRANSLATED_FROM_SOURCE_OPTION)))) { | ||
throw new IllegalArgumentException( | ||
"Embedded schema does not translate to input Avro schema: " + avroSchemaInJson); | ||
} | ||
} | ||
} | ||
|
@@ -186,6 +195,9 @@ public static DataSchema avroToDataSchema(String avroSchemaInJson, AvroToDataSch | |
String dataSchemaJson = dataSchema.toString(); | ||
resultDataSchema = DataTemplateUtil.parseSchema(dataSchemaJson); | ||
} | ||
|
||
// add translated from annotation if this is a named dataSchema | ||
resultDataSchema = addTranslatedPropToNamedDataSchema(resultDataSchema); | ||
return resultDataSchema; | ||
} | ||
|
||
|
@@ -317,6 +329,8 @@ public static String dataToAvroSchemaJson(DataSchema dataSchema) | |
*/ | ||
public static String dataToAvroSchemaJson(DataSchema dataSchema, DataToAvroSchemaTranslationOptions options) throws IllegalArgumentException | ||
{ | ||
dataSchema = addTranslatedPropToNamedDataSchema(dataSchema); | ||
|
||
// Create a copy of the schema before the actual translation, since the translation process ends up modifying the | ||
// schema for unions with aliases, and we don't want to disturb the original schema. Use PDL to preserve annotations. | ||
final DataSchema translatedDataSchema = DataTemplateUtil.parseSchema( | ||
|
@@ -341,6 +355,36 @@ public static String dataToAvroSchemaJson(DataSchema dataSchema, DataToAvroSchem | |
return SchemaToAvroJsonEncoder.schemaToAvro(translatedDataSchema, dataSchema, defaultValueOverrides, options); | ||
} | ||
|
||
/** | ||
* Adds TRANSLATED_FROM_SOURCE_OPTION property to named data schemas if not already present. | ||
* @param dataSchema the data schema to add the property to | ||
* @return the data schema with the property added if applicable. | ||
*/ | ||
private static DataSchema addTranslatedPropToNamedDataSchema(DataSchema dataSchema) { | ||
// Add translated from annotation if this is a named dataSchema | ||
if (dataSchema instanceof NamedDataSchema) { | ||
if (dataSchema instanceof TyperefDataSchema) { | ||
((TyperefDataSchema) dataSchema).setReferencedType( | ||
addAnnotationToDataSchema(((TyperefDataSchema) dataSchema).getRef())); | ||
} else { | ||
return addAnnotationToDataSchema(dataSchema); | ||
} | ||
} | ||
return dataSchema; | ||
} | ||
|
||
private static DataSchema addAnnotationToDataSchema(DataSchema dataSchema) { | ||
if (dataSchema instanceof NamedDataSchema) { | ||
NamedDataSchema namedDataSchema = (NamedDataSchema) dataSchema; | ||
if (!namedDataSchema.getProperties().containsKey(TRANSLATED_FROM_SOURCE_OPTION)) { | ||
Map<String, Object> properties = new HashMap<>(namedDataSchema.getProperties()); | ||
properties.put(TRANSLATED_FROM_SOURCE_OPTION, namedDataSchema.getFullName()); | ||
namedDataSchema.setProperties(properties); | ||
} | ||
} | ||
return dataSchema; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this will have side effect of changing passed There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there should be none, except one additional json property in the result avro schema, which is the requirement of this change. This new property will identify the avro schemas as a translated schema. |
||
} | ||
|
||
/** | ||
* Allows caller to specify a file path for schema resolution. | ||
*/ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
what should you do if existing translated_from prop is different from updated one?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It doesn't update the source in that case. So, if schema
A
translates toB
, andB
is translated toC
,the translatedFrom option in C will be
A
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My original question is talking about an error case, I am guessing that should never happen and we should throw error?
But your response provides me something different from my understanding, I thought that annotation is just for direct source, seems not?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We don't need to stop users from translating a translated schema again, as it is allowed right now in SchemaTranslator right now, and stopping it might fail users.
However, we can use this annotation to fail other flows like PDL -> Proto or Avro -> proto.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Based on this logic,
translated.from
annotation is used to indicate the root source, not directly translated source, is this clearly communicated?