-
Notifications
You must be signed in to change notification settings - Fork 547
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adding translated.from
annotation when using SchemaTranslator
#970
base: master
Are you sure you want to change the base?
Changes from 3 commits
ab54560
9ee9dd0
6a69749
3117297
10c6434
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,24 +18,29 @@ | |
|
||
|
||
import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper; | ||
import com.linkedin.avroutil1.compatibility.ConfigurableSchemaComparator; | ||
import com.linkedin.avroutil1.compatibility.SchemaComparisonConfiguration; | ||
import com.linkedin.avroutil1.compatibility.SchemaParseConfiguration; | ||
import com.linkedin.data.DataMap; | ||
import com.linkedin.data.DataMapBuilder; | ||
import com.linkedin.data.schema.DataSchema; | ||
import com.linkedin.data.schema.DataSchemaResolver; | ||
import com.linkedin.data.schema.DataSchemaTraverse; | ||
import com.linkedin.data.schema.NamedDataSchema; | ||
import com.linkedin.data.schema.RecordDataSchema; | ||
import com.linkedin.data.schema.SchemaFormatType; | ||
import com.linkedin.data.schema.SchemaParser; | ||
import com.linkedin.data.schema.SchemaParserFactory; | ||
import com.linkedin.data.schema.PegasusSchemaParser; | ||
import com.linkedin.data.schema.SchemaToPdlEncoder; | ||
import com.linkedin.data.schema.TyperefDataSchema; | ||
import com.linkedin.data.schema.resolver.DefaultDataSchemaResolver; | ||
import com.linkedin.data.schema.resolver.FileDataSchemaResolver; | ||
import com.linkedin.data.schema.validation.ValidationOptions; | ||
import com.linkedin.data.template.DataTemplateUtil; | ||
|
||
import java.util.Arrays; | ||
import java.util.Collections; | ||
import java.util.HashMap; | ||
import java.util.IdentityHashMap; | ||
import java.util.Map; | ||
|
@@ -53,6 +58,7 @@ public class SchemaTranslator | |
private static final Logger log = LoggerFactory.getLogger(SchemaTranslator.class); | ||
|
||
public static final String DATA_PROPERTY = "com.linkedin.data"; | ||
public static final String TRANSLATED_FROM_SOURCE_OPTION = "schema.translated.from.src"; | ||
public static final String SCHEMA_PROPERTY = "schema"; | ||
public static final String OPTIONAL_DEFAULT_MODE_PROPERTY = "optionalDefaultMode"; | ||
public static final String AVRO_FILE_EXTENSION = ".avsc"; | ||
|
@@ -166,9 +172,12 @@ public static DataSchema avroToDataSchema(String avroSchemaInJson, AvroToDataSch | |
{ | ||
avroSchemaFromEmbedded.addProp(DATA_PROPERTY, embededSchemaPropertyVal); | ||
} | ||
if (!avroSchemaFromEmbedded.equals(avroSchemaFromJson)) | ||
{ | ||
throw new IllegalArgumentException("Embedded schema does not translate to input Avro schema: " + avroSchemaInJson); | ||
// Compare using configuration equivalent to STRICT, except ignore TRANSLATED_FROM_SOURCE_OPTION | ||
if (!ConfigurableSchemaComparator.equals(avroSchemaFromEmbedded, avroSchemaFromJson, | ||
new SchemaComparisonConfiguration(true, true, true, false, true, true, | ||
Collections.singleton((TRANSLATED_FROM_SOURCE_OPTION))))) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a bit hard to read. Why can you provide a constructor in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This check will never fail for Object embededSchemaPropertyVal = avroSchemaFromJson.getObjectProp(DATA_PROPERTY);
if (embededSchemaPropertyVal != null)
{
avroSchemaFromEmbedded.addProp(DATA_PROPERTY, embededSchemaPropertyVal);
} Updated the SchemaComparisonConfiguration call |
||
throw new IllegalArgumentException( | ||
"Embedded schema does not translate to input Avro schema: " + avroSchemaInJson); | ||
} | ||
} | ||
} | ||
|
@@ -186,6 +195,9 @@ public static DataSchema avroToDataSchema(String avroSchemaInJson, AvroToDataSch | |
String dataSchemaJson = dataSchema.toString(); | ||
resultDataSchema = DataTemplateUtil.parseSchema(dataSchemaJson); | ||
} | ||
|
||
// add translated from annotation if this is a named dataSchema | ||
resultDataSchema = addTranslatedPropToNamedDataSchema(resultDataSchema); | ||
return resultDataSchema; | ||
} | ||
|
||
|
@@ -317,6 +329,8 @@ public static String dataToAvroSchemaJson(DataSchema dataSchema) | |
*/ | ||
public static String dataToAvroSchemaJson(DataSchema dataSchema, DataToAvroSchemaTranslationOptions options) throws IllegalArgumentException | ||
{ | ||
dataSchema = addTranslatedPropToNamedDataSchema(dataSchema); | ||
|
||
// Create a copy of the schema before the actual translation, since the translation process ends up modifying the | ||
// schema for unions with aliases, and we don't want to disturb the original schema. Use PDL to preserve annotations. | ||
final DataSchema translatedDataSchema = DataTemplateUtil.parseSchema( | ||
|
@@ -341,6 +355,36 @@ public static String dataToAvroSchemaJson(DataSchema dataSchema, DataToAvroSchem | |
return SchemaToAvroJsonEncoder.schemaToAvro(translatedDataSchema, dataSchema, defaultValueOverrides, options); | ||
} | ||
|
||
/** | ||
* Adds TRANSLATED_FROM_SOURCE_OPTION property to named data schemas if not already present. | ||
* @param dataSchema the data schema to add the property to | ||
* @return the data schema with the property added if applicable. | ||
*/ | ||
private static DataSchema addTranslatedPropToNamedDataSchema(DataSchema dataSchema) { | ||
// Add translated from annotation if this is a named dataSchema | ||
if (dataSchema instanceof NamedDataSchema) { | ||
if (dataSchema instanceof TyperefDataSchema) { | ||
((TyperefDataSchema) dataSchema).setReferencedType( | ||
addAnnotationToDataSchema(((TyperefDataSchema) dataSchema).getRef())); | ||
} else { | ||
return addAnnotationToDataSchema(dataSchema); | ||
} | ||
} | ||
return dataSchema; | ||
} | ||
|
||
private static DataSchema addAnnotationToDataSchema(DataSchema dataSchema) { | ||
if (dataSchema instanceof NamedDataSchema) { | ||
NamedDataSchema namedDataSchema = (NamedDataSchema) dataSchema; | ||
if (!namedDataSchema.getProperties().containsKey(TRANSLATED_FROM_SOURCE_OPTION)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what should you do if existing translated_from prop is different from updated one? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It doesn't update the source in that case. So, if schema There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My original question is talking about an error case, I am guessing that should never happen and we should throw error? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We don't need to stop users from translating a translated schema again, as it is allowed right now in SchemaTranslator right now, and stopping it might fail users. However, we can use this annotation to fail other flows like PDL -> Proto or Avro -> proto. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Based on this logic, |
||
Map<String, Object> properties = new HashMap<>(namedDataSchema.getProperties()); | ||
properties.put(TRANSLATED_FROM_SOURCE_OPTION, namedDataSchema.getFullName()); | ||
namedDataSchema.setProperties(properties); | ||
} | ||
} | ||
return dataSchema; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this will have side effect of changing passed There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there should be none, except one additional json property in the result avro schema, which is the requirement of this change. This new property will identify the avro schemas as a translated schema. |
||
} | ||
|
||
/** | ||
* Allows caller to specify a file path for schema resolution. | ||
*/ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
will
translated.from
enough?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. any unique key will work.
I'll update to
translated.from