diff --git a/CHANGELOG.md b/CHANGELOG.md index 58ea79a3f9..6aa2d2a7c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,9 @@ and what APIs have changed, if applicable. ## [Unreleased] +## [29.42.1] - 2023-05-11 +- Add support for returning location of schema elements from the PDL schema encoder. + ## [29.42.0] - 2023-05-02 - Remove the overriding of content-length for HEADER requests as per HTTP Spec More details about this issue can be found @ https://jira01.corp.linkedin.com:8443/browse/SI-31814 @@ -5462,7 +5465,8 @@ patch operations can re-use these classes for generating patch messages. ## [0.14.1] -[Unreleased]: https://github.com/linkedin/rest.li/compare/v29.42.0...master +[Unreleased]: https://github.com/linkedin/rest.li/compare/v29.42.1...master +[29.42.1]: https://github.com/linkedin/rest.li/compare/v29.42.0...v29.42.1 [29.42.0]: https://github.com/linkedin/rest.li/compare/v29.41.12...v29.42.0 [29.41.12]: https://github.com/linkedin/rest.li/compare/v29.41.11...v29.41.12 [29.41.11]: https://github.com/linkedin/rest.li/compare/v29.41.10...v29.41.11 diff --git a/data/src/main/java/com/linkedin/data/schema/SchemaToPdlEncoder.java b/data/src/main/java/com/linkedin/data/schema/SchemaToPdlEncoder.java index a7f0bca7ad..7d2c04f8c7 100644 --- a/data/src/main/java/com/linkedin/data/schema/SchemaToPdlEncoder.java +++ b/data/src/main/java/com/linkedin/data/schema/SchemaToPdlEncoder.java @@ -18,11 +18,14 @@ import com.linkedin.data.DataList; import com.linkedin.data.DataMap; +import com.linkedin.data.schema.grammar.PdlSchemaParser; +import com.linkedin.util.LineColumnNumberWriter; import java.io.IOException; import java.io.StringWriter; import java.io.Writer; import java.util.Collections; import java.util.HashSet; +import java.util.IdentityHashMap; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -107,15 +110,45 @@ PdlBuilder newBuilderInstance(Writer writer) private String _namespace = ""; private String _package = ""; + private final 
boolean _trackWriteLocations; + + private final Map _writeLocations; + /** * Construct a .pdl source code encoder. + * The encoding style defaults to {@link EncodingStyle#INDENTED} but may be changed by calling + * {@link #setEncodingStyle(EncodingStyle)}. * * @param out provides the encoded .pdl destination. */ public SchemaToPdlEncoder(Writer out) { - _writer = out; - _encodingStyle = EncodingStyle.INDENTED; + this(out, false); + } + + /** + * Construct a .pdl source code encoder with the option to track line/column of schema elements during writing. + * The encoding style defaults to {@link EncodingStyle#INDENTED} but may be changed by calling + * {@link #setEncodingStyle(EncodingStyle)}. + * + * @param out provides the encoded .pdl destination. + * @param returnContextLocations Enable recording the context locations of schema elements during parsing. The + * locations can be retrieved using {@link #getWriteLocations()} after parsing. + */ + public SchemaToPdlEncoder(Writer out, boolean returnContextLocations) + { + if (returnContextLocations) + { + _writeLocations = new IdentityHashMap<>(); + // Wrap the Writer to track line/column numbers to report to elementWriteListener + _writer = new LineColumnNumberWriter(out); + } else + { + _writer = out; + _writeLocations = Collections.emptyMap(); + } + setEncodingStyle(EncodingStyle.INDENTED); + _trackWriteLocations = returnContextLocations; } /** @@ -126,6 +159,18 @@ public SchemaToPdlEncoder(Writer out) public void setEncodingStyle(EncodingStyle encodingStyle) { _encodingStyle = encodingStyle; + + // When counting column numbers, CompactPDLBuilder treats ',' as whitespace + if (_writer instanceof LineColumnNumberWriter) + { + if (_encodingStyle == EncodingStyle.COMPACT) + { + ((LineColumnNumberWriter) _writer).setIsWhitespaceFunction(c -> Character.isWhitespace(c) || c == ','); + } else + { + ((LineColumnNumberWriter) _writer).setIsWhitespaceFunction(Character::isWhitespace); + } + } } /** @@ -150,10 +195,12 @@ 
public void encode(DataSchema schema) throws IOException { if (hasNamespace) { + markSchemaElementStartLocation(); _builder.write("namespace") .writeSpace() .writeIdentifier(namedSchema.getNamespace()) .newline(); + recordSchemaElementLocation(namedSchema.getNamespace()); _namespace = namedSchema.getNamespace(); } if (hasPackage) @@ -220,12 +267,14 @@ private void writeInlineSchema(DataSchema schema) throws IOException .increaseIndent(); if (hasNamespaceOverride) { + markSchemaElementStartLocation(); _builder .indent() .write("namespace") .writeSpace() .writeIdentifier(namedSchema.getNamespace()) .newline(); + recordSchemaElementLocation(namedSchema.getNamespace()); _namespace = namedSchema.getNamespace(); } if (hasPackageOverride) @@ -291,8 +340,14 @@ private void writeInlineSchema(DataSchema schema) throws IOException } } + public Map getWriteLocations() + { + return _writeLocations; + } + private void writeRecord(RecordDataSchema schema) throws IOException { + markSchemaElementStartLocation(); writeDocAndProperties(schema); _builder.write("record") .writeSpace() @@ -327,6 +382,7 @@ private void writeRecord(RecordDataSchema schema) throws IOException { writeIncludes(schema, includes); } + recordSchemaElementLocation(schema); } /** @@ -335,6 +391,7 @@ private void writeRecord(RecordDataSchema schema) throws IOException */ private void writeField(RecordDataSchema.Field field) throws IOException { + markSchemaElementStartLocation(); writeDocAndProperties(field); _builder.indent() .writeIdentifier(field.getName()) @@ -353,6 +410,7 @@ private void writeField(RecordDataSchema.Field field) throws IOException .writeSpace() .writeJson(field.getDefault(), field.getType()); } + recordSchemaElementLocation(field); _builder.newline(); } @@ -382,6 +440,7 @@ private void writeEnum(EnumDataSchema schema) throws IOException DataSchemaConstants.DEPRECATED_SYMBOLS_KEY, properties.get(DataSchemaConstants.DEPRECATED_SYMBOLS_KEY)); + markSchemaElementStartLocation(); 
writeDocAndProperties(schema); _builder.write("enum") .writeSpace() @@ -395,6 +454,7 @@ private void writeEnum(EnumDataSchema schema) throws IOException for (String symbol : schema.getSymbols()) { + markSchemaElementStartLocation(); String docString = docs.get(symbol); DataMap symbolProperties = coercePropertyToDataMapOrFail(schema, DataSchemaConstants.SYMBOL_PROPERTIES_KEY + "." + symbol, @@ -414,24 +474,29 @@ private void writeEnum(EnumDataSchema schema) throws IOException _builder.indent() .writeIdentifier(symbol) .newline(); + recordSchemaElementLocation(symbol); } _builder.decreaseIndent() .indent() .write("}"); + recordSchemaElementLocation(schema); } private void writeFixed(FixedDataSchema schema) throws IOException { + markSchemaElementStartLocation(); writeDocAndProperties(schema); _builder.write("fixed") .writeSpace() .writeIdentifier(schema.getName()) .writeSpace() .write(String.valueOf(schema.getSize())); + recordSchemaElementLocation(schema); } private void writeTyperef(TyperefDataSchema schema) throws IOException { + markSchemaElementStartLocation(); writeDocAndProperties(schema); _builder.write("typeref") .writeSpace() @@ -441,24 +506,29 @@ private void writeTyperef(TyperefDataSchema schema) throws IOException .writeSpace(); DataSchema ref = schema.getRef(); writeReferenceOrInline(ref, schema.isRefDeclaredInline()); + recordSchemaElementLocation(schema); } private void writeMap(MapDataSchema schema) throws IOException { + markSchemaElementStartLocation(); writeProperties(schema.getProperties()); _builder.write("map[string") .writeComma() .writeSpace(); writeReferenceOrInline(schema.getValues(), schema.isValuesDeclaredInline()); _builder.write("]"); + recordSchemaElementLocation(schema); } private void writeArray(ArrayDataSchema schema) throws IOException { + markSchemaElementStartLocation(); writeProperties(schema.getProperties()); _builder.write("array["); writeReferenceOrInline(schema.getItems(), schema.isItemsDeclaredInline()); 
_builder.write("]"); + recordSchemaElementLocation(schema); } /** @@ -467,6 +537,7 @@ private void writeArray(ArrayDataSchema schema) throws IOException */ private void writeUnion(UnionDataSchema schema) throws IOException { + markSchemaElementStartLocation(); writeProperties(schema.getProperties()); _builder.write("union["); final boolean useMultilineFormat = schema.areMembersAliased() || schema.getMembers().size() >= UNION_MULTILINE_THRESHOLD; @@ -496,6 +567,7 @@ private void writeUnion(UnionDataSchema schema) throws IOException .indent(); } _builder.write("]"); + recordSchemaElementLocation(schema); } /** @@ -505,6 +577,7 @@ private void writeUnion(UnionDataSchema schema) throws IOException */ private void writeUnionMember(UnionDataSchema.Member member, boolean useMultilineFormat) throws IOException { + markSchemaElementStartLocation(); if (member.hasAlias()) { if (StringUtils.isNotBlank(member.getDoc()) || !member.getProperties().isEmpty() || member.isDeclaredInline()) @@ -524,6 +597,7 @@ else if (useMultilineFormat) _builder.indent(); } writeReferenceOrInline(member.getType(), member.isDeclaredInline()); + recordSchemaElementLocation(member); } private void writePrimitive(PrimitiveDataSchema schema) throws IOException @@ -865,4 +939,25 @@ else if (_namespace.equals(schema.getNamespace()) && !_importsByLocalName.contai _builder.writeIdentifier(schema.getFullName()); } } + + void markSchemaElementStartLocation() + { + if (_trackWriteLocations) + { + ((LineColumnNumberWriter) _writer).saveCurrentPosition(); + } + } + + private void recordSchemaElementLocation(Object schemaElement) + { + if (_trackWriteLocations) + { + LineColumnNumberWriter.CharacterPosition startPosition = ((LineColumnNumberWriter) _writer).popSavedPosition(); + LineColumnNumberWriter.CharacterPosition endPosition = + ((LineColumnNumberWriter) _writer).getLastNonWhitespacePosition(); + _writeLocations.put(schemaElement, + new PdlSchemaParser.ParseLocation(startPosition.getLine(), 
/**
 * A {@link Writer} decorator that forwards every character to an underlying writer while keeping
 * track of the 1-based line and column at which characters are written.
 *
 * <p>Every {@code char} advances the column by one (tabs and surrogate halves included). A line break
 * is a {@code '\n'}, a {@code '\r'}, or a {@code '\r'} immediately followed by a {@code '\n'}; the
 * latter pair counts as a single line break.
 *
 * <p>In addition to the current position, the writer remembers the position of the last
 * non-whitespace character written and maintains a stack of saved positions that automatically skip
 * over leading whitespace, which makes it easy to obtain token start/end locations in indented output.
 */
public final class LineColumnNumberWriter extends Writer
{

  private final Writer _writer;
  private final Stack<CharacterPosition> _savedPositionStack = new Stack<>();
  private int _column;
  private int _line;
  // Last character passed to write(), or -1 if nothing has been written yet.
  private int _previousChar;
  private Predicate<Character> _isWhitespaceFunction;
  // Mutated in place on every non-whitespace character; never handed out directly (see getter).
  private final CharacterPosition _lastNonWhitespacePosition;

  /**
   * Creates a new writer positioned at line 1, column 1.
   *
   * @param out a Writer object to provide the underlying stream.
   */
  public LineColumnNumberWriter(Writer out)
  {
    _writer = out;
    _column = 1;
    _line = 1;
    _previousChar = -1;
    _isWhitespaceFunction = Character::isWhitespace;
    _lastNonWhitespacePosition = new CharacterPosition(0, 0);
  }

  /**
   * Returns the 1-based line and column the next character will be written to.
   *
   * @return a new {@link CharacterPosition} describing the current write position.
   */
  public CharacterPosition getCurrentPosition()
  {
    return new CharacterPosition(_line, _column);
  }

  /**
   * Returns the 1-based line and column of the last non-whitespace character written, i.e. the end of
   * the output ignoring trailing whitespace.
   *
   * @return a snapshot of the position; it is not affected by subsequent writes. The position is
   *         (0, 0) if no non-whitespace character has been written yet.
   */
  public CharacterPosition getLastNonWhitespacePosition()
  {
    // Hand out a copy: the internal instance is updated in place by write().
    return new CharacterPosition(_lastNonWhitespacePosition.line, _lastNonWhitespacePosition.column);
  }

  /**
   * Saves the current line and column to be retrieved later by calling {@link #popSavedPosition()}.
   *
   * <p>Saved positions are stored in a stack so that calls to saveCurrentPosition() and
   * {@link #popSavedPosition()} can be nested. Saved positions are adjusted to skip whitespace to make
   * it easier to get actual token start positions in indented output. If you call
   * saveCurrentPosition() at column x and then write four spaces followed by non-whitespace, the
   * column number returned by {@link #popSavedPosition()} will be x + 4.
   */
  public void saveCurrentPosition()
  {
    _savedPositionStack.push(new CharacterPosition(_line, _column));
  }

  /**
   * Removes and returns the position stored by the most recent unmatched call to
   * {@link #saveCurrentPosition()}, adjusted past any whitespace written immediately after the save.
   *
   * @return the most recently saved position.
   * @throws java.util.EmptyStackException if there is no saved position to pop.
   */
  public CharacterPosition popSavedPosition()
  {
    return _savedPositionStack.pop();
  }

  /**
   * Overrides the definition of whitespace used to adjust saved positions and to decide which
   * character counts as the last non-whitespace character. By default, the definition of whitespace
   * is provided by {@link java.lang.Character#isWhitespace(char)}.
   *
   * @param isWhitespaceFunction predicate returning true for characters to be treated as whitespace.
   */
  public void setIsWhitespaceFunction(Predicate<Character> isWhitespaceFunction)
  {
    _isWhitespaceFunction = isWhitespaceFunction;
  }

  /**
   * Writes the characters to the underlying writer and then updates the tracked positions for each
   * character written.
   *
   * @param cbuf array of characters.
   * @param off offset from which to start writing characters.
   * @param len number of characters to write.
   * @throws IOException if the underlying writer fails; tracked positions are left untouched then.
   */
  @Override
  public void write(char[] cbuf, int off, int len) throws IOException
  {
    _writer.write(cbuf, off, len);
    final int end = off + len;
    for (int i = off; i < end; i++)
    {
      final char c = cbuf[i];
      // Position occupied by c itself, captured before the cursor moves past it.
      final int charLine = _line;
      final int charColumn = _column;
      updateCurrentPosition(c);
      _previousChar = c;
      if (_isWhitespaceFunction.test(c))
      {
        updateSavedPositionsForWhitespace(charLine, charColumn);
      }
      else
      {
        _lastNonWhitespacePosition.line = charLine;
        _lastNonWhitespacePosition.column = charColumn;
      }
    }
  }

  @Override
  public void flush() throws IOException
  {
    _writer.flush();
  }

  @Override
  public void close() throws IOException
  {
    _writer.close();
  }

  /**
   * Delegates to the underlying writer so that wrapping e.g. a {@link java.io.StringWriter} still
   * exposes the written text through {@code toString()}.
   */
  @Override
  public String toString()
  {
    return _writer.toString();
  }

  /**
   * Advances the current line/column past {@code c}.
   *
   * <p>A {@code '\n'} directly after a {@code '\r'} completes a CRLF pair whose line break was already
   * counted when the {@code '\r'} was seen. Every other {@code '\n'} or {@code '\r'} (including a
   * {@code '\r'} that directly follows another {@code '\r'}) starts a new line.
   */
  private void updateCurrentPosition(char c)
  {
    if (_previousChar == '\r' && c == '\n')
    {
      // Second half of CRLF: the line was already advanced by the '\r'.
      _column = 1;
    }
    else if (c == '\n' || c == '\r')
    {
      _column = 1;
      ++_line;
    }
    else
    {
      ++_column;
    }
  }

  /**
   * Moves every saved position that points at the whitespace character just written forward to the
   * current position, so that saved positions skip leading whitespace. Once a non-whitespace character
   * is written at a saved position, the current position moves on and that saved position is no
   * longer adjusted.
   *
   * @param lastLine line of the whitespace character just written.
   * @param lastColumn column of the whitespace character just written.
   */
  private void updateSavedPositionsForWhitespace(int lastLine, int lastColumn)
  {
    // Walk from the most recent save downwards and stop at the first entry that does not point at
    // the character just written.
    for (int i = _savedPositionStack.size() - 1; i >= 0; --i)
    {
      CharacterPosition saved = _savedPositionStack.get(i);
      if (saved.line != lastLine || saved.column != lastColumn)
      {
        break;
      }
      saved.line = _line;
      saved.column = _column;
    }
  }

  /**
   * Line and column numbers of a character in Writer output.
   */
  public static class CharacterPosition
  {

    private int line;
    private int column;

    CharacterPosition(int line, int column)
    {
      this.line = line;
      this.column = column;
    }

    /**
     * 1-based index of line in writer output
     */
    public int getLine()
    {
      return line;
    }

    /**
     * 1-based index of column in writer output
     */
    public int getColumn()
    {
      return column;
    }

    @Override
    public boolean equals(Object o)
    {
      if (this == o)
      {
        return true;
      }
      if (o == null || getClass() != o.getClass())
      {
        return false;
      }
      CharacterPosition other = (CharacterPosition) o;
      return line == other.line && column == other.column;
    }

    @Override
    public int hashCode()
    {
      return Objects.hash(line, column);
    }

    @Override
    public String toString()
    {
      return "CharacterPosition{" + "line=" + line + ", column=" + column + '}';
    }
  }
}
+package com.linkedin.util; + +import java.io.IOException; +import java.io.StringWriter; +import org.testng.Assert; +import org.testng.annotations.Test; + + +public class TestLineColumnNumberWriter +{ + + @Test + public void testHandlesDifferentNewlines() throws IOException + { + LineColumnNumberWriter writer = new LineColumnNumberWriter(new StringWriter()); + writer.write("1\n2\n3\n"); + Assert.assertEquals(writer.getCurrentPosition(), new LineColumnNumberWriter.CharacterPosition(4, 1)); + writer.write("1\r\n2\r\n3\r\n"); + Assert.assertEquals(writer.getCurrentPosition(), new LineColumnNumberWriter.CharacterPosition(7, 1)); + writer.write("1\r2\r3\r"); + Assert.assertEquals(writer.getCurrentPosition(), new LineColumnNumberWriter.CharacterPosition(10, 1)); + } + + @Test + public void testSavedPositionIgnoresLeadingWhitespace() throws IOException + { + LineColumnNumberWriter writer = new LineColumnNumberWriter(new StringWriter()); + writer.write("123\n"); + writer.saveCurrentPosition(); + writer.saveCurrentPosition(); + writer.write(" \n "); + writer.write("456"); + writer.saveCurrentPosition(); + writer.write(" 789"); + Assert.assertEquals(writer.popSavedPosition(), new LineColumnNumberWriter.CharacterPosition(3, 8)); + Assert.assertEquals(writer.popSavedPosition(), new LineColumnNumberWriter.CharacterPosition(3, 2)); + Assert.assertEquals(writer.popSavedPosition(), new LineColumnNumberWriter.CharacterPosition(3, 2)); + } + + @Test + public void testGetLastNonWhitespacePosition() throws IOException + { + LineColumnNumberWriter writer = new LineColumnNumberWriter(new StringWriter()); + writer.write("123"); + Assert.assertEquals(writer.getLastNonWhitespacePosition(), new LineColumnNumberWriter.CharacterPosition(1, 3)); + writer.write("\n "); + Assert.assertEquals(writer.getLastNonWhitespacePosition(), new LineColumnNumberWriter.CharacterPosition(1, 3)); + writer.write("4"); + Assert.assertEquals(writer.getLastNonWhitespacePosition(), new 
LineColumnNumberWriter.CharacterPosition(2, 2)); + } +} diff --git a/generator-test/src/test/java/com/linkedin/pegasus/generator/test/pdl/PdlEncoderTest.java b/generator-test/src/test/java/com/linkedin/pegasus/generator/test/pdl/PdlEncoderTest.java index 7d12f35666..ae22fdef75 100644 --- a/generator-test/src/test/java/com/linkedin/pegasus/generator/test/pdl/PdlEncoderTest.java +++ b/generator-test/src/test/java/com/linkedin/pegasus/generator/test/pdl/PdlEncoderTest.java @@ -21,7 +21,10 @@ import com.linkedin.data.schema.AbstractSchemaParser; import com.linkedin.data.schema.DataSchema; import com.linkedin.data.schema.DataSchemaResolver; +import com.linkedin.data.schema.NamedDataSchema; +import com.linkedin.data.schema.RecordDataSchema; import com.linkedin.data.schema.SchemaToPdlEncoder; +import com.linkedin.data.schema.UnionDataSchema; import com.linkedin.data.schema.grammar.PdlSchemaParser; import com.linkedin.data.schema.resolver.MultiFormatDataSchemaResolver; import com.linkedin.pegasus.generator.test.idl.EncodingStyle; @@ -31,6 +34,7 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.StringWriter; +import java.util.Map; import org.apache.commons.io.FileUtils; import org.testng.Assert; import org.testng.annotations.DataProvider; @@ -149,6 +153,75 @@ private void assertRoundTrip(String relativeName) throws IOException } } + @Test(dataProvider = "pdlFilePaths") + public void testTrackWriteLocations(String pdlFilePath) throws IOException + { + assertRoundTripLineColumnNumbersMatch(pdlFilePath); + } + + private void assertRoundTripLineColumnNumbersMatch(String relativeName) throws IOException + { + String fullName = "com.linkedin.pegasus.generator.test.idl." 
+ relativeName; + File file = new File(pegasusSrcDir, "/" + fullName.replace('.', '/') + ".pdl"); + + TypeReferenceFormat referenceFormat = TypeReferenceFormat.PRESERVE; + + // Test all encoding styles + for (SchemaToPdlEncoder.EncodingStyle encodingStyle : SchemaToPdlEncoder.EncodingStyle.values()) + { + String encoded = readAndStandardizeFormat(file, referenceFormat, encodingStyle); + + DataSchemaResolver resolver = MultiFormatDataSchemaResolver.withBuiltinFormats(pegasusSrcDir.getAbsolutePath()); + PdlSchemaParser parser = new PdlSchemaParser(resolver, true); + parser.parse(encoded); + Map parsedLocations = parser.getParseLocations(); + DataSchema parsed = extractSchema(parser, file.getAbsolutePath()); + + StringWriter writer = new StringWriter(); + SchemaToPdlEncoder encoder = new SchemaToPdlEncoder(writer, true); + encoder.setTypeReferenceFormat(referenceFormat); + encoder.setEncodingStyle(encodingStyle); + encoder.encode(parsed); + Map writeLocations = encoder.getWriteLocations(); + + for (Map.Entry expected : parsedLocations.entrySet()) + { + PdlSchemaParser.ParseLocation actual = writeLocations.get(expected.getKey()); + + Assert.assertNotNull(actual, + "Missing location for " + expected.getKey() + " in " + file.getAbsolutePath() + ":" + + expected.getValue().getStartLine() + ":" + expected.getValue().getStartColumn()); + Assert.assertEquals(actual.getStartLine(), expected.getValue().getStartLine(), + "Start line for " + expected.getKey() + " in " + file.getAbsolutePath() + ":" + + expected.getValue().getStartLine() + ":" + expected.getValue().getStartColumn()); + Assert.assertEquals(actual.getStartColumn(), expected.getValue().getStartColumn(), + "Start col for " + expected.getKey() + " in " + file.getAbsolutePath() + ":" + + expected.getValue().getStartLine() + ":" + expected.getValue().getStartColumn()); + Assert.assertEquals(actual.getEndLine(), expected.getValue().getEndLine(), + "End line for " + expected.getKey() + " in " + file.getAbsolutePath() + 
":" + + expected.getValue().getStartLine() + ":" + expected.getValue().getStartColumn()); + Assert.assertEquals(actual.getEndColumn(), expected.getValue().getEndColumn(), + "End col for " + expected.getKey() + " in " + file.getAbsolutePath() + ":" + + expected.getValue().getStartLine() + ":" + expected.getValue().getStartColumn()); + } + + Assert.assertEquals(parsedLocations.size(), writeLocations.size(), + "Different numer of element locations for " + file.getAbsolutePath()); + } + } + + private String readAndStandardizeFormat(File file, TypeReferenceFormat typeReferenceFormat, + SchemaToPdlEncoder.EncodingStyle encodingStyle) throws IOException + { + DataSchema parsed = parseSchema(file); + StringWriter writer = new StringWriter(); + SchemaToPdlEncoder encoder = new SchemaToPdlEncoder(writer); + encoder.setEncodingStyle(encodingStyle); + encoder.setTypeReferenceFormat(typeReferenceFormat); + encoder.encode(parsed); + return writer.toString(); + } + private DataSchema parseSchema(File file) throws IOException { DataSchemaResolver resolver = MultiFormatDataSchemaResolver.withBuiltinFormats(pegasusSrcDir.getAbsolutePath()); diff --git a/gradle.properties b/gradle.properties index ce14fea603..3702b1dcc2 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,4 +1,4 @@ -version=29.42.0 +version=29.42.1 group=com.linkedin.pegasus org.gradle.configureondemand=true org.gradle.parallel=true