Skip to content

Commit

Permalink
AVRO-3666: Refactor for recent changes
Browse files Browse the repository at this point in the history
Includes the use of NameValidator and parsing multiple files with
circular references between them.
  • Loading branch information
opwvhk committed Oct 26, 2023
1 parent 4d88a15 commit 34971c0
Show file tree
Hide file tree
Showing 32 changed files with 1,183 additions and 739 deletions.
2 changes: 2 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ root = true
charset = utf-8
end_of_line = lf
insert_final_newline = true
ij_any_block_comment_at_first_column = false
ij_any_line_comment_at_first_column = false

[*.{java,xml,sh}]
indent_style = space
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

import java.io.IOException;
import java.net.URI;
import java.util.Collection;

/**
* Schema parser for a specific schema format.
Expand All @@ -29,46 +28,50 @@
* schema sources.
* </p>
*
* <h2>Note to implementers:</h2>
*
* <p>
* Implementations are located using a {@link java.util.ServiceLoader}. See that
* class for details.
* </p>
*
* <p>
* You can expect that schemas being read are invalid, so you are encouraged to
* return {@code null} upon parsing failure where the input clearly doesn't make
* sense (e.g., reading "/**" when expecting JSON). If the input is likely in
* the correct format, but invalid, throw a {@link SchemaParseException}
* instead.
* </p>
*
* <p>
* Note that throwing anything other than a {@code SchemaParseException} will
* abort the parsing process, so reserve that for rethrowing exceptions.
* Implementations are located using a {@link java.util.ServiceLoader} and must
* therefore be threadsafe. See the {@code ServiceLoader} class for details on
* loading your implementation.
* </p>
*
* @see java.util.ServiceLoader
*/
public interface FormattedSchemaParser {
/**
* Parse a schema from a text based source. Can use the base location of the
* schema (e.g., the directory where the schema file lives) if available.
*
* <p>
* Implementations should add all named schemas they parse to the collection.
* Parse schema definitions from a text based source.
* </p>
*
* @param types a mutable collection of known types; parsed named
* schemata will be added
* <h2>Notes for implementers:</h2>
*
* <ul>
* <li>Schema definitions are expected not to be in the format the parser
* expects. So when the input clearly doesn't make sense (e.g., reading "/**"
* when expecting JSON), it is a good idea not to do anything (especially
* calling methods on the {@code ParseContext}).</li>
* <li>The parameter {@code parseContext} is not thread-safe.</li>
* <li>When parsing, all parsed schema definitions should be added to the
* provided {@link ParseContext}.</li>
* <li>Optionally, you may return a "main" schema. Some schema definitions have
* one, for example the schema defined by the root of the JSON document in a
* <a href="https://avro.apache.org/docs/current/specification/">standard schema
* definition</a>. If unsure, return {@code null}.</li>
* <li>If parsing fails, throw a {@link SchemaParseException}. This will let the
* parsing process recover and continue.</li>
* <li>Throwing anything other than a {@code SchemaParseException} will abort
* the parsing process, so reserve that for rethrowing exceptions.</li>
* </ul>
*
* @param parseContext the current parse context: all parsed schemata should
* be added here to resolve names with; contains all
* previously known types
* @param baseUri the base location of the schema, or {@code null} if
* not known
* @param formattedSchema the schema as text
* @return the parsed schema, or {@code null} if the format is not supported
* @param formattedSchema the text of the schema definition(s) to parse
* @return the main schema, if any
* @throws IOException when the schema cannot be read
* @throws SchemaParseException when the schema cannot be parsed
*/
Schema parse(Collection<Schema> types, URI baseUri, CharSequence formattedSchema)
Schema parse(ParseContext parseContext, URI baseUri, CharSequence formattedSchema)
throws IOException, SchemaParseException;
}
26 changes: 9 additions & 17 deletions lang/java/avro/src/main/java/org/apache/avro/JsonSchemaParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@

import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;

/**
* Schema parser for JSON formatted schemata. This initial implementation simply
Expand Down Expand Up @@ -59,32 +57,26 @@ public static Schema parseInternal(String... fragments) {
for (String fragment : fragments) {
buffer.append(fragment);
}
return new JsonSchemaParser().parse(new ArrayList<>(), buffer, true);
return new JsonSchemaParser().parse(new ParseContext(NameValidator.NO_VALIDATION), buffer, null);
}

@Override
public Schema parse(Collection<Schema> schemas, URI baseUri, CharSequence formattedSchema)
public Schema parse(ParseContext parseContext, URI baseUri, CharSequence formattedSchema)
throws IOException, SchemaParseException {
return parse(schemas, formattedSchema, false);
return parse(parseContext, formattedSchema, parseContext.nameValidator);
}

private Schema parse(Collection<Schema> schemas, CharSequence formattedSchema, boolean skipValidation)
private Schema parse(ParseContext parseContext, CharSequence formattedSchema, NameValidator nameValidator)
throws SchemaParseException {
// TODO: refactor JSON parsing out of the Schema class
Schema.Parser parser;
if (skipValidation) {
parser = new Schema.Parser(Schema.NameValidator.NO_VALIDATION);
Schema.Parser parser = new Schema.Parser(nameValidator);
if (nameValidator == NameValidator.NO_VALIDATION) {
parser.setValidateDefaults(false);
} else {
parser = new Schema.Parser();
}
if (schemas != null) {
parser.addTypes(schemas);
parser = new Schema.Parser(nameValidator);
}
parser.addTypes(parseContext.typesByName().values());
Schema schema = parser.parse(formattedSchema.toString());
if (schemas != null) {
schemas.addAll(parser.getTypes().values());
}
parser.getTypes().values().forEach(parseContext::put);
return schema;
}
}
104 changes: 104 additions & 0 deletions lang/java/avro/src/main/java/org/apache/avro/NameValidator.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.avro;

/**
 * Strategy for validating a name.
 *
 * <p>
 * The default implementation of {@link #validate(String)} accepts every name.
 * Three ready-made validators are provided: {@link #NO_VALIDATION},
 * {@link #UTF_VALIDATOR} and {@link #STRICT_VALIDATOR}.
 * </p>
 */
public interface NameValidator {

  /**
   * Outcome of validating a name: either success (no error message), or a
   * failure carrying a description of what is wrong.
   */
  class Result {
    private final String errors;

    /**
     * Create a validation result.
     *
     * @param errors a description of the problem, or {@code null} if the name
     *               is valid
     */
    public Result(final String errors) {
      this.errors = errors;
    }

    /**
     * Tell whether the validated name was accepted.
     *
     * <p>
     * A result is OK when it carries no error message. This is deliberately not
     * an identity check against {@link NameValidator#OK}: the constructor is
     * public, so implementations may create their own success results.
     * </p>
     *
     * @return {@code true} if there are no errors, {@code false} otherwise
     */
    public boolean isOK() {
      return errors == null;
    }

    /**
     * Get the description of the validation failure.
     *
     * @return the error message, or {@code null} if the name is valid
     */
    public String getErrors() {
      return errors;
    }
  }

  /**
   * Shared result for a name that passed validation; it has no error message.
   */
  Result OK = new Result(null);

  /**
   * Validate a name. The default implementation accepts any name, including
   * {@code null}.
   *
   * @param name the name to validate
   * @return the validation result; {@link #OK} if the name is accepted
   */
  default Result validate(String name) {
    return OK;
  }

  /**
   * Validator that accepts every name (it uses the default
   * {@link #validate(String)} implementation).
   */
  NameValidator NO_VALIDATION = new NameValidator() {
  };

  /**
   * Validator that accepts names starting with a Unicode letter or underscore,
   * followed by any number of Unicode letters, Unicode digits or underscores.
   * {@code null} and empty names are rejected.
   */
  NameValidator UTF_VALIDATOR = new NameValidator() {
    @Override
    public Result validate(final String name) {
      if (name == null) {
        return new Result("Null name");
      }
      int length = name.length();
      if (length == 0) {
        return new Result("Empty name");
      }
      // Inspect code points rather than chars: the char based Character methods
      // cannot handle supplementary characters, so letters and digits outside
      // the Basic Multilingual Plane would be rejected as two surrogates.
      int first = name.codePointAt(0);
      if (!(Character.isLetter(first) || first == '_')) {
        return new Result("Illegal initial character: " + name);
      }
      int i = Character.charCount(first);
      while (i < length) {
        int c = name.codePointAt(i);
        // An unpaired surrogate is returned as-is, and is neither a letter nor
        // a digit: such names are still rejected.
        if (!(Character.isLetterOrDigit(c) || c == '_')) {
          return new Result("Illegal character in: " + name);
        }
        i += Character.charCount(c);
      }
      return OK;
    }
  };

  /**
   * Validator that accepts names starting with an ASCII letter
   * ({@code A-Z}, {@code a-z}) or underscore, followed by any number of ASCII
   * letters, ASCII digits ({@code 0-9}) or underscores. {@code null} and empty
   * names are rejected.
   */
  NameValidator STRICT_VALIDATOR = new NameValidator() {
    @Override
    public Result validate(final String name) {
      if (name == null) {
        return new Result("Null name");
      }
      int length = name.length();
      if (length == 0) {
        return new Result("Empty name");
      }
      char first = name.charAt(0);
      if (!(isLetter(first) || first == '_')) {
        return new Result("Illegal initial character: " + name);
      }
      for (int i = 1; i < length; i++) {
        char c = name.charAt(i);
        if (!(isLetter(c) || isDigit(c) || c == '_')) {
          return new Result("Illegal character in: " + name);
        }
      }
      return OK;
    }

    // ASCII letters only; any other character (surrogates included) fails.
    private boolean isLetter(char c) {
      return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }

    // ASCII digits only.
    private boolean isDigit(char c) {
      return c >= '0' && c <= '9';
    }

  };

}
Loading

0 comments on commit 34971c0

Please sign in to comment.