Skip to content

Commit

Permalink
avrogencpp: expose ValidSchema in generated code
Browse files Browse the repository at this point in the history
The C++ avro library has readers and writers that depend on having an
in-memory ValidSchema. The avrogencpp tool can take a JSON schema and
generate code to interact with a given datum, but doesn't expose the
ValidSchema, despite using it during codegen.

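This makes avrogencpp emit two static methods in the generated root record
type: schema_as_string(), which returns the schema as an embedded JSON string,
and valid_schema(), which compiles that string into a ValidSchema on first use
and exposes it to callers of the generated code.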

This is inspired largely by the kspp[1] library.

Note, the escape() method used is copied from [2].

[1] https://github.com/bitbouncer/kspp/blob/8539f359e32bd3dd1360ac4616eab88e79aab607/tools/kspp_avrogencpp/kspp_avrogencpp.cpp
[2] https://github.com/redpanda-data/avro/blob/1410e79f9df61669c2d52f6d0643e6c35156e615/lang/c%2B%2B/impl/NodeImpl.cc#L29-L69
  • Loading branch information
andrwng authored and pgellert committed Aug 9, 2024
1 parent 8396369 commit c4a52a4
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 15 deletions.
96 changes: 87 additions & 9 deletions lang/c++/impl/avrogencpp.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,53 @@ using boost::lexical_cast;
using avro::compileJsonSchema;
using avro::ValidSchema;

namespace {

// Escapes a string so it can be placed inside a double-quoted literal.
//
// - Backslash, double quote and forward slash are prefixed with a backslash.
// - Backspace, form feed, newline, carriage return and tab are written as the
//   two-character sequences \b, \f, \n, \r and \t.
// - Any other character that the classic "C" locale classifies as a control
//   character is replaced by the result of avro::intToHex().
// - Everything else is copied through unchanged.
string escape(const string &unescaped) {
    string s;
    s.reserve(unescaped.length());
    for (char c : unescaped) {
        switch (c) {
            case '\\':
            case '"':
            case '/':
                s += '\\';
                s += c;
                break;
            case '\b':
                s += '\\';
                s += 'b';
                break;
            case '\f':
                // Emit the escape sequence, consistent with the other
                // control characters, rather than the raw form-feed byte.
                s += '\\';
                s += 'f';
                break;
            case '\n':
                s += '\\';
                s += 'n';
                break;
            case '\r':
                s += '\\';
                s += 'r';
                break;
            case '\t':
                s += '\\';
                s += 't';
                break;
            default:
                if (!std::iscntrl(c, std::locale::classic())) {
                    s += c;
                    continue;
                }
                // Remaining control characters have no short escape form.
                s += avro::intToHex(static_cast<unsigned int>(c));
                break;
        }
    }
    return s;
}

} // anonymous namespace

struct PendingSetterGetter {
string structName;
string type;
Expand Down Expand Up @@ -85,16 +132,18 @@ class CodeGen {
map<NodePtr, string> done;
set<NodePtr> doing;

using OptSchemaRef = std::optional<std::reference_wrapper<const ValidSchema>>;

std::string guard();
std::string fullname(const string &name) const;
std::string generateEnumType(const NodePtr &n);
std::string cppTypeOf(const NodePtr &n);
std::string generateRecordType(const NodePtr &n);
std::string generateRecordType(const NodePtr &n, OptSchemaRef emittedSchema = std::nullopt);
std::string unionName();
std::string generateUnionType(const NodePtr &n);
std::string generateType(const NodePtr &n);
std::string generateType(const NodePtr &n, OptSchemaRef emittedSchema = std::nullopt);
std::string generateDeclaration(const NodePtr &n);
std::string doGenerateType(const NodePtr &n);
std::string doGenerateType(const NodePtr &n, OptSchemaRef emittedSchema = std::nullopt);
void generateEnumTraits(const NodePtr &n);
void generateTraits(const NodePtr &n);
void generateRecordTraits(const NodePtr &n);
Expand Down Expand Up @@ -226,7 +275,24 @@ static string cppNameOf(const NodePtr &n) {
}
}

string CodeGen::generateRecordType(const NodePtr &n) {
namespace {

// Returns a copy of `json` with whitespace removed everywhere except inside
// double-quoted string literals. Backslash escapes inside a literal are
// honoured, so an escaped quote (\") does not terminate the literal.
std::string stripInsignificantWhitespace(const std::string &json) {
    std::string compact;
    compact.reserve(json.size());
    bool inString = false;
    bool escaped = false;
    for (const char ch : json) {
        if (inString) {
            // Inside a literal every byte is significant, including spaces.
            compact += ch;
            if (escaped) {
                escaped = false;
            } else if (ch == '\\') {
                escaped = true;
            } else if (ch == '"') {
                inString = false;
            }
            continue;
        }
        if (ch == '"') {
            inString = true;
            compact += ch;
        } else if (!::isspace(static_cast<unsigned char>(ch))) {
            // The cast avoids undefined behaviour for bytes with the high bit
            // set when plain char is signed.
            compact += ch;
        }
    }
    return compact;
}

// Serializes the schema via ValidSchema::toJson() and compacts the result by
// dropping whitespace between JSON tokens. Whitespace inside string values
// (for example string defaults or doc text) is preserved so the compacted
// text still describes the same schema.
std::string toString(const ValidSchema& schema) {
    std::stringstream ss;
    schema.toJson(ss);
    return stripInsignificantWhitespace(ss.str());
}

// Produces the schema text from toString() and runs it through escape(), so
// the result can be written between double quotes in the generated code.
std::string toEscapedString(const ValidSchema& schema) {
    return escape(toString(schema));
}

} // anonymous namespace

string CodeGen::generateRecordType(const NodePtr &n, OptSchemaRef emittedSchema) {
size_t c = n->leaves();
string decoratedName = decorate(n->name());
vector<string> types;
Expand All @@ -253,6 +319,17 @@ string CodeGen::generateRecordType(const NodePtr &n) {
}
}
}

if (emittedSchema.has_value()) {
os_ << " static inline const char* schema_as_string() {\n";
os_ << " return \"" << toEscapedString(*emittedSchema) << "\";\n";
os_ << " } \n\n";
os_ << " static const ::avro::ValidSchema& valid_schema() {\n";
os_ << " static const auto& _validSchema = ::avro::compileJsonSchemaFromString(schema_as_string());\n";
os_ << " return _validSchema;\n";
os_ << " }\n\n";
}

for (size_t i = 0; i < c; ++i) {
// the nameAt(i) does not take c++ reserved words into account
// so we need to call decorate on it
Expand Down Expand Up @@ -410,19 +487,19 @@ string CodeGen::generateUnionType(const NodePtr &n) {
/**
* Returns the type for the given schema node and emits code to os.
*/
string CodeGen::generateType(const NodePtr &n) {
string CodeGen::generateType(const NodePtr &n, OptSchemaRef emittedSchema) {
NodePtr nn = (n->type() == avro::AVRO_SYMBOLIC) ? resolveSymbol(n) : n;

map<NodePtr, string>::const_iterator it = done.find(nn);
if (it != done.end()) {
return it->second;
}
string result = doGenerateType(nn);
string result = doGenerateType(nn, emittedSchema);
done[nn] = result;
return result;
}

string CodeGen::doGenerateType(const NodePtr &n) {
string CodeGen::doGenerateType(const NodePtr &n, OptSchemaRef emittedSchema) {
switch (n->type()) {
case avro::AVRO_STRING:
case avro::AVRO_BYTES:
Expand Down Expand Up @@ -459,7 +536,7 @@ string CodeGen::doGenerateType(const NodePtr &n) {
return "std::map<std::string, " + dn + " >";
}
case avro::AVRO_RECORD:
return generateRecordType(n);
return generateRecordType(n, emittedSchema);
case avro::AVRO_ENUM:
return generateEnumType(n);
case avro::AVRO_UNION:
Expand Down Expand Up @@ -730,6 +807,7 @@ void CodeGen::generate(const ValidSchema &schema) {

os_ << "#include <sstream>\n"
<< "#include <any>\n"
<< "#include \"" << includePrefix_ << "Compiler.hh\"\n"
<< "#include \"" << includePrefix_ << "Specific.hh\"\n"
<< "#include \"" << includePrefix_ << "Encoder.hh\"\n"
<< "#include \"" << includePrefix_ << "Decoder.hh\"\n"
Expand All @@ -741,7 +819,7 @@ void CodeGen::generate(const ValidSchema &schema) {
}

const NodePtr &root = schema.root();
generateType(root);
generateType(root, schema);

for (vector<PendingSetterGetter>::const_iterator it =
pendingGettersAndSetters.begin();
Expand Down
8 changes: 2 additions & 6 deletions lang/c++/test/AvrogencppTests.cc
Original file line number Diff line number Diff line change
Expand Up @@ -247,19 +247,15 @@ const char schemaFilename<umu::r1>::value[] = "jsonschemas/union_map_union";

template<typename T>
void testEncoding2() {
ValidSchema s;
ifstream ifs(schemaFilename<T>::value);
compileJsonSchema(ifs, s);

unique_ptr<OutputStream> os = memoryOutputStream();
EncoderPtr e = validatingEncoder(s, binaryEncoder());
EncoderPtr e = validatingEncoder(T::valid_schema(), binaryEncoder());
e->init(*os);
T t1;
setRecord(t1);
avro::encode(*e, t1);
e->flush();

DecoderPtr d = validatingDecoder(s, binaryDecoder());
DecoderPtr d = validatingDecoder(T::valid_schema(), binaryDecoder());
unique_ptr<InputStream> is = memoryInputStream(*os);
d->init(*is);
T t2;
Expand Down

0 comments on commit c4a52a4

Please sign in to comment.