diff --git a/docker/Dockerfile-vscode b/docker/Dockerfile-vscode index 6b33f63..3edf228 100644 --- a/docker/Dockerfile-vscode +++ b/docker/Dockerfile-vscode @@ -47,7 +47,7 @@ WORKDIR /home/vscode # Setup python deps and configure rust. # -RUN pip3 --disable-pip-version-check --no-cache-dir install mypy wasmtime \ +RUN pip3 --disable-pip-version-check --no-cache-dir install mypy wasmtime==0.37.0 \ && rm -rf /tmp/pip-tmp \ && bash /tmp/library-scripts/install-rust-tools.sh \ && bash /tmp/library-scripts/install-wit-bindgen.sh diff --git a/examples/rust/templates/Cargo.toml b/examples/rust/templates/Cargo.toml new file mode 100644 index 0000000..1c1ff22 --- /dev/null +++ b/examples/rust/templates/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "templates" +version = "0.1.0" +edition = "2021" + +[dependencies] +wit-bindgen-rust = { git = "https://github.com/bytecodealliance/wit-bindgen.git", rev = "60e3c5b41e616fee239304d92128e117dd9be0a7" } +tera = "1.19" +serde_json = "1.0" +skyscraper = "0.4" +jsonpath_lib = "0.1" + +[dependencies.yaml-rust] +git = "https://github.com/chyh1990/yaml-rust.git" + +[lib] +crate-type = ["cdylib"] diff --git a/examples/rust/templates/Makefile b/examples/rust/templates/Makefile new file mode 100644 index 0000000..4c1f255 --- /dev/null +++ b/examples/rust/templates/Makefile @@ -0,0 +1,16 @@ +.PHONY: debug +debug: $(eval TGT:=debug) +debug: wasm + +.PHONY: release +release: $(eval TGT:=release) +release: RELFLAGS = --release +release: wasm + +.PHONY: wasm +wasm: + cargo wasi build --lib $(RELFLAGS) + +.PHONY: clean +clean: + @cargo clean diff --git a/examples/rust/templates/README.md b/examples/rust/templates/README.md new file mode 100644 index 0000000..c67b98a --- /dev/null +++ b/examples/rust/templates/README.md @@ -0,0 +1,202 @@ +# Templating Functions + +The templating functions in this example allow you to apply an +XML, HTML, JSON, or Yaml document / object to a +[Tera](https://tera.netlify.app) template. 
In the case of XML +and HTML, the documents can be queried using +[XPath](https://www.w3.org/TR/xpath-31/) expressions. For JSON +and Yaml, [JSONPath](https://datatracker.ietf.org/wg/jsonpath/about/) +expressions can be used. The output data format can be anything +supported by Tera templates, which can generate pretty much any +text-based format. + +What this means is that you can transform XML, HTML, JSON, or Yaml +strings into any other format, including new forms of XML, HTML, JSON, +or Yaml. For example, if you had the following XML structure: +``` +<book category="web"> + <title lang="en">XQuery Kick Start</title> + <author>James McGovern</author> + <author>Per Bothner</author> + <author>Kurt Cagle</author> + <author>James Linn</author> + <author>Vaidyanathan Nagarajan</author> + <year>2003</year> + <price>49.99</price> +</book> +``` + +You could convert it to a JSON structure like this: +``` +{ + "book": { + "title": "XQuery Kick Start", + "authors": [ + "James McGovern", + "Per Bothner", + "Kurt Cagle", + "James Linn", + "Vaidyanathan Nagarajan" + ], + "published": { + "year": 2003 + }, + "listing": { + "price": 49.99 + } + } +} +``` + +You can do this using the `render_xml` function included in this package and the +following template: +``` +{ + "book": { + "title": {{ q(path="/book/title") | get(key="text") | json_encode | safe }}, + "authors": [{% for item in q(path="/book/author") %} + {{ item | get(key="text") | json_encode | safe }}{% if not loop.last %}, {% endif %} + {% endfor %}], + "published": { "year": {{ q(path="/book/year") | get(key="text") | int }} }, + "listing": { "price": {{ q(path="/book/price") | get(key="text") | float }} } + } +} +``` + +## The `q` function + +In addition to being able to traverse the objects in the parsed document using +the Tera syntax, a `q` function has also been added. It has the following +signature: +``` +q(path="...") -> JSON value +``` + +The `path` parameter when using XML or HTML input is an XPath query. For +JSON and Yaml input, the path is a JSONPath query. This allows you to use +more powerful queries to extract pieces of your input document than what +Tera can do by default. 
+ +## XML / HTML object structure + +While JSON and Yaml are fairly straightforward data structures that both map to +JSON structures entirely using maps and arrays, XML and HTML are a bit more +complex. There is no standard map or array type in XML / HTML. In order to make +it possible to traverse XML / HTML objects in a Tera template, those documents +are converted to a JSON-like object. For example, take the following XML: +``` +<book category="web"> + <title lang="en">XQuery Kick Start</title> + <author>James McGovern</author> + <author>Per Bothner</author> + <author>Kurt Cagle</author> + <author>James Linn</author> + <author>Vaidyanathan Nagarajan</author> + <year>2003</year> + <price>49.99</price> +</book> +``` + +The JSON object for the above XML looks like: +``` +{ + "_": { + "name": "book", + "attributes": {"category": "web"}, + "children": [ + { + "name": "title", + "attributes": {"lang": "en"}, + "text": "XQuery Kick Start", + "children": [] + }, + { + "name": "author", + "attributes": {}, + "text": "James McGovern", + "children": [] + }, + ... + { + "name": "year", + "attributes": {}, + "text": "2003", + "children": [] + }, + { + "name": "price", + "attributes": {}, + "text": "49.99", + "children": [] + } + ] + } +} +``` + +As you can see, traversing an XML document using Tera's syntax or +even JSONPath is rather complicated. Using the `q(path="...")` function +to get nodes is much simpler. Even in an XML document, the `q` function +returns a JSON object that can be traversed from that point. For example, +getting the price of a book using the `q` function can be done as follows: +``` +q(path="/book/price") +``` + +This will return an object of the following form: +``` +{ + "name": "price", + "attributes": {}, + "text": "49.99", + "children": [] +} +``` + +You can then extract the `text` field using Tera's `get` filter: +``` +q(path="/book/price") | get(key="text") +``` + +The above expression will return "49.99". 
+ +## UDFs + +The functions included in this package are as follows: + +``` +render_json(json-string, template-string) -> string +render_xml(xml-string, template-string) -> string +render_yaml(yaml-string, template-string) -> string +``` + +## Compiling + +To compile the functions in the example, use the following command. +``` +cargo wasi build --lib --release +``` + +The Makefile can also be used to build the Wasm file. +``` +make release +``` + +## Load functions into the database + +Once you have compiled the functions, they can be loaded into the database +using the `pushwasm` command. + +``` +pushwasm udf --conn mysql://user:@127.0.0.1:3306/dbname --wit templates.wit \ + --wasm target/wasm32-wasi/release/templates.wasm --name render_json +pushwasm udf --conn mysql://user:@127.0.0.1:3306/dbname --wit templates.wit \ + --wasm target/wasm32-wasi/release/templates.wasm --name render_xml +pushwasm udf --conn mysql://user:@127.0.0.1:3306/dbname --wit templates.wit \ + --wasm target/wasm32-wasi/release/templates.wasm --name render_yaml +``` + +## Using the functions + +The `test.py` file contains a Python program that demonstrates the use +of each of the functions. diff --git a/examples/rust/templates/src/lib.rs b/examples/rust/templates/src/lib.rs new file mode 100644 index 0000000..f0503cf --- /dev/null +++ b/examples/rust/templates/src/lib.rs @@ -0,0 +1,155 @@ +wit_bindgen_rust::export!("templates.wit"); +struct Templates; + +extern crate serde_json; +extern crate tera; +extern crate skyscraper; +extern crate jsonpath_lib; +extern crate yaml_rust; + +use std::collections::HashMap; + +use serde_json::Map; +use skyscraper::html; +use skyscraper::xpath; +use tera::{Tera, Context, Function, Result, Value, from_value, to_value}; +use yaml_rust::{YamlLoader, yaml}; + +// Convert an html::DocumentNode to a serde_json::Value recursively. 
+fn element_to_value(doc: &html::HtmlDocument, elem: &html::DocumentNode) -> Value { + let html_node = doc.get_html_node(&elem).expect("document does not contain node"); + match html_node { + html::HtmlNode::Tag(tag) => { + let out = &mut Map::::new(); + out.insert("name".to_string(), Value::String(tag.name.to_string())); + out.insert("children".to_string(), elem.children(doc).into_iter().map(|n| element_to_value(doc, &n)).collect()); + let attrs = &mut Map::::new(); + for (key, value) in tag.attributes.iter() { + attrs.insert(key.to_string(), Value::String(value.to_string())); + } + out.insert("attributes".to_string(), Value::Object(attrs.to_owned())); + // This shouldn't be needed, but the XPath library doesn't support extracting text... + out.insert("text".to_string(), Value::String(elem.get_all_text(doc).unwrap_or("".to_string()))); + Value::Object(out.to_owned()) + }, + html::HtmlNode::Text(txt) => { + Value::String(txt.to_string()) + } + } +} + +fn yaml_to_value(doc: &yaml::Yaml) -> Value { + match doc { + yaml::Yaml::Real(v) => Value::from(v.to_string().parse::().unwrap()), + yaml::Yaml::Integer(v) => Value::from(v.to_string().parse::().unwrap()), + yaml::Yaml::String(v) => Value::String(v.to_string()), + yaml::Yaml::Boolean(v) => Value::Bool(*v), + yaml::Yaml::Array(v) => { + Value::Array(v.into_iter().map(|x| yaml_to_value(x)).collect::>()) + }, + yaml::Yaml::Hash(v) => { + let out = &mut Map::::new(); + for (key, val) in v.iter() { + out.insert(key.as_str().unwrap().to_string(), yaml_to_value(val)); + } + Value::Object(out.to_owned()) + }, + yaml::Yaml::Alias(_) => Value::Null, + yaml::Yaml::Null => Value::Null, + yaml::Yaml::BadValue => Value::Null, + } +} + +// Create an XPath function for use in the template. 
+fn query_xml(document: html::HtmlDocument) -> impl Function { + Box::new(move |args: &HashMap| -> Result { + match args.get("path") { + Some(val) => match from_value::(val.clone()) { + Ok(path) => { + let xpath = xpath::parse(path.as_str()).expect("xpath is invalid"); + let nodes = xpath.apply(&document).expect("could not apply xpath to document"); + if nodes.len() == 0 { + Ok(to_value::>(vec![]).unwrap()) + } else if nodes.len() > 1 { + Ok(to_value(nodes.into_iter().map(|n| element_to_value(&document, &n)).collect::>()).expect("could not convert to value")) + } else { + Ok(element_to_value(&document, &nodes[0])) + } + }, + Err(_) => Err("could not get path argument".into()), + }, + None => Err("could not get path argument".into()), + } + }) +} + +// Create a JSONPath function for use in the template. +fn query_json(obj: Value) -> impl Function { + Box::new(move |args: &HashMap| -> Result { + match args.get("path") { + Some(val) => match from_value::(val.clone()) { + Ok(path) => { + Ok(jsonpath_lib::select(&obj, path.as_str()).expect("could not execute JSONPath")) + }, + Err(_) => Err("could not get path argument".into()), + }, + None => Err("could not get path argument".into()), + } + }) +} + +// Render the input XML/JSON string using the given template. +fn render(txt: String, infmt: &str, template: String) -> String { + let mut tera = Tera::default(); + + // Using a temporary name here with an extension for the auto-escaping capabilities + // in the templating engine which get set based on the extension. + tera.add_raw_template("x", template.as_str()).unwrap(); + + // Turn off auto-escaping of special HTML characters. 
+ tera.autoescape_on(vec![]); + + let mut context = Context::new(); + + match infmt { + "json" => { + let parsed: Value = serde_json::from_str(txt.as_str()).expect("failed to parse JSON input"); + let obj: serde_json::Map = parsed.as_object().unwrap().clone(); + tera.register_function("q", query_json(Value::Object(obj.clone()))); + context.insert("_", &obj); + }, + "xml" => { + let obj = html::parse(txt.as_str()).expect("failed to parse XML input"); + let json_obj = element_to_value(&obj, &obj.root_node); + tera.register_function("q", query_xml(obj)); + context.insert("_", &json_obj); + }, + "yaml" => { + let obj = YamlLoader::load_from_str(txt.as_str()).expect("failed to parse yaml input"); + let json_obj = yaml_to_value(&obj[0]).as_object().unwrap().to_owned(); + tera.register_function("q", query_json(Value::Object(json_obj.clone()))); + context.insert("_", &json_obj); + }, + _ => { + panic!(); + } + } + + return tera.render("x", &context).unwrap(); +} + +impl templates::Templates for Templates { + + fn render_json(json: String, template: String) -> String { + return render(json, "json", template); + } + + fn render_xml(xml: String, template: String) -> String { + return render(xml, "xml", template); + } + + fn render_yaml(yaml: String, template: String) -> String { + return render(yaml, "yaml", template); + } + +} diff --git a/examples/rust/templates/templates.wit b/examples/rust/templates/templates.wit new file mode 100644 index 0000000..47d3058 --- /dev/null +++ b/examples/rust/templates/templates.wit @@ -0,0 +1,3 @@ +render-json: func(json: string, template: string) -> string +render-xml: func(xml: string, template: string) -> string +render-yaml: func(yaml: string, template: string) -> string \ No newline at end of file diff --git a/examples/rust/templates/test.py b/examples/rust/templates/test.py new file mode 100755 index 0000000..b325f41 --- /dev/null +++ b/examples/rust/templates/test.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 + +import json + +import 
singlestoredb as s2 + +# Unfortunately, the XPath library used does not support extracting text using `text()`, so we had to work +# around it by adding a text attribute that gets looked up after querying the XPath. +JSON_TEMPLATE = r'''{ + "book": { + "title": {{ q(path="/book/title") | get(key="text") | json_encode | safe }}, + "authors": [{% for item in q(path="/book/author") %} + {{ item | get(key="text") | json_encode | safe }}{% if not loop.last %}, {% endif %} + {% endfor %}], + "published": { "year": {{ q(path="/book/year") | get(key="text") | int }} }, + "listing": { "price": {{ q(path="/book/price") | get(key="text") | float }} } + } +}''' + +# Templates that use a JSON object can use either the object notation in the templating engine with +# `_` as the top element, or you can use the `q(...)` function to query using JSONPath queries. +XML_TEMPLATE = r''' + {{ q(path="$.book.title") | escape_xml | safe }} + {% for author in _.book.authors %}{{ author | escape_xml | safe }}{% if not loop.last %}\n {% endif %}{% endfor %} + {{ _.book.published.year | int }} + {{ _.book.listing.price | float }} + +''' + +# This template generates yaml from a JSON object +YAML_TEMPLATE = r'''book: + title: {{ _.book.title }} + authors: + {% for author in _.book.authors %}- {{ author }}{% if not loop.last %}\n {% endif %}{% endfor %} + published: + year: {{ _.book.published.year | int }} + listing: + price: {{ _.book.listing.price | float }} +''' + +with s2.connect('root:@localhost:9306') as conn: + with conn.cursor() as cur: + cmds = [ + '''CREATE DATABASE IF NOT EXISTS xml_test''', + '''USE xml_test''', + '''DROP TABLE IF EXISTS books''', + '''CREATE TABLE books ( + id INT, + xml TEXT + )''', + f"""SET @json_template = '{JSON_TEMPLATE}'""", + f"""SET @xml_template = '{XML_TEMPLATE}'""", + f"""SET @yaml_template = '{YAML_TEMPLATE}'""", + ] + for cmd in cmds: + cur.execute(cmd) + + cur.executemany( + r'''INSERT INTO books (id, xml) VALUES (%s, %s)''', + [ + (1, '''\n''' + ''' 
XQuery Kick Start\n''' + ''' James McGovern\n''' + ''' Per Bothner\n''' + ''' Kurt Cagle\n''' + ''' James Linn\n''' + ''' Vaidyanathan Nagarajan\n''' + ''' 2003\n''' + ''' 49.99\n''' + '''\n'''), + ], + ) + cur.execute( + '''SELECT xml, ''' + ''' render_xml(xml, @json_template), ''' + ''' render_json(render_xml(xml, @json_template), @xml_template), ''' + ''' render_json(render_xml(xml, @json_template), @yaml_template), ''' + ''' render_yaml(render_json(render_xml(xml, @json_template), @yaml_template), @xml_template) ''' + '''FROM books''', + ) + for row in cur: + print('# XML input') + print(row[0]) + + print('# JSON output') + print(json.dumps(json.loads(row[1]), indent=2), end='\n\n') + + print('# XML output') + print(row[2]) + + print('# YAML output') + print(row[3]) + + print('# XML => JSON => YAML => XML') + print(row[4]) \ No newline at end of file