Skip to content

Commit

Permalink
RONDB-858: Use go avro lib in RDRS2 as cpp lib is slow
Browse files Browse the repository at this point in the history
  • Loading branch information
smkniazi committed Feb 24, 2025
1 parent 8921a26 commit ce4d322
Show file tree
Hide file tree
Showing 24 changed files with 601 additions and 538 deletions.
31 changes: 0 additions & 31 deletions Dockerfile.oraclelinux8
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,6 @@ ARG CMAKE_VERSION=3.23.2
ARG OPEN_SSL_VERSION=3.0.14
ARG GO_VERSION=1.22.9
ARG JSONCPP_VERSION=1.9.5
ARG BOOST_VERSION_MAJOR=1
ARG BOOST_VERSION_MINOR=86
ARG BOOST_VERSION_PATCH=0
ARG AVRO_VERSION=1.12.0
ARG PROMETHEUS_CPP_VERSION=1.3.0

# Default build threads to 1; max is defined in Docker config (run `nproc` in Docker container)
Expand Down Expand Up @@ -144,33 +140,6 @@ EOF
RUN --mount=type=cache,target=/var/cache/yum,id=oracle8-yum \
yum install bzip2 bzip2-libs

# Install latest boost
ENV BOOST_VERSION=${BOOST_VERSION_MAJOR}.${BOOST_VERSION_MINOR}.${BOOST_VERSION_PATCH}
ENV BOOST_V_UNDERSCORE=${BOOST_VERSION_MAJOR}_${BOOST_VERSION_MINOR}_${BOOST_VERSION_PATCH}
RUN --mount=type=cache,target=$DOWNLOADS_CACHE_DIR \
cd $DOWNLOADS_CACHE_DIR && \
rm -rf boost_* && \
wget https://archives.boost.io/release/$BOOST_VERSION/source/boost_$BOOST_V_UNDERSCORE.tar.bz2 && \
tar --bzip2 -xf boost_$BOOST_V_UNDERSCORE.tar.bz2 && \
source scl_source enable gcc-toolset-12 && \
cd boost_$BOOST_V_UNDERSCORE && \
./bootstrap.sh &&\
./b2 -j$THREADS_ARG && \
./b2 install && \
rm -rf boost_*

# Install avro (only static library)
RUN --mount=type=cache,target=$DOWNLOADS_CACHE_DIR \
source scl_source enable gcc-toolset-12 && \
cd $DOWNLOADS_CACHE_DIR && \
rm -rf release-* avro-* && \
wget -N --progress=bar:force -P $DOWNLOADS_CACHE_DIR https://github.com/apache/avro/archive/refs/tags/release-$AVRO_VERSION.tar.gz && \
tar xf release-$AVRO_VERSION.tar.gz && \
cd avro-release-$AVRO_VERSION/lang/c++ && \
./build.sh install && \
cd $DOWNLOADS_CACHE_DIR && \
rm -rf release-* avro-*

# Install updated jsoncpp
RUN --mount=type=cache,target=$DOWNLOADS_CACHE_DIR \
wget -N --progress=bar:force -P $DOWNLOADS_CACHE_DIR \
Expand Down
15 changes: 1 addition & 14 deletions Dockerfile.oraclelinux9
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ ARG CMAKE_VERSION=3.23.2
ARG OPEN_SSL_VERSION=3.0.14
ARG GO_VERSION=1.22.9
ARG JSONCPP_VERSION=1.9.5
ARG AVRO_VERSION=1.12.0
ARG PROMETHEUS_CPP_VERSION=1.3.0

# Default build threads to 1; max is defined in Docker config (run `nproc` in Docker container)
Expand Down Expand Up @@ -52,7 +51,7 @@ RUN --mount=type=cache,target=/var/cache/yum,id=oracle9-yum \
numactl numactl-libs numactl-devel \
uuid-devel libudev-devel doxygen \
patchelf ncurses-devel java-1.8.0-openjdk-devel automake \
vim pigz hostname libtirpc-devel boost-devel \
vim pigz hostname libtirpc-devel \
libcurl-devel

# Building RonDB 24.10 requires bison at least version 3.8. Oracle linux 9 ships
Expand Down Expand Up @@ -140,18 +139,6 @@ EOF
# RDRS2
#

# Install avro
RUN --mount=type=cache,target=$DOWNLOADS_CACHE_DIR \
source scl_source enable gcc-toolset-12 && \
cd $DOWNLOADS_CACHE_DIR && \
rm -rf release-* avro-* && \
wget -N --progress=bar:force -P $DOWNLOADS_CACHE_DIR https://github.com/apache/avro/archive/refs/tags/release-$AVRO_VERSION.tar.gz && \
tar xf release-$AVRO_VERSION.tar.gz && \
cd avro-release-$AVRO_VERSION/lang/c++ && \
./build.sh install && \
cd $DOWNLOADS_CACHE_DIR && \
rm -rf release-* avro-*

# Install updated jsoncpp
RUN --mount=type=cache,target=$DOWNLOADS_CACHE_DIR \
wget -N --progress=bar:force -P $DOWNLOADS_CACHE_DIR \
Expand Down
14 changes: 1 addition & 13 deletions Dockerfile.ubuntu22
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ ARG CMAKE_VERSION=3.25.1
ARG OPEN_SSL_VERSION=3.0.14
ARG GO_VERSION=1.22.9
ARG JSONCPP_VERSION=1.9.5
ARG AVRO_VERSION=1.12.0
ARG PROMETHEUS_CPP_VERSION=1.3.0

# Default build threads to 1; max is defined in Docker config (run `nproc` in Docker container)
Expand All @@ -26,7 +25,7 @@ RUN --mount=type=cache,target=/var/cache/apt,id=ubuntu22-apt \
apt-get update && apt-get -y install wget pkg-config patchelf \
libncurses5-dev default-jdk libudev-dev bison flex autoconf pigz \
openssh-client maven libsasl2-dev libldap-dev libaio-dev \
git vim nano protobuf-compiler graphviz libboost-all-dev \
git vim nano protobuf-compiler graphviz \
libcurl4-openssl-dev

# RonDB 22.10 requires gcc/g++ 10
Expand Down Expand Up @@ -115,17 +114,6 @@ EOF
# RDRS2
#

# Install avro
RUN --mount=type=cache,target=$DOWNLOADS_CACHE_DIR \
cd $DOWNLOADS_CACHE_DIR && \
rm -rf release-* avro-* && \
wget -N --progress=bar:force -P $DOWNLOADS_CACHE_DIR https://github.com/apache/avro/archive/refs/tags/release-$AVRO_VERSION.tar.gz && \
tar xf release-$AVRO_VERSION.tar.gz && \
cd avro-release-$AVRO_VERSION/lang/c++ && \
./build.sh install && \
cd $DOWNLOADS_CACHE_DIR && \
rm -rf release-* avro-*

# Install updated jsoncpp
RUN --mount=type=cache,target=$DOWNLOADS_CACHE_DIR \
wget -N --progress=bar:force -P $DOWNLOADS_CACHE_DIR \
Expand Down
1 change: 1 addition & 0 deletions storage/ndb/rest-server2/extra/drogon/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ FILE(WRITE "${RDRS_DROGON_BINARY_DIR}/builder.sh" "
cmake \
-S${RDRS_DROGON_SOURCE_DIR} \
-B${RDRS_DROGON_BINARY_DIR} \
-DBUILD_YAML_CONFIG=OFF \
-DCMAKE_INSTALL_PREFIX=${RDRS_DROGON_INSTALL_DIR} \
-DRDRS_OPENSSL_LIBRARY=${RDRS_OPENSSL_LIBRARY} \
-DRDRS_CRYPTO_LIBRARY=${RDRS_CRYPTO_LIBRARY} \
Expand Down
1 change: 1 addition & 0 deletions storage/ndb/rest-server2/server/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ set (CMAKE_CXX_STANDARD_REQUIRED ON)

add_subdirectory(src)
add_subdirectory(test)
add_subdirectory(avro)
36 changes: 36 additions & 0 deletions storage/ndb/rest-server2/server/avro/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
project(libavro)

# Absolute path of the archive produced by `go build -buildmode=c-archive`.
# The variable keeps its historical name, but the artifact is a STATIC
# library (.a), not a shared object.
set(SHARED_LIB "${CMAKE_BINARY_DIR}/library_output_directory/libavro.a")

# Build the Go library.
#
# `go build -buildmode=c-archive -o <dir>/libavro.a` writes the archive and a
# matching cgo header <dir>/libavro.h; the header is then moved to
# <current binary dir>/include/avro.h.
# The directory and rename steps use `cmake -E` so they do not depend on the
# host shell providing `mkdir -p` / `mv`.
#
# NOTE(review): -O3 is an optimisation level but is passed through
# CGO_LDFLAGS (linker flags); confirm whether CGO_CFLAGS was intended.
add_custom_target(LIBAVRO_BUILD ALL
  COMMAND ${CMAKE_COMMAND} -E env CGO_LDFLAGS="-O3" go build -o ${SHARED_LIB} -buildmode=c-archive avro.go avro_parser.go
  COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/include
  COMMAND ${CMAKE_COMMAND} -E rename ${CMAKE_BINARY_DIR}/library_output_directory/libavro.h ${CMAKE_CURRENT_BINARY_DIR}/include/avro.h
  WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
  COMMENT "Building libavro (Go CGO static library)"
)

# Expose the archive and its header as an imported target.
# The archive is static, so the target is STATIC IMPORTED, and SHARED_LIB is
# already an absolute path, so it is used as IMPORTED_LOCATION unchanged.
add_library(libavro STATIC IMPORTED)
set_target_properties(libavro PROPERTIES
  IMPORTED_LOCATION "${SHARED_LIB}"
  INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_BINARY_DIR}/include"
)

# Ensure the library is built before anything that uses the imported target.
add_dependencies(libavro LIBAVRO_BUILD)

# Publish the library and include directories to the rest of the build.
SET(RDRS_LIBAVRO_LIB_DIR "${CMAKE_BINARY_DIR}/library_output_directory" CACHE INTERNAL "RDRS libavro dir" FORCE)
SET(RDRS_LIBAVRO_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/include" CACHE INTERNAL "RDRS libavro include dir" FORCE)

# Install step
INSTALL(FILES "${SHARED_LIB}" DESTINATION lib
  PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE
  GROUP_READ GROUP_EXECUTE
  WORLD_READ WORLD_EXECUTE
)
117 changes: 117 additions & 0 deletions storage/ndb/rest-server2/server/avro/avro.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
/*
* Copyright (c) 2023, 2025, Hopsworks and/or its affiliates.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
* USA.
*/

package main

/*
#include <string.h>
#include <stdint.h>
*/
import "C"
import (
"fmt"
"os"
"reflect"
"sync/atomic"
"unsafe"

"github.com/bytedance/sonic"
"github.com/hamba/avro/v2"
)

// curSchemaID is the counter register_schema draws schema IDs from; every
// successful registration uses the value returned by Add(1).
var curSchemaID atomic.Int64

// ComplexFeature bundles what is stored for one registered Avro schema.
type ComplexFeature struct {
// schemaStr is the schema text supplied to register_schema.
schemaStr string
// AvroSchema points to the schema produced by avro.Parse.
AvroSchema *avro.Schema
// AvroStruct points to the reflect.Type produced by
// ConvertAvroSchemaToStruct; unmarshal_avro instantiates it via reflect.New.
AvroStruct *reflect.Type
}

// avroStructs maps a schema ID to its registered ComplexFeature.
// NOTE(review): this is a plain map and no locking is visible in this file;
// confirm that callers serialise register/unregister/unmarshal calls.
var avroStructs = make(map[int64]*ComplexFeature)

//export register_schema
func register_schema(schema string) C.int64_t {

avroSchema, err := avro.Parse(string(schema))
if err != nil {
//fmt.Fprintf(os.Stderr, "Failed to parse avro schemd %s. Error: %v\n", schema, err)
return -1
}

avroStruct, err := ConvertAvroSchemaToStruct(avroSchema)
if err != nil {
//fmt.Fprintf(os.Stderr, "Failed to generate strcut for avro schemd %s. Error: %v\n", schema, err)
return -1
}
id := curSchemaID.Add(1)
avroStructs[id] = &ComplexFeature{schemaStr: schema, AvroSchema: &avroSchema, AvroStruct: &avroStruct}

//fmt.Printf("Go lang. Registered schema: %s. ID: %d\n", schema, id)
return C.int64_t(id)
}

//export unregister_schema
func unregister_schema(schema_id C.int64_t) {
delete(avroStructs, int64(schema_id))
//fmt.Printf("Go lang. Deleted schema ID: %d\n", schema_id)
}

//export unmarshal_avro
func unmarshal_avro(schema_id C.int64_t, data []byte, outStr **C.char, outLen *C.int32_t) C.int64_t {
//fmt.Printf("Go lang. Unmarshal: Schema ID: %d\n", schema_id)

// var avroDeserialized interface{}
cf, ok := avroStructs[int64(schema_id)]
if !ok {
//fmt.Fprintf(os.Stderr, "Failed to unmarshall avro data. Schema ID: %d not found \n", schema_id)
return C.int64_t(-1)
}

avroDeserialized := reflect.New(*cf.AvroStruct).Interface()
err := avro.Unmarshal(*cf.AvroSchema, data, &avroDeserialized)
if err != nil {
//fmt.Fprintf(os.Stderr, "Failed to unmarshall avro data. Schema ID: %d. Error: %v\n", schema_id, err)
return C.int64_t(-1)
}

// dicsard the top most wrapper
j := reflect.ValueOf(avroDeserialized).Elem().Field(0).Interface()

bytes, err := sonic.Marshal(j)
if err != nil {
//fmt.Fprintf(os.Stderr, "Failed to unmarshall avro data. Schema ID: %d. Error: %v\n", schema_id, err)
return C.int64_t(-1)
}

heapStr := C.malloc(C.size_t(len(bytes)))
if heapStr == nil {
fmt.Fprintln(os.Stderr, "Failed to allocate memory\n")
return C.int64_t(-1)
}

// Copy Go slice to C heap memory
C.memcpy(heapStr, unsafe.Pointer(&bytes[0]), C.size_t(len(bytes)))

*outStr = (*C.char)(heapStr)
*outLen = C.int32_t(len(bytes))

return C.int64_t(0)
}

// main is intentionally empty: it exists because this is package main. The
// code is built as a C archive (-buildmode=c-archive) and is entered through
// the exported functions rather than through main.
func main() {}
Loading

0 comments on commit ce4d322

Please sign in to comment.