Skip to content

Commit

Permalink
[custom] Add custom filter to use llama2c
Browse files Browse the repository at this point in the history
This patch adds custom filter to use llama2c.

example pipeline
```sh
$ gst-launch-1.0 filesrc location=input.txt ! text/x-raw ! tensor_converter input-dim=3584:1:1:1 ! tensor_filter framework=custom model=libnnstreamer_customfilter_llama2.so ! filesink location=output.txt
```

model.bin and tokenizer.bin is needed.

Signed-off-by: Yelin Jeong <[email protected]>
  • Loading branch information
niley7464 authored and wooksong committed Sep 9, 2024
1 parent 97993ff commit 41bc82b
Show file tree
Hide file tree
Showing 2 changed files with 187 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
/**
* NNStreamer Custom Filter Example llama2.c
* Copyright (C) 2024 Yelin Jeong <[email protected]>
*
* SPDX-License-Identifier: LGPL-2.1-only
*
* @file nnstreamer_customfilter_llama2.c
* @date 06 Sep 2024
* @brief Custom NNStreamer Filter Example llama2.c
* @author Yelin Jeong <[email protected]>
* @bug No known bugs except for NYI items
* @see https://github.com/nnsuite/llama2.c
* @todo Use property to values like temperatrue, topp
*/


#include <nnstreamer_plugin_api.h>
#include <nnstreamer_util.h>
#include <tensor_filter_custom.h>
#include <assert.h>
#include "api.h"

#define CHECKPOINT_PATH "model.bin"
#define TOKENIZER_PATH "tokenizer.bin"

#define TEMPERATURE \
1.0f // 0.0 = greedy deterministic. 1.0 = original. don't set higher
#define TOPP \
0.9f // top-p in nucleus sampling. 1.0 = off. 0.9 works well, but slower
#define RNG_SEED 0ULL // seed rng with time by default

/**
* @brief _pt_data Internal data structure
*/
typedef struct _pt_data {
Transformer *transformer;
Tokenizer *tokenizer;
Sampler *sampler;
char *prompt; // prompt string
int steps; // number of steps to run for

GstTensorsInfo info;
} pt_data;

/**
* @brief init callback of tensor_filter custom
*/
static void *
pt_init (const GstTensorFilterProperties *prop)
{
pt_data *data = (pt_data *) malloc (sizeof (pt_data));
Config *config;
UNUSED (prop);

gst_tensors_info_init (&data->info);

data->transformer = (Transformer *) malloc (sizeof (Transformer));

// build the Transformer via the model .bin file
build_transformer (data->transformer, (char *) CHECKPOINT_PATH);
config = &data->transformer->config;

if (data->steps == 0 || data->steps > config->seq_len)
data->steps = config->seq_len; // override to ~max length

data->tokenizer = (Tokenizer *) malloc (sizeof (Tokenizer));

// build the Tokenizer via the tokenizer .bin file
build_tokenizer (data->tokenizer, (char *) TOKENIZER_PATH, config->vocab_size);

data->sampler = (Sampler *) malloc (sizeof (Sampler));

// build the Sampler
build_sampler (data->sampler, config->vocab_size, TEMPERATURE, TOPP, RNG_SEED);

data->info.num_tensors = 1;
data->info.info[0].type = _NNS_UINT8;
data->info.info[0].dimension[0] = data->tokenizer->max_token_length * data->steps;

return data;
}

/**
* @brief exit callback of tensor_filter custom
*/
static void
pt_exit (void *private_data, const GstTensorFilterProperties *prop)
{
pt_data *data = private_data;
UNUSED (prop);
assert (private_data);

gst_tensors_info_free (&data->info);
free_transformer (data->transformer);
free_tokenizer (data->tokenizer);
free_sampler (data->sampler);

free (data->transformer);
free (data->tokenizer);
free (data->sampler);
free (data);
}

/**
* @brief setInputDimension callback of tensor_filter custom
*/
static int
get_inputDim (void *private_data, const GstTensorFilterProperties *prop, GstTensorsInfo *info)
{
UNUSED (private_data);
UNUSED (prop);
assert (info);

info->format = _NNS_TENSOR_FORMAT_SPARSE;
info->num_tensors = 1;

return 0;
}

/**
* @brief getOutputDimension callback of tensor_filter custom
*/
static int
get_outputDim (void *private_data, const GstTensorFilterProperties *prop, GstTensorsInfo *info)
{
pt_data *data = private_data;
UNUSED (prop);
assert (private_data);

gst_tensors_info_copy (info, &data->info);
return 0;
}

/**
* @brief invoke callback of tensor_filter custom
*/
static int
pt_invoke (void *private_data, const GstTensorFilterProperties *prop,
const GstTensorMemory *input, GstTensorMemory *output)
{
pt_data *data = private_data;
char *result, *prompt;
int len;

UNUSED (prop);
assert (private_data);
assert (input);
assert (output);

prompt = (char *) input[0].data;
len = strlen (prompt);

if (len > 1) {
prompt[strlen (prompt) - 1] = ' '; // Remove new line from text file
} else {
prompt = NULL;
}

output->size = data->tokenizer->max_token_length * data->steps;

result = get_tokens (data->transformer, data->tokenizer, data->sampler, prompt, data->steps);
memcpy (output[0].data, result, strlen(result));

free (result);
return 0;
}

static NNStreamer_custom_class NNStreamer_custom_body = {
.initfunc = pt_init,
.exitfunc = pt_exit,
.getInputDim = get_inputDim,
.getOutputDim = get_outputDim,
.invoke = pt_invoke,
};

/* The dyn-loaded object */
NNStreamer_custom_class *NNStreamer_custom = &NNStreamer_custom_body;
10 changes: 10 additions & 0 deletions tests/nnstreamer_example/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,13 @@ if tensorrt_support_is_available
install_dir: customfilter_install_dir
)
endif

if llama2c_support_is_available
library('nnstreamer_customfilter_llama2',
join_paths('custom_example_llama2',
'nnstreamer_customfilter_llama2.c'),
dependencies: [glib_dep, gst_dep, nnstreamer_dep, llama2c_support_deps],
install: get_option('install-test'),
install_dir: customfilter_install_dir
)
endif

0 comments on commit 41bc82b

Please sign in to comment.