// wllama.cpp
#include <iostream>
#include <vector>
#include <string>
#include <sstream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <malloc.h>
#include <emscripten/emscripten.h>
#include "llama.h"
#include "json.hpp"
#include "common.h"
#include "actions.hpp"
#define WLLAMA_ACTION(name)                \
  if (action == #name)                     \
  {                                        \
    res = action_##name(app, body_json);   \
  }
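// Forward llama.cpp/ggml log lines to stderr, prefixed with "@@LEVEL@@" so the
// level can be recovered from the text stream by the caller.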
static void llama_log_callback_logTee(ggml_log_level level, const char *text, void *user_data)
{
  (void)user_data;
  const char *lvl = "@@DEBUG";
  size_t len = strlen(text);
  if (len == 0 || text[len - 1] != '\n') {
    // do not print if the line does not terminate with \n
    return;
  }
  if (level == GGML_LOG_LEVEL_ERROR) {
    lvl = "@@ERROR";
  } else if (level == GGML_LOG_LEVEL_WARN) {
    lvl = "@@WARN";
  } else if (level == GGML_LOG_LEVEL_INFO) {
    lvl = "@@INFO";
  }
  fprintf(stderr, "%s@@%s", lvl, text);
}
static std::string result;
static app_t app;
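// Initialize the llama.cpp backend and install the log callback.
// Returns a JSON string: {"success":true}, or an {"__exception": "..."} object on failure.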
extern "C" const char *wllama_start()
{
try
{
llama_backend_init();
// std::cerr << llama_print_system_info() << "\n";
llama_log_set(llama_log_callback_logTee, nullptr);
return "{\"success\":true}";
}
catch (std::exception &e)
{
json ex{{"__exception", std::string(e.what())}};
result = std::string(ex.dump());
return result.c_str();
}
}
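// Entry point for all actions: `name` selects the handler and `body` carries its
// JSON-encoded arguments. Returns the handler's result serialized as JSON, or an
// {"__exception": "..."} object if parsing or the handler throws.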
extern "C" const char *wllama_action(const char *name, const char *body)
{
try
{
json res;
std::string body_str(body);
json body_json = json::parse(body_str);
std::string action(name);
WLLAMA_ACTION(load);
WLLAMA_ACTION(set_options);
WLLAMA_ACTION(sampling_init);
WLLAMA_ACTION(sampling_sample);
WLLAMA_ACTION(sampling_accept);
WLLAMA_ACTION(get_vocab);
WLLAMA_ACTION(lookup_token);
WLLAMA_ACTION(tokenize);
WLLAMA_ACTION(detokenize);
WLLAMA_ACTION(decode);
WLLAMA_ACTION(encode);
WLLAMA_ACTION(get_logits);
WLLAMA_ACTION(embeddings);
WLLAMA_ACTION(chat_format);
WLLAMA_ACTION(kv_remove);
WLLAMA_ACTION(kv_clear);
WLLAMA_ACTION(current_status);
WLLAMA_ACTION(session_save);
WLLAMA_ACTION(session_load);
result = std::string(res.dump());
return result.c_str();
}
catch (std::exception &e)
{
json ex{{"__exception", std::string(e.what())}};
result = std::string(ex.dump());
return result.c_str();
}
}
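// Release everything owned by `app` and shut down the llama.cpp backend.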
extern "C" const char *wllama_exit()
{
try
{
free_all(app);
llama_backend_free();
return "{\"success\":true}";
}
catch (std::exception &e)
{
json ex{{"__exception", std::string(e.what())}};
result = std::string(ex.dump());
return result.c_str();
}
}
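// Report heap usage of the wasm module: the total heap size comes from HEAP8.length,
// and the free estimate combines the unused space above the program break (sbrk)
// with the free blocks tracked by mallinfo().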
extern "C" const char *wllama_debug()
{
auto get_mem_total = [&]()
{
return EM_ASM_INT(return HEAP8.length);
};
auto get_mem_free = [&]()
{
auto i = mallinfo();
unsigned int total_mem = get_mem_total();
unsigned int dynamic_top = (unsigned int)sbrk(0);
return total_mem - dynamic_top + i.fordblks;
};
json res = json{
{"mem_total_MB", get_mem_total() / 1024 / 1024},
{"mem_free_MB", get_mem_free() / 1024 / 1024},
{"mem_used_MB", (get_mem_total() - get_mem_free()) / 1024 / 1024},
};
result = std::string(res.dump());
return result.c_str();
}
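// main() is not used: the module is driven entirely through the exported
// wllama_* functions above.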
int main()
{
  std::cerr << "Unused\n";
  return 0;
}