Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/introspection #774

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion build/embed-lualibs
Original file line number Diff line number Diff line change
Expand Up @@ -37,22 +37,26 @@ libs=`find src/lua -type f -name '*.lua'`

# accumulators filled by the loop over the Lua libraries below
typeset -A emptyarray=()
extarray=()
scenarios=()
c=0
# download the Lua 5.4 bytecode compiler only when it is not already
# present; the guard must test the same file name that is downloaded
# and executed (luac54)
[ -r luac54 ] || {
	curl -Ls -o luac54 https://github.com/dyne/luabinaries/releases/latest/download/luac54
	chmod +x luac54
}
for i in ${(f)libs}; do
p=`basename $i`
# n = the filename without path or extension, e.g. zencode_ecdh
n=${p[(ws:.:)1]}
f="lualib_${n}.c"
print "+ $i $opts"
tmp=`mktemp -d`
# bytecompile lua in case requested by opts
if [[ "$opts" = "compile" ]]; then
./luac54 -o ${tmp}/${n} $i
else
cp $i ${tmp}/${n}
fi
# generate C embedding of lua sources in hex
pushd $tmp
print >> ${dst}
print "// $i" >> ${dst}
Expand All @@ -65,16 +69,21 @@ for i in ${(f)libs}; do
print >> ${dst}
popd
rm -rf $tmp
# save the extension for later outside this loop, to generate a
# function declaration that returns its contents
ext="{\"${n}\", &${n}_len, (const char *)${n}},"
extarray+=($ext)
emptyarray+=($ext "{\"${n}\", &fakelen, \"/$p\"},")
# save the scenario name of the extension in case it is zencode
[[ "${n[(ws:_:)1]}" == "zencode" ]] && scenarios+="${n[(ws:_:)2]}"

c=$(( c + 1 ))
done

cat <<EOF >> ${dst}
#endif // __EMSCRIPTEN__

zen_extension_t zen_extensions[] = {
const zen_extension_t zen_extensions[] = {
EOF
for i in $extarray; do
cat <<EOF >> ${dst}
Expand All @@ -89,3 +98,14 @@ cat <<EOF >> ${dst}
{ NULL, NULL, NULL }
};
EOF

# save the list of zencode scenarios as a NULL-terminated C string array,
# appending the whole declaration to ${dst} through a single redirection
{
	print 'const char* const zen_scenarios[] = {'
	for i in $scenarios; do
		print -n "\"$i\", "
	done
	print 'NULL};'
} >> ${dst}
1 change: 1 addition & 0 deletions src/lua/init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ TIME = require'time'
INSPECT = require('inspect')
QSORT = require('qsort_op') -- optimized table sort
table.sort = QSORT -- override native table sort
SPELL = require('spell')
JSON = require('zenroom_json')
ECDH = require('zenroom_ecdh')
-- ECDH public keys cannot function as ECP because of IANA 7303
Expand Down
105 changes: 105 additions & 0 deletions src/lua/spell.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
-- Spelling Corrector.
--
-- Copyright 2014 Francisco Zamora-Martinez
-- Copyright 2024 Jaromil (Dyne.org)
-- Adaptation of Peter Norvig python Spelling Corrector:
-- http://norvig.com/spell-correct.html
-- Open source code under MIT license: http://www.opensource.org/licenses/mit-license.php

-- Shorthands for the coroutine primitives used by the generators in this file.
local yield, wrap = coroutine.yield, coroutine.wrap
-- Letters tried by the replace and insert edits, as a string and as an array.
local alphabet_str = 'abcdefghijklmnopqrstuvwxyz'
local alphabet = {}
for pos = 1, #alphabet_str do
  alphabet[pos] = alphabet_str:sub(pos, pos)
end
-- Module table: assigned as a global and also returned at the end of the file.
spell = {}

--- Wrap a single word into a one-entry set and return its pairs() iterator
-- triplet, so that one word can be consumed like any other candidate stream.
-- @param w the word used as the only key of the set
local function list(w)
  local single = {}
  single[w] = true
  return pairs(single)
end

--- Return the best-scoring word produced by an iterator.
-- The arguments form a generic-for iterator triplet (iterator function,
-- state, control value), e.g. the values returned by pairs() or a single
-- coroutine.wrap generator.
-- Each word scores self.model[word], or 1 when the model has no entry.
-- Ties are resolved in favour of the lexicographically greater word.
-- @return the selected word, or nil when the iterator produces nothing
function spell:max(...)
  local best, best_score = nil, 0
  -- iterate the triplet directly instead of a table.pack/table.unpack
  -- round-trip, which depends on the length of a table with nil slots
  for w in ... do
    local score = self.model[w] or 1
    -- 'best == nil' guards the tie-break: without it a first word scoring 0
    -- would compare nil < string and raise an error
    if score > best_score
       or (score == best_score and (best == nil or best < w)) then
      best, best_score = w, score
    end
  end
  return best
end

-- local function words(text) return text:lower():gmatch("[a-z]+") end

-- local function train(features)
-- for f in features do model[f] = (model[f] or 1) + 1 end
-- end

-- local function init(filename) train(words(io.open(filename):read("*a"))) end

--- Build an emitter that yields each distinct value only once.
-- The returned function remembers every value it has been given and calls
-- coroutine.yield only for values it has not seen before.
-- @return function(w) to be called from inside a coroutine
local function make_yield()
  local seen = {}
  return function(w)
    if seen[w] then
      return
    end
    seen[w] = true
    yield(w)
  end
end

--- Generate every string one edit away from word_str.
-- Candidates are produced in this order: deletions, transpositions of
-- adjacent characters, single-letter replacements and single-letter
-- insertions, with letters taken from the lowercase a-z alphabet.
-- @param word_str the word to edit
-- @param yield    optional emitter called once per candidate; defaults to a
--                 fresh make_yield() emitter, which skips repeated candidates
-- @return a coroutine.wrap generator usable in a generic for
local function edits1(word_str, yield)
  local emit = yield or make_yield()
  return wrap(function()
    local len = #word_str
    -- head(i): the first i characters; tail(i): everything from position i;
    -- char(i): the single character at position i
    local function head(i) return word_str:sub(1, i) end
    local function tail(i) return word_str:sub(i) end
    local function char(i) return word_str:sub(i, i) end
    -- deletes
    for i = 1, len do
      emit(head(i - 1) .. tail(i + 1))
    end
    -- transposes
    for i = 1, len - 1 do
      emit(head(i - 1) .. char(i + 1) .. char(i) .. tail(i + 2))
    end
    -- replaces
    for i = 1, len do
      local before, after = head(i - 1), tail(i + 1)
      for j = 1, #alphabet do
        emit(before .. alphabet[j] .. after)
      end
    end
    -- inserts
    for i = 0, len do
      local before, after = head(i), tail(i + 1)
      for j = 1, #alphabet do
        emit(before .. alphabet[j] .. after)
      end
    end
  end)
end

--- Generate the words two edits away from w that are present in self.model.
-- Every one-edit variant of w is edited once more; second-level candidates
-- share one de-duplicating emitter, and known words are yielded through
-- another, so each result is produced at most once.
-- @param w   the word to correct
-- @param set not used by this function
-- @return a coroutine.wrap generator yielding known words
function spell:known_edits2(w, set)
  local emit_known, emit_second = make_yield(), make_yield()
  local function generate()
    for first in edits1(w) do
      for second in edits1(first, emit_second) do
        if self.model[second] then
          emit_known(second)
        end
      end
    end
  end
  return wrap(generate)
end

--- Filter an iterator down to the words present in self.model.
-- @param list iterator function (e.g. from pairs() or a wrap generator)
-- @param aux  state value handed to the iterator; may be nil
-- @return a coroutine.wrap generator yielding each word found in the model
function spell:known(list, aux)
  local function filter()
    for candidate in list, aux do
      if self.model[candidate] then
        yield(candidate)
      end
    end
  end
  return wrap(filter)
end

--- Suggest a correction for a word.
-- The word is lowercased, then candidates are tried from closest to
-- farthest: the word itself if known, known words one edit away, known
-- words two edits away, and finally the lowercased word unchanged.
-- @param w the word to correct
-- @return the chosen word, or false plus a message when nothing was selected
function spell:correct(w)
  local word = w:lower()
  local guess = self:max(self:known(list(word)))
  if not guess then
    guess = self:max(self:known(edits1(word)))
  end
  if not guess then
    guess = self:max(self:known_edits2(word))
  end
  if not guess then
    -- NOTE(review): this fallback appears to always select the word itself,
    -- which would make the failure return below unreachable -- confirm
    guess = self:max(list(word))
  end
  if not guess then
    return false,"No suggestion found for word: "..word
  end
  return guess
end

return spell
14 changes: 14 additions & 0 deletions src/zen_parse.c
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,19 @@ static int lua_strtok(lua_State* L) {
}
#endif

// list scenarios embedded at build time in lualibs_detected.c
extern const char* const zen_scenarios[];

/**
 * Lua binding: builds an array-like table holding the names stored in
 * zen_scenarios, read up to its terminating NULL entry.
 * Leaves the table on top of the Lua stack and returns 1 (one result).
 */
static int lua_list_scenarios(lua_State* L) {
	int i;
	lua_newtable(L);
	for(i=0; zen_scenarios[i] != NULL; i++) {
		lua_pushstring(L, zen_scenarios[i]);
		// store under integer key i+1 (Lua arrays are 1-indexed);
		// lua_rawseti pops the string and avoids pushing a float key
		lua_rawseti(L, -2, i + 1);
	}
	return 1;
}

void zen_add_parse(lua_State *L) {
// override print() and io.write()
static const struct luaL_Reg custom_parser [] =
Expand All @@ -264,6 +277,7 @@ void zen_add_parse(lua_State *L) {
{"trim", lua_trim_spaces},
{"trimq", lua_trim_quotes},
{"jsontok", lua_unserialize_json},
{"zencode_scenarios", lua_list_scenarios},
{NULL, NULL} };
lua_getglobal(L, "_G");
luaL_setfuncs(L, custom_parser, 0); // for Lua versions 5.2 or greater
Expand Down
82 changes: 82 additions & 0 deletions test/lua/introspection.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@

-- Fetch the scenario names exposed by zencode_scenarios() and keep them in
-- a global (presumably I.spy returns its argument -- TODO confirm).
scenarios = I.spy( zencode_scenarios() )

print''
print( 'Introspection found '.. #scenarios..' scenarios')
print''

local before
local after
-- exceptions: these names are never passed to load_scenario
local not_loadable = { zencode = true, data = true }
for _, name in ipairs(scenarios) do
  -- before = os.time()

  if not not_loadable[name] then
    print ('+ load: '..name)
    load_scenario('zencode_'..name)
  end
  -- after = os.time()
  -- print (' seconds: '..after-before)
end

print''
print( 'All scenarios are loaded now')
print''

-- total_statements = ( 0
-- + table_size(ZEN.when_steps)
-- + table_size(ZEN.given_steps)
-- + table_size(ZEN.then_steps)
-- + table_size(ZEN.foreach_steps) )
-- print( 'Total Zencode statements: '..total_statements)

-- Collect the key of every entry in the when/given/then/foreach step tables.
statements = { }
local function collect_keys(steps)
  for key in pairs(steps) do
    statements[#statements + 1] = key
  end
end
collect_keys(ZEN.when_steps)
collect_keys(ZEN.given_steps)
collect_keys(ZEN.then_steps)
collect_keys(ZEN.foreach_steps)

-- Count how often each lowercased, space-separated token occurs.
tokens = { }
for _, statement in ipairs(statements) do
  for _, tok in ipairs(strtok(trim(statement):lower(), ' ')) do
    -- the literal '' token is not counted
    if tok ~= "''" then
      tokens[tok] = (tokens[tok] or 0) + 1
    end
  end
end

print( 'Hall of fame:')
--- Return the keys of tbl ordered by their associated values.
-- @param tbl          table whose keys are returned
-- @param sortFunction comparator called with two values of tbl; it should
--                     return true when the first value sorts before the second
-- @return a new array holding every key of tbl
local function sortbyval(tbl, sortFunction)
  local ordered = {}
  for key in pairs(tbl) do
    ordered[#ordered + 1] = key
  end

  -- compare keys through the values they map to
  local function by_value(left, right)
    return sortFunction(tbl[left], tbl[right])
  end
  table.sort(ordered, by_value)

  return ordered
end

-- ascending by occurrence count, so the most frequent tokens print last
local sorted_tokens = sortbyval(tokens, function(a, b) return a < b end)

for idx = 1, #sorted_tokens do
  local word = sorted_tokens[idx]
  print(tokens[word]..'\t'..word)
end

print''
print( 'Total Zencode statements: '..#statements)
print( 'Total unique word tokens: '..table_size(tokens))
print''

-- use the token counts as the dictionary of the spelling corrector and
-- check a few misspellings against the expected corrections
SPELL.model = tokens
local spell_cases = {
  { 'crate',  'create' },
  { 'cruute', 'create' },
  { 'frr',    'for' },
  { 'uerfy',  'verify' },
}
for idx = 1, #spell_cases do
  local case = spell_cases[idx]
  assert( SPELL:correct(case[1]) == case[2] )
end
Loading