Skip to content

Commit 14352ca

Browse files
committed
Fixed an issue with creating static refs to avoid a type-system issue. Got the code to compile.
1 parent b2e6df5 commit 14352ca

File tree

8 files changed

+190
-108
lines changed

8 files changed

+190
-108
lines changed

Cargo.toml

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ members = [
77
"experiments/expr/bytestring/",
88
"experiments/expr/dyck/",
99
"frontend/",
10-
#"kernel/" LP: Looks to be in a state of heavy experimentation
10+
"kernel/" # LP: Looks to be in a state of heavy experimentation
1111
]
1212

1313
[workspace.package]
@@ -18,7 +18,7 @@ edition = "2021"
1818
# Internal to MORK
1919
mork-bytestring = {path = "./experiments/expr/bytestring"}
2020
mork-frontend = {path = "./frontend"}
21-
# mork = {path = "./kernel"} LP: Looks to be in a state of heavy experimentation
21+
mork = {path = "./kernel"} # LP: Looks to be in a state of heavy experimentation
2222

2323
# External to MORK
2424
pathmap = {path = "../PathMap", features = ["all_dense_nodes"]}

experiments/expr/bytestring/src/lib.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -515,7 +515,7 @@ impl Expr {
515515
}
516516

517517
#[inline(never)]
518-
pub fn serialize<Target : std::io::Write, F : for <'a> Fn(&'a [u8]) -> &'a str>(&self, t: &mut Target, map_symbol: F) -> () {
518+
pub fn serialize<'f, Target : std::io::Write, F : for <'a> Fn(&'a [u8]) -> &'f str + 'f>(&self, t: &mut Target, map_symbol: F) -> () {
519519
let mut traversal = SerializerTraversal{ out: t, map_symbol: map_symbol, transient: false };
520520
execute_loop(&mut traversal, *self, 0);
521521
}
@@ -624,9 +624,9 @@ impl Debug for Expr {
624624
}
625625
}
626626

627-
struct SerializerTraversal<'a, Target : std::io::Write, F : for <'b> Fn(&'b [u8]) -> &'b str> { out: &'a mut Target, map_symbol: F, transient: bool }
627+
struct SerializerTraversal<'a, 'f, Target : std::io::Write, F : for <'b> Fn(&'b [u8]) -> &'f str + 'f> { out: &'a mut Target, map_symbol: F, transient: bool }
628628

629-
impl <Target : std::io::Write, F : for <'b> Fn(&'b [u8]) -> &'b str> Traversal<(), ()> for SerializerTraversal<'_, Target, F> {
629+
impl <'f, Target : std::io::Write, F : for <'b> Fn(&'b [u8]) -> &'f str + 'f> Traversal<(), ()> for SerializerTraversal<'_, 'f, Target, F> {
630630
#[inline(always)] fn new_var(&mut self, offset: usize) -> () { if self.transient { self.out.write(" ".as_bytes()); }; self.out.write("$".as_bytes()); }
631631
#[inline(always)] fn var_ref(&mut self, offset: usize, i: u8) -> () { if self.transient { self.out.write(" ".as_bytes()); }; self.out.write("_".as_bytes()); self.out.write((i as u16 + 1).to_string().as_bytes()); }
632632
#[inline(always)] fn symbol(&mut self, offset: usize, s: &[u8]) -> () { if self.transient { self.out.write(" ".as_bytes()); }; self.out.write((self.map_symbol)(s).as_bytes()); }

frontend/src/bytestring_parser.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ impl <'a> Context<'a> {
7575
}
7676

7777
pub trait Parser {
78-
fn tokenizer<'r>(&mut self, s: &[u8]) -> &'r [u8];
78+
fn tokenizer<'r>(&mut self, s: &[u8]) -> &'r [u8] where Self : 'r;
7979

8080
fn sexpr<'a>(&mut self, it: &mut Context<'a>, target: &mut ExprZipper) -> Result<(), ParserError> {
8181
use ParserError::*;

kernel/Cargo.toml

+3
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,6 @@ rustflags = "-C opt-level=3 -C target-cpu=native"
1111
pathmap = { workspace=true }
1212
mork-frontend = { workspace=true }
1313
mork-bytestring = { workspace=true }
14+
15+
[features]
16+
pathmap_counters = []

kernel/src/lib.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@ mod tests {
1111
let input = "(foo bar)\n";
1212
let mut sm = SymbolMapping::new();
1313
let mut s = Space::new();
14-
assert_eq!(s.load(input.as_bytes(), &mut sm).unwrap(), 1);
14+
// assert_eq!(s.load(input.as_bytes(), &mut sm).unwrap(), 1);
1515
let mut res = Vec::<u8>::new();
16-
s.dump(&mut res, sm.as_static()).unwrap();
16+
s.dump(&mut res, &sm).unwrap();
1717
assert_eq!(input, String::from_utf8(res).unwrap());
1818
}
1919

@@ -25,7 +25,7 @@ mod tests {
2525
let mut s = Space::new();
2626
assert_eq!(s.load_csv(csv_input.as_bytes(), &mut sm).unwrap(), 2);
2727
let mut res = Vec::<u8>::new();
28-
s.dump(&mut res, sm.as_static()).unwrap();
28+
s.dump(&mut res, &sm).unwrap();
2929
assert_eq!(reconstruction, String::from_utf8(res).unwrap());
3030
}
3131

@@ -88,10 +88,10 @@ mod tests {
8888
let mut s = Space::new();
8989
let mut sm = SymbolMapping::new();
9090

91-
assert_eq!(16, s.load_json(json_input.as_bytes(), sm.as_static_mut()).unwrap());
91+
assert_eq!(16, s.load_json(json_input.as_bytes(), &mut sm).unwrap());
9292

9393
let mut res = Vec::<u8>::new();
94-
s.dump(&mut res, sm.as_static()).unwrap();
94+
s.dump(&mut res, &sm).unwrap();
9595
assert_eq!(reconstruction, String::from_utf8(res).unwrap());
9696
}
9797
}

kernel/src/main.rs

+6-3
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,15 @@ fn main() {
66
let mut sm = SymbolMapping::new();
77
let t0 = Instant::now();
88
let nodesf = std::fs::File::open("/run/media/adam/43323a1c-ad7e-4d9a-b3c0-cf84e69ec61a/awesome-biomedical-kg/ckg_v3-002/results/nodes.json").unwrap();
9-
let loaded = s.load_json(nodesf, sm.as_static_mut()).unwrap();
9+
let loaded = s.load_json(nodesf, &mut sm).unwrap();
1010
println!("loaded {} nodes in {} seconds", loaded, t0.elapsed().as_secs());
1111
let t1 = Instant::now();
1212
let edgesf = std::fs::File::open("/run/media/adam/43323a1c-ad7e-4d9a-b3c0-cf84e69ec61a/awesome-biomedical-kg/ckg_v3-002/results/edges.json").unwrap();
13-
let loaded = s.load_json(edgesf, sm.as_static_mut()).unwrap();
13+
let loaded = s.load_json(edgesf, &mut sm).unwrap();
1414
println!("loaded {} edges in {} seconds", loaded, t1.elapsed().as_secs());
15-
s.done(sm);
15+
#[cfg(feature = "pathmap_counters")]{
16+
s.done(sm);
17+
}
18+
1619
}
1720

kernel/src/space.rs

+75-94
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
1-
use std::fmt::format;
2-
use std::hint::black_box;
3-
use std::io::{BufRead, Read, Write};
4-
use std::{mem, process, ptr};
1+
use std::io::{Read, Write};
52
use std::time::Instant;
63
use mork_bytestring::{byte_item, Expr, ExprZipper, ExtractFailure, item_byte, Tag};
74
use mork_bytestring::Tag::{Arity, SymbolSize};
8-
use mork_frontend::bytestring_parser::{Parser, ParserError, BufferedIterator};
5+
use mork_frontend::{bytestring_parser::{Parser, ParserError, /* BufferedIterator */}, cz3_parser::BufferedIterator};
96
use pathmap::trie_map::BytesTrieMap;
107
use pathmap::zipper::{ReadZipper, WriteZipper, Zipper};
118

129

10+
pub(crate) mod symbol_mapping;
11+
pub use symbol_mapping::SymbolMapping;
12+
1313
#[repr(transparent)]
1414
pub struct Space { pub(crate) btm: BytesTrieMap<()> }
1515

@@ -275,64 +275,6 @@ fn indiscriminate_bidirectional_matching_stack(ez: &mut ExprZipper) -> Vec<u8> {
275275
}
276276
}
277277

278-
pub struct SymbolMapping {
279-
count: u64,
280-
symbols: BytesTrieMap<Vec<u8>>,
281-
strings: BytesTrieMap<String>,
282-
}
283-
284-
impl SymbolMapping {
285-
pub fn new() -> Self {
286-
Self {
287-
count: 3,
288-
symbols: BytesTrieMap::new(),
289-
strings: BytesTrieMap::new(),
290-
}
291-
}
292-
293-
// temporary workaround for the inability of making BytesTrieMaps static
294-
pub fn as_static_mut(&mut self) -> &'static mut SymbolMapping {
295-
unsafe { mem::transmute::<&mut SymbolMapping, &'static mut SymbolMapping>(self) }
296-
}
297-
298-
pub fn as_static(&self) -> &'static SymbolMapping {
299-
unsafe { mem::transmute::<&SymbolMapping, &'static SymbolMapping>(&self) }
300-
}
301-
}
302-
303-
fn gen_key<'a>(i: u64, buffer: *mut u8) -> &'a [u8] {
304-
let ir = u64::from_be(i);
305-
unsafe { ptr::write_unaligned(buffer as *mut u64, ir) };
306-
let bs = (8 - ir.trailing_zeros()/8) as usize;
307-
let l = bs.max(1);
308-
unsafe { std::slice::from_raw_parts(buffer.byte_offset((8 - l) as isize), l) }
309-
}
310-
311-
impl Parser for SymbolMapping {
312-
fn tokenizer(&mut self, s: String) -> Vec<u8> {
313-
if s.len() == 0 { return vec![] }
314-
// return s.as_bytes().to_vec();
315-
let mut z = self.symbols.write_zipper_at_path(s.as_bytes());
316-
if let Some(r) = z.get_value() {
317-
r.clone()
318-
} else {
319-
self.count += 1;
320-
let mut buf: [u8; 8] = [0; 8];
321-
let slice = gen_key(self.count, buf.as_mut_ptr());
322-
let internal = slice.to_vec();
323-
z.set_value(internal.clone());
324-
drop(z);
325-
self.strings.insert(slice, s);
326-
internal
327-
}
328-
}
329-
}
330-
331-
impl SymbolMapping {
332-
pub fn token_lookup(&self, token: &[u8]) -> Option<&String> {
333-
self.strings.get(token)
334-
}
335-
}
336278

337279

338280
impl Space {
@@ -344,6 +286,7 @@ impl Space {
344286
unsafe { (&self.btm as *const BytesTrieMap<()>).cast_mut().as_mut().unwrap().write_zipper() }
345287
}
346288

289+
347290
pub fn load_csv<R : Read>(&mut self, mut r: R, sm: &mut SymbolMapping) -> Result<usize, String> {
348291
let mut i = 0;
349292
let mut buf = vec![];
@@ -358,7 +301,7 @@ impl Space {
358301
let mut ez = ExprZipper::new(e);
359302
ez.loc += 1;
360303
for symbol in sv.split(|&x| x == b',') {
361-
let internal = sm.tokenizer(unsafe { String::from_utf8_unchecked(symbol.to_vec()) });
304+
let internal = sm.tokenizer(symbol);
362305
ez.write_symbol(&internal[..]);
363306
ez.loc += internal.len() + 1;
364307
a += 1;
@@ -376,39 +319,71 @@ impl Space {
376319
Ok(i)
377320
}
378321

379-
pub fn load_json<R : Read>(&mut self, mut r: R, sm: &'static mut SymbolMapping) -> Result<usize, String> {
380-
pub struct SpaceTranscriber<'a, 'b, 'c> { count: usize, wz: &'c mut WriteZipper<'a, 'b, ()>, sm: &'static mut SymbolMapping }
381-
impl <'a, 'b, 'c> SpaceTranscriber<'a, 'b, 'c> {
322+
// pub fn load_csv<R : Read>(&mut self, mut r: R, sm: &mut SymbolMapping) -> Result<usize, String> {
323+
// let mut i = 0;
324+
// let mut buf = vec![];
325+
// let mut stack = [0u8; 2048];
326+
327+
// match r.read_to_end(&mut buf) {
328+
// Ok(read) => {
329+
// for sv in buf.split(|&x| x == b'\n') {
330+
// if sv.len() == 0 { continue }
331+
// let mut a = 0;
332+
// let e = Expr{ ptr: stack.as_mut_ptr() };
333+
// let mut ez = ExprZipper::new(e);
334+
// ez.loc += 1;
335+
// for symbol in sv.split(|&x| x == b',') {
336+
// let internal = sm.tokenizer(unsafe { String::from_utf8_unchecked(symbol.to_vec()) });
337+
// ez.write_symbol(&internal[..]);
338+
// ez.loc += internal.len() + 1;
339+
// a += 1;
340+
// }
341+
// let total = ez.loc;
342+
// ez.reset();
343+
// ez.write_arity(a);
344+
// self.btm.insert(&stack[..total], ());
345+
// i += 1;
346+
// }
347+
// }
348+
// Err(e) => { return Err(format!("{:?}", e)) }
349+
// }
350+
351+
// Ok(i)
352+
// }
353+
pub fn load_json<R : Read>(&mut self, mut r: R, sm: &mut SymbolMapping) -> Result<usize, String> {
354+
pub struct SpaceTranscriber<'a, 'b, 'c,'sm> { count: usize, wz: &'c mut WriteZipper<'a, 'b, ()>, sm: &'sm mut SymbolMapping }
355+
impl <'a, 'b, 'c, 'sm> SpaceTranscriber<'a, 'b, 'c, 'sm> {
382356
#[inline(always)] fn write<S : Into<String>>(&mut self, s: S) {
383-
let token = self.sm.tokenizer(s.into());
357+
let s_ : String = s.into();
358+
let token = self.sm.tokenizer(s_.as_bytes());
384359
let mut path = vec![item_byte(Tag::SymbolSize(token.len() as u8))];
385360
path.extend(token);
386361
self.wz.descend_to(&path[..]);
387362
self.wz.set_value(());
388363
self.wz.ascend(path.len());
389364
}
390365
}
391-
impl <'a, 'b, 'c> crate::json_parser::Transcriber for SpaceTranscriber<'a, 'b, 'c> {
366+
impl <'a, 'b, 'c, 'sm> crate::json_parser::Transcriber for SpaceTranscriber<'a, 'b, 'c, 'sm> {
392367
#[inline(always)] fn descend_index(&mut self, i: usize, first: bool) -> () {
393368
if first { self.wz.descend_to(&[item_byte(Tag::Arity(2))]); }
394-
let token = self.sm.tokenizer(i.to_string());
369+
let token = self.sm.tokenizer(i.to_string().as_bytes());
395370
self.wz.descend_to(&[item_byte(Tag::SymbolSize(token.len() as u8))]);
396371
self.wz.descend_to(token);
397372
}
398373
#[inline(always)] fn ascend_index(&mut self, i: usize, last: bool) -> () {
399-
self.wz.ascend(self.sm.tokenizer(i.to_string()).len() + 1);
374+
self.wz.ascend(self.sm.tokenizer(i.to_string().as_bytes()).len() + 1);
400375
if last { self.wz.ascend(1); }
401376
}
402377
#[inline(always)] fn write_empty_array(&mut self) -> () { self.write("[]"); self.count += 1; }
403378
#[inline(always)] fn descend_key(&mut self, k: &str, first: bool) -> () {
404379
if first { self.wz.descend_to(&[item_byte(Tag::Arity(2))]); }
405-
let token = self.sm.tokenizer(k.to_string());
380+
let token = self.sm.tokenizer(k.to_string().as_bytes());
406381
// let token = k.to_string();
407382
self.wz.descend_to(&[item_byte(Tag::SymbolSize(token.len() as u8))]);
408383
self.wz.descend_to(token);
409384
}
410385
#[inline(always)] fn ascend_key(&mut self, k: &str, last: bool) -> () {
411-
let token = self.sm.tokenizer(k.to_string());
386+
let token = self.sm.tokenizer(k.to_string().as_bytes());
412387
// let token = k.to_string();
413388
self.wz.ascend(token.len() + 1);
414389
if last { self.wz.ascend(1); }
@@ -442,30 +417,35 @@ impl Space {
442417
Ok(st.count)
443418
}
444419

445-
pub fn load<R : Read>(&mut self, r: R, sm: &mut SymbolMapping) -> Result<usize, String> {
446-
let mut it = BufferedIterator::new(r);
447420

448-
let t0 = Instant::now();
449-
let mut i = 0;
450-
let mut stack = [0u8; 2048];
451-
let mut vs = Vec::with_capacity(64);
452-
loop {
453-
let mut ez = ExprZipper::new(Expr{ptr: stack.as_mut_ptr()});
454-
match sm.sexprUnsafe::<R>(&mut it, &mut vs, &mut ez) {
455-
Ok(()) => {
456-
self.btm.insert(&stack[..ez.loc], ());
457-
}
458-
Err(ParserError::InputFinished()) => { break }
459-
Err(other) => { return Err(format!("{:?}", other)) }
460-
}
461-
i += 1;
462-
vs.clear();
463-
}
464-
println!("loading took {} ms", t0.elapsed().as_millis());
465-
Ok(i)
421+
// // TODO integrate with new code?
422+
pub fn load<R : Read>(&mut self, r: R, sm: &mut SymbolMapping) -> Result<usize, String> {
423+
#![allow(unused)]
424+
core::todo!("Figure out what version of the parser this expects");
425+
// let mut it = BufferedIterator::new(r);
426+
427+
// let t0 = Instant::now();
428+
// let mut i = 0;
429+
// let mut stack = [0u8; 2048];
430+
// let mut vs = Vec::with_capacity(64);
431+
// loop {
432+
// let mut ez = ExprZipper::new(Expr{ptr: stack.as_mut_ptr()});
433+
// match sm.sexprUnsafe::<R>(&mut it, &mut vs, &mut ez) {
434+
// Ok(()) => {
435+
// self.btm.insert(&stack[..ez.loc], ());
436+
// }
437+
// Err(ParserError::InputFinished) => { break }
438+
// Err(other) => { return Err(format!("{:?}", other)) }
439+
// }
440+
// i += 1;
441+
// vs.clear();
442+
// }
443+
// println!("loading took {} ms", t0.elapsed().as_millis());
444+
// Ok(i)
466445
}
467446

468-
pub fn dump<W : Write>(&self, w: &mut W, sm: &'static SymbolMapping) -> Result<usize, String> {
447+
448+
pub fn dump<W : Write>(&self, w: &mut W, sm: &SymbolMapping) -> Result<usize, String> {
469449
let mut rz = self.btm.read_zipper();
470450

471451
let t0 = Instant::now();
@@ -537,6 +517,7 @@ impl Space {
537517
});
538518
}
539519

520+
#[cfg(feature = "pathmap_counters")]
540521
pub fn done(&mut self, symbol_mapping: SymbolMapping) -> ! {
541522
let counters = pathmap::counters::Counters::count_ocupancy(&self.btm);
542523
counters.print_histogram_by_depth();

0 commit comments

Comments (0)