Skip to content

Commit

Permalink
Fixed variations. Made the program store encountered positions
Browse files Browse the repository at this point in the history
  • Loading branch information
datawater committed Jul 7, 2024
1 parent 13e6c4d commit 08877e3
Show file tree
Hide file tree
Showing 11 changed files with 154 additions and 48 deletions.
3 changes: 1 addition & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
target/
data/*.zip
data/twic*.pgn
data/*.big.pgn
mkramdisk
*.data
*.perf
Expand Down
12 changes: 6 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 2 additions & 6 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,9 @@ license = "GPL-3.0"
[dependencies]
lexopt = "0.3.0"
memmap2 = "0.9.4"
libcmbr = { path = "./libcmbr" }
libcmbr = { path = "./libcmbr", features = ["bitcode"] }
cfg-if = "1.0.0"
bitcode = { version = "0.6.0", features = ["serde"], optional = true }

[features]
default = ["bitcode"]
bitcode = ["dep:bitcode"]
bitcode = { version = "0.6.0", features = ["serde"]}

[target.'cfg(target_os = "windows")'.dependencies]
windows = { version = "0.58.0", features = ["Win32", "Win32_System", "Win32_System_SystemInformation"] }
Expand Down
6 changes: 3 additions & 3 deletions NOTICE.html
Original file line number Diff line number Diff line change
Expand Up @@ -283,9 +283,9 @@ <h4>Used by:</h4>
<li><a href=" https://github.com/rust-lang/libc ">libc 0.2.155</a></li>
<li><a href=" https://github.com/dtolnay/proc-macro2 ">proc-macro2 1.0.86</a></li>
<li><a href=" https://github.com/dtolnay/quote ">quote 1.0.36</a></li>
<li><a href=" https://github.com/serde-rs/serde ">serde 1.0.203</a></li>
<li><a href=" https://github.com/serde-rs/serde ">serde_derive 1.0.203</a></li>
<li><a href=" https://github.com/dtolnay/syn ">syn 2.0.68</a></li>
<li><a href=" https://github.com/serde-rs/serde ">serde 1.0.204</a></li>
<li><a href=" https://github.com/serde-rs/serde ">serde_derive 1.0.204</a></li>
<li><a href=" https://github.com/dtolnay/syn ">syn 2.0.69</a></li>
<li><a href=" https://github.com/dtolnay/unicode-ident ">unicode-ident 1.0.12</a></li>
</ul>
<pre class="license-text"> Apache License
Expand Down
2 changes: 1 addition & 1 deletion data/with_varation_and_comments.pgn
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[E "E"]
[S "S"]

1. e4 (1. d4) (1. c4) 1... e5 2. Nf3 (2. Nc3 d5 (2... d6)) (2. d3) { Comment } *
1. e4 (1. d4) (1. c4) 1... e5 2. Nf3 (2. Nc3 d5 (2... d6)) (2. d3) 2... Nc6 3. Bc4 Bc5 4. O-O *

[E "E"]
[S "S"]
Expand Down
Empty file added libcmbr/NOTICE.html
Empty file.
124 changes: 115 additions & 9 deletions libcmbr/src/cmbr/pgntocmbr.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,24 @@
use super::{CmbrFile, SanToCmbrMvConvertor};
use crate::cmbr::CmbrGame;
use crate::cmbr::CmbrVariation;
use crate::pgn::{PgnGame, PgnToken};
use crate::{cmbr::CmbrGame, error::LibCmbrError};
use pgn_lexer::parser::Token;

use shakmaty::Chess;
use std::str::from_utf8_unchecked;

// use lz4_flex::{compress_prepend_size, decompress_size_prepended};
use phf::phf_map;

use shakmaty::fen::Fen;
use shakmaty::zobrist::{Zobrist32, ZobristHash};
use shakmaty::{CastlingSide, Chess, Color, Position};

use std::collections::HashMap;
use std::error::Error;
use std::io::Write;
use std::str::from_utf8_unchecked;

macro_rules! move_to_halfmove {
($move:expr, $is_black:expr) => {
(2 * $move) - ($is_black == false) as u16
(2 * $move) - 1 - ($is_black == false) as u16
};
}

Expand All @@ -24,32 +31,97 @@ static MOVE_ANNOTATION_TO_NAG: phf::Map<&[u8], u8> = phf_map! {
b"?!" => 6,
};

/// Compile-time lookup table from a raw PGN game-termination marker to the
/// single-character result code stored in `CmbrGame::result`.
///
/// `1-0` maps to `'w'`, `0-1` to `'b'`, `1/2-1/2` to `'d'` and `*` to `'u'`
/// (presumably white win / black win / draw / unknown-or-unfinished; confirm
/// against the CMBR format notes).
///
/// NOTE(review): indexing this map with a key that is not listed here panics,
/// so callers that use `RESULT_TO_CHAR[key]` rely on the lexer only ever
/// producing one of these four markers.
static RESULT_TO_CHAR: phf::Map<&[u8], char> = phf_map! {
b"*" => 'u',
b"1-0" => 'w',
b"0-1" => 'b',
b"1/2-1/2" => 'd',
};

/// Serializes `board` into a truncated FEN string.
///
/// The output consists of, separated by single spaces:
/// 1. the piece placement,
/// 2. the side to move (`w` or `b`),
/// 3. the castling rights (`K`, `Q`, `k`, `q`, or `-` when neither side may castle),
/// 4. the en passant square, only when a legal en passant capture exists.
///
/// The halfmove clock and fullmove number are never emitted, and the en passant
/// field is omitted entirely (rather than written as `-`) when it does not apply.
fn get_fen_from_board(board: &Chess) -> String {
    let mut fen = board.board().board_fen(board.promoted()).to_string();
    fen.push_str(if board.turn() == Color::White {
        " w "
    } else {
        " b "
    });

    let castles = board.castles();
    let castling_flags = [
        (Color::White, CastlingSide::KingSide, 'K'),
        (Color::White, CastlingSide::QueenSide, 'Q'),
        (Color::Black, CastlingSide::KingSide, 'k'),
        (Color::Black, CastlingSide::QueenSide, 'q'),
    ];

    let len_before_castling = fen.len();
    for (color, side, flag) in castling_flags {
        if castles.has(color, side) {
            fen.push(flag);
        }
    }

    // A FEN may be truncated after any field, but a field in the middle must not
    // be empty: without this placeholder the result would end in a dangling
    // space, or contain a double space before the en passant square.
    if fen.len() == len_before_castling {
        fen.push('-');
    }

    if let Some(ep_square) = board.ep_square(shakmaty::EnPassantMode::Legal) {
        fen.push(' ');
        fen.push_str(&ep_square.to_string());
    }

    fen
}

impl CmbrFile {
// TODO(#22): Write tests for CmbrFile::from_ast
// TODO: Split the headers, moves and encountered positions into separate files and compress them separately
// TODO: Reduce the memory footprint of the program
// Currently the program uses 33x as much memory as the size of the input file.
pub fn from_ast(
ast: Vec<PgnGame>,
convertor: &mut SanToCmbrMvConvertor,
is_compressed: bool,
) -> Result<Self, LibCmbrError> {
) -> Result<Self, Box<dyn Error>> {
debug_assert!(is_compressed == false);

let mut file = CmbrFile::new(is_compressed);
let mut board = Chess::new();

(0..ast.len()).into_iter().for_each(|game_i| {
let fen = get_fen_from_board(&board);

let _ = file.encountered_positions.try_insert(
board
.zobrist_hash::<Zobrist32>(shakmaty::EnPassantMode::Legal)
.0,
fen,
);

let len = ast.len();

(0..len).into_iter().for_each(|game_i| {
if game_i % 1000 == 0 || game_i == len {
print!("{}\r", game_i as f64 / len as f64 * 100.0);
let _ = std::io::stdout().flush();
}

file.games.insert(game_i as u32, CmbrGame::new());

// SAFE: Safe
let cmbr_game = unsafe { file.games.get_mut(&(game_i as u32)).unwrap_unchecked() };
let game = &ast[game_i];

// TODO: Support fen headers in libcmbr
board = Chess::new();

let _ = cmbr_game.encountered_positions.try_insert(0, board
.zobrist_hash::<Zobrist32>(shakmaty::EnPassantMode::Legal)
.0);

{
let mut current_key: &[u8] = &[];

for header in &game.0 .0 {
match header {
Token::Result(r) => cmbr_game.result = r[r.len() - 1] as char,
Token::Result(r) => cmbr_game.result = RESULT_TO_CHAR[r],
Token::TagSymbol(k) => current_key = k,

// SAFE: Safe
Expand All @@ -68,6 +140,9 @@ impl CmbrFile {
let variations = &game.0 .1;
let variations_iter = variations.iter();

let mut variation_pointers: HashMap<u16, u16> = HashMap::with_capacity(1);
variation_pointers.insert(0, 0);

for (id, variation) in variations_iter {
if variation.0.len() == 0 {
eprintln!("[WARN] Empty variation on game N{game_i}. Skipping game");
Expand All @@ -82,6 +157,23 @@ impl CmbrFile {
0
};

let variation_pointer = *variation_pointers.get(id).unwrap() as u32;
let positions_pointer = (variation_pointer << 16) | start_at as u32;

let zobrist_hash = cmbr_game.encountered_positions.get(&positions_pointer);
if zobrist_hash.is_none() {
eprintln!("[WARN] Skipping game: {game_i}");
break;
}

// SAFE: Safe
let zobrist_hash = unsafe { zobrist_hash.unwrap_unchecked() };
let fen = file.encountered_positions.get(zobrist_hash).unwrap();
// SAFE: Safe
let fen: Fen = fen.parse().unwrap();

board = fen.into_position(shakmaty::CastlingMode::Standard).unwrap();

let cmbr_variation = CmbrVariation::new(start_at);
cmbr_game.variations.insert(*id, cmbr_variation);

Expand All @@ -95,9 +187,12 @@ impl CmbrFile {
.moves
.push((((*p as u32) << 8) | 0b10000000).into());

variation_pointers.insert(*p, *id);

continue;
}


if let PgnToken::Token(t) = token {
match t {
Token::NAG(n) => {
Expand All @@ -119,15 +214,26 @@ impl CmbrFile {

if cmbrmv.is_err() {
// TODO(#24): Skip game instead of not finishing convertion if invalid san occurs
eprintln!("[WARN] Not finishing convertion of N{game_i} due to invalid san. Error: {}", cmbrmv.err().unwrap());
eprintln!("[WARN] Not finishing convertion of N{game_i} due to invalid san. SAN: {} | Fen: {}",
std::str::from_utf8(m).unwrap(),
get_fen_from_board(&board));
skip_game = true;
break;
}

// SAFE: Safe
let cmbrmv = unsafe { cmbrmv.unwrap_unchecked() };
cmbr_variation.moves.push(cmbrmv);

let hash = board
.zobrist_hash::<Zobrist32>(shakmaty::EnPassantMode::Legal)
.0;

let fen = get_fen_from_board(&board);
let _ = file.encountered_positions.try_insert(hash, fen);

current_move_number += 1;
let _ = cmbr_game.encountered_positions.insert(((*id as u32) << 16) | current_move_number as u32, hash);
}

Token::MoveAnnotation(an) => cmbr_variation.moves.push(
Expand Down
21 changes: 15 additions & 6 deletions libcmbr/src/cmbr/structs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@ use super::u24;
use crate::utils::def_enum;
use litemap::LiteMap;

pub type CmbrMv = u24;

def_enum! (
#[doc = "An enum denoting the flags that a CMBR-MV can have"]
pub CmbrMvFlags => u8 {
Expand Down Expand Up @@ -44,20 +42,30 @@ def_enum! (
BlackLongCaslte => 0b1111,
});

/// CMBR Move representation
pub type CmbrMv = u24;
/// Calculated by `(VariationId << 16) | HalfMoveNumber`
pub type MoveId = u32;
/// A chess position serialized as a FEN string, as stored in
/// `CmbrFile::encountered_positions`.
pub type CmbrFen = String;

/// A Struct denoting the structure of a CMBR file.
#[cfg_attr(feature = "bitcode", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug, PartialEq, Eq, Clone)]
#[repr(C, align(1))]
pub struct CmbrFile {
// CMBR!
/// Header: `CMBR!`
magic_bytes: &'static str,
pub is_compressed: bool,
// Game Id
/// Game Id
pub games: HashMap<u32, CmbrGame>,
/// Positions stored as FEN
pub encountered_positions: HashMap<u32, CmbrFen>,
}

/// A Struct denoting the structure of a game represented in CMBR
#[cfg_attr(feature = "bitcode", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug, PartialEq, Eq, Clone)]
#[repr(C, align(1))]
pub struct CmbrGame {
pub headers: LiteMap<String, String>,
/// Possible values: 'w', 'b', 'd', 'u'.
Expand All @@ -68,7 +76,7 @@ pub struct CmbrGame {
pub result: char,
/// Variation pointer (main variation is 0)
pub variations: LiteMap<u16, CmbrVariation>,
pub crc64: u64,
pub encountered_positions: HashMap<u32, u32>,
}

/// A Struct denoting the structure of a variation represented in CMBR
Expand All @@ -91,6 +99,7 @@ impl CmbrFile {
magic_bytes: "CMBR!",
is_compressed,
games: HashMap::with_capacity(16),
encountered_positions: HashMap::with_capacity(1024),
};
}
}
Expand All @@ -101,7 +110,7 @@ impl CmbrGame {
headers: LiteMap::with_capacity(7),
variations: LiteMap::with_capacity(1),
result: 'u',
crc64: 0,
encountered_positions: HashMap::with_capacity(79),
};
}
}
Expand Down
4 changes: 2 additions & 2 deletions libcmbr/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
#![allow(non_upper_case_globals)]
#![feature(test)]
#![feature(test, map_try_insert)]

use cfg_if::cfg_if;

// TODO(#27): Add multithreading support
// TODO(#26): Experiment with different allocators
// Since our program is memory-usage intensive, different allocators may provide performance speedups and use less memory

// NOTE: With TCMAlloc the program is just slightly faster (by like 400ns/iter
// NOTE: With TCMAlloc the program is just slightly faster (by like 400ns/iter)
cfg_if! {
if #[cfg(all(not(target_env = "msvc"), feature = "tcmalloc"))] {
use tcmalloc::TCMalloc;
Expand Down
10 changes: 3 additions & 7 deletions src/eval_args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,9 @@ pub fn eval_args(cli: &Cli) {
let cmbr_file =
CmbrFile::from_ast(ast, &mut convertor, args.enable_compression).unwrap();

cfg_if::cfg_if! {
if #[cfg(feature = "bitcode")] {
let mut f = File::create(&args.output).unwrap();
let serialized = bitcode::serialize(&cmbr_file).unwrap();
f.write(&serialized[..]).unwrap();
}
};
let mut f = File::create(&args.output).unwrap();
let serialized = bitcode::serialize(&cmbr_file).unwrap();
f.write(&serialized[..]).unwrap();
}

crate::CommandE::License => {
Expand Down
Loading

0 comments on commit 08877e3

Please sign in to comment.