Skip to content

Commit

Permalink
Merge branch 'neon-mmd:rolling' into 532
Browse files Browse the repository at this point in the history
  • Loading branch information
KekmaTime authored Sep 12, 2024
2 parents c6b9340 + 193b4e3 commit 85a4ba1
Show file tree
Hide file tree
Showing 10 changed files with 710 additions and 640 deletions.
1,148 changes: 593 additions & 555 deletions Cargo.lock

Large diffs are not rendered by default.

18 changes: 7 additions & 11 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "websurfx"
version = "1.17.20"
version = "1.17.22"
edition = "2021"
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
repository = "https://github.com/neon-mmd/websurfx"
Expand All @@ -27,6 +27,7 @@ tokio = { version = "1.32.0", features = [
], default-features = false }
serde = { version = "1.0.209", default-features = false, features = ["derive"] }
serde_json = { version = "1.0.122", default-features = false }
bincode = {version="1.3.3", default-features=false}
maud = { version = "0.26.0", default-features = false, features = [
"actix-web",
] }
Expand All @@ -48,24 +49,21 @@ mlua = { version = "0.9.9", features = [
redis = { version = "0.25.4", features = [
"tokio-comp",
"connection-manager",
"tcp_nodelay"
], default-features = false, optional = true }
blake3 = { version = "1.5.4", default-features = false }
error-stack = { version = "0.4.0", default-features = false, features = [
"std",
] }
async-trait = { version = "0.1.80", default-features = false }
regex = { version = "1.9.4", features = ["perf"], default-features = false }
smallvec = { version = "1.13.1", features = [
"union",
"serde",
], default-features = false }
futures = { version = "0.3.30", default-features = false, features = ["alloc"] }
dhat = { version = "0.3.2", optional = true, default-features = false }
mimalloc = { version = "0.1.43", default-features = false }
async-once-cell = { version = "0.5.3", default-features = false }
actix-governor = { version = "0.5.0", default-features = false }
mini-moka = { version = "0.10", optional = true, default-features = false, features = [
"sync",
moka = { version = "0.12.8", optional = true, default-features = false, features = [
"future",
] }
async-compression = { version = "0.4.12", default-features = false, features = [
"brotli",
Expand All @@ -82,8 +80,8 @@ base64 = { version = "0.21.5", default-features = false, features = [
cfg-if = { version = "1.0.0", default-features = false, optional = true }
keyword_extraction = { version = "1.4.3", default-features = false, features = [
"tf_idf",
"rayon",
] }

stop-words = { version = "0.8.0", default-features = false, features = ["iso"] }
thesaurus = { version = "0.5.2", default-features = false, optional = true, features = [
"moby",
Expand All @@ -104,8 +102,6 @@ lightningcss = { version = "1.0.0-alpha.57", default-features = false, features
# Temporary fork with fix
minify-js = { git = "https://github.com/RuairidhWilliamson/minify-js", branch = "master", version = "0.6.0", default-features = false}



[profile.dev]
opt-level = 0
debug = true
Expand Down Expand Up @@ -180,7 +176,7 @@ opt-level = "z"
use-synonyms-search = ["thesaurus/static"]
default = ["memory-cache"]
dhat-heap = ["dep:dhat"]
memory-cache = ["dep:mini-moka"]
memory-cache = ["dep:moka"]
redis-cache = ["dep:redis", "dep:base64"]
compress-cache-results = ["dep:async-compression", "dep:cfg-if"]
encrypt-cache-results = ["dep:chacha20poly1305", "dep:chacha20"]
Expand Down
6 changes: 3 additions & 3 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
haskellPackages.hadolint
nodejs
nodePackages_latest.cspell
nodePackages_latest.eslint
eslint
nodePackages_latest.markdownlint-cli2
nodePackages_latest.stylelint
redis
Expand Down
41 changes: 28 additions & 13 deletions src/cache/cacher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@
//! from the upstream search engines in a json format.

use error_stack::Report;
use futures::future::join_all;
#[cfg(feature = "memory-cache")]
use mini_moka::sync::Cache as MokaCache;
#[cfg(feature = "memory-cache")]
use mini_moka::sync::ConcurrentCacheExt;
use moka::future::Cache as MokaCache;

#[cfg(feature = "memory-cache")]
use std::time::Duration;
Expand Down Expand Up @@ -376,29 +375,38 @@ impl Cacher for RedisCache {
}
}
/// TryInto implementation for SearchResults from Vec<u8>
use std::convert::TryInto;
use std::{convert::TryInto, sync::Arc};

impl TryInto<SearchResults> for Vec<u8> {
type Error = CacheError;

fn try_into(self) -> Result<SearchResults, Self::Error> {
serde_json::from_slice(&self).map_err(|_| CacheError::SerializationError)
bincode::deserialize_from(self.as_slice()).map_err(|_| CacheError::SerializationError)
}
}

impl TryInto<Vec<u8>> for &SearchResults {
type Error = CacheError;

fn try_into(self) -> Result<Vec<u8>, Self::Error> {
serde_json::to_vec(self).map_err(|_| CacheError::SerializationError)
bincode::serialize(self).map_err(|_| CacheError::SerializationError)
}
}

/// Memory based cache backend.
#[cfg(feature = "memory-cache")]
pub struct InMemoryCache {
/// The backend cache which stores data.
cache: MokaCache<String, Vec<u8>>,
cache: Arc<MokaCache<String, Vec<u8>>>,
}

#[cfg(feature = "memory-cache")]
impl Clone for InMemoryCache {
fn clone(&self) -> Self {
Self {
cache: self.cache.clone(),
}
}
}

#[cfg(feature = "memory-cache")]
Expand All @@ -408,15 +416,17 @@ impl Cacher for InMemoryCache {
log::info!("Initialising in-memory cache");

InMemoryCache {
cache: MokaCache::builder()
.time_to_live(Duration::from_secs(config.cache_expiry_time.into()))
.build(),
cache: Arc::new(
MokaCache::builder()
.time_to_live(Duration::from_secs(config.cache_expiry_time.into()))
.build(),
),
}
}

async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
let hashed_url_string = self.hash_url(url);
match self.cache.get(&hashed_url_string) {
match self.cache.get(&hashed_url_string).await {
Some(res) => self.post_process_search_results(res).await,
None => Err(Report::new(CacheError::MissingValue)),
}
Expand All @@ -427,13 +437,18 @@ impl Cacher for InMemoryCache {
search_results: &[SearchResults],
urls: &[String],
) -> Result<(), Report<CacheError>> {
let mut tasks: Vec<_> = Vec::with_capacity(urls.len());
for (url, search_result) in urls.iter().zip(search_results.iter()) {
let hashed_url_string = self.hash_url(url);
let bytes = self.pre_process_search_results(search_result).await?;
self.cache.insert(hashed_url_string, bytes);
let new_self = self.clone();
tasks.push(tokio::spawn(async move {
new_self.cache.insert(hashed_url_string, bytes).await
}));
}

self.cache.sync();
join_all(tasks).await;

Ok(())
}
}
Expand Down
6 changes: 3 additions & 3 deletions src/cache/redis_cacher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ const REDIS_PIPELINE_SIZE: usize = 3;
/// connect to.
pub struct RedisCache {
/// It stores a pool of connections ready to be used.
connection_pool: Vec<ConnectionManager>,
connection_pool: Box<[ConnectionManager]>,
/// It stores the size of the connection pool (in other words the number of
/// connections that should be stored in the pool).
pool_size: u8,
Expand Down Expand Up @@ -58,13 +58,13 @@ impl RedisCache {
}));
}

let mut outputs = Vec::new();
let mut outputs = Vec::with_capacity(tasks.len());
for task in tasks {
outputs.push(task.await??);
}

let redis_cache = RedisCache {
connection_pool: outputs,
connection_pool: outputs.into_boxed_slice(),
pool_size,
current_connection: Default::default(),
cache_ttl,
Expand Down
74 changes: 48 additions & 26 deletions src/models/aggregation_models.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

use super::engine_models::EngineError;
use serde::{Deserialize, Serialize};
use smallvec::SmallVec;
#[cfg(any(
feature = "use-synonyms-search",
feature = "use-non-static-synonyms-search"
Expand All @@ -23,7 +22,7 @@ pub struct SearchResult {
/// The description of the search result.
pub description: String,
/// The names of the upstream engines from which this results were provided.
pub engine: SmallVec<[String; 0]>,
pub engine: Vec<String>,
/// The tf-idf score of the result with regard to the title, url and description and the user's query
pub relevance_score: f32,
}
Expand Down Expand Up @@ -153,10 +152,10 @@ impl EngineErrorInfo {
#[serde(rename_all = "camelCase")]
pub struct SearchResults {
/// Stores the individual serializable `SearchResult` struct into a vector of
pub results: Vec<SearchResult>,
pub results: Box<[SearchResult]>,
/// Stores the information on which engines failed with their engine name
/// and the type of error that caused it.
pub engine_errors_info: Vec<EngineErrorInfo>,
pub engine_errors_info: Box<[EngineErrorInfo]>,
/// Stores the flag option which holds the check value that the following
/// search query was disallowed when the safe search level set to 4 and it
/// was present in the `Blocklist` file.
Expand All @@ -183,10 +182,10 @@ impl SearchResults {
/// the search url.

Check warning on line 182 in src/models/aggregation_models.rs

View workflow job for this annotation

GitHub Actions / Rust project

doc list item without indentation
/// * `engine_errors_info` - Takes an array of structs which contains information regarding
/// which engines failed with their names, reason and their severity color name.
pub fn new(results: Vec<SearchResult>, engine_errors_info: &[EngineErrorInfo]) -> Self {
pub fn new(results: Box<[SearchResult]>, engine_errors_info: Box<[EngineErrorInfo]>) -> Self {
Self {
results,
engine_errors_info: engine_errors_info.to_owned(),
engine_errors_info,
disallowed: Default::default(),
filtered: Default::default(),
safe_search_level: Default::default(),
Expand All @@ -205,11 +204,11 @@ impl SearchResults {
}

/// A getter function that gets the value of `engine_errors_info`.
pub fn engine_errors_info(&mut self) -> Vec<EngineErrorInfo> {
pub fn engine_errors_info(&mut self) -> Box<[EngineErrorInfo]> {
std::mem::take(&mut self.engine_errors_info)
}
/// A getter function that gets the value of `results`.
pub fn results(&mut self) -> Vec<SearchResult> {
pub fn results(&mut self) -> Box<[SearchResult]> {
self.results.clone()
}

Expand Down Expand Up @@ -254,27 +253,50 @@ fn calculate_tf_idf(
let tf_idf = TfIdf::new(params);
let tokener = Tokenizer::new(query, stop_words, Some(punctuation));
let query_tokens = tokener.split_into_words();
let mut search_tokens = vec![];

for token in query_tokens {
#[cfg(any(
feature = "use-synonyms-search",
feature = "use-non-static-synonyms-search"
))]
{
// find some synonyms and add them to the search (from wordnet or moby if feature is enabled)
let synonyms = synonyms(&token);
search_tokens.extend(synonyms)
}
search_tokens.push(token);
}
#[cfg(any(
feature = "use-synonyms-search",
feature = "use-non-static-synonyms-search"
))]
let mut extra_tokens = vec![];

let mut total_score = 0.0f32;
for token in search_tokens.iter() {
total_score += tf_idf.get_score(token);
}
let total_score: f32 = query_tokens
.iter()
.map(|token| {
#[cfg(any(
feature = "use-synonyms-search",
feature = "use-non-static-synonyms-search"
))]
{
// find some synonyms and add them to the search (from wordnet or moby if feature is enabled)
extra_tokens.extend(synonyms(token))
}

tf_idf.get_score(token)
})
.sum();

#[cfg(not(any(
feature = "use-synonyms-search",
feature = "use-non-static-synonyms-search"
)))]
let result = total_score / (query_tokens.len() as f32);

#[cfg(any(
feature = "use-synonyms-search",
feature = "use-non-static-synonyms-search"
))]
let extra_total_score: f32 = extra_tokens
.iter()
.map(|token| tf_idf.get_score(token))
.sum();

let result = total_score / (search_tokens.len() as f32);
#[cfg(any(
feature = "use-synonyms-search",
feature = "use-non-static-synonyms-search"
))]
let result =
(extra_total_score + total_score) / ((query_tokens.len() + extra_tokens.len()) as f32);

f32::from(!result.is_nan()) * result
}
4 changes: 2 additions & 2 deletions src/models/server_models.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use super::parser_models::Style;
pub struct SearchParams {
/// It stores the search parameter option `q` (or query in simple words)
/// of the search url.
pub q: Option<String>,
pub q: Option<Cow<'static, str>>,
/// It stores the search parameter `page` (or pageno in simple words)
/// of the search url.
pub page: Option<u32>,
Expand All @@ -29,7 +29,7 @@ pub struct Cookie<'a> {
/// It stores the colorscheme name used for the website theme.
pub colorscheme: Cow<'a, str>,
/// It stores the user selected upstream search engines selected from the UI.
pub engines: Cow<'a, Vec<Cow<'a, str>>>,
pub engines: Cow<'a, [Cow<'a, str>]>,
/// It stores the user selected safe search level from the UI.
pub safe_search_level: u8,
}
Expand Down
Loading

0 comments on commit 85a4ba1

Please sign in to comment.