Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DISCO-3211] Suggest: move to new remote settings API #6598

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions components/remote_settings/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,11 @@ impl RemoteSettingsClient {
pub fn get_attachment(&self, record: &RemoteSettingsRecord) -> ApiResult<Vec<u8>> {
self.internal.get_attachment(record)
}

#[handle_error(Error)]
pub fn sync(&self) -> ApiResult<()> {
self.internal.sync()
}
}

impl RemoteSettingsClient {
Expand Down
8 changes: 2 additions & 6 deletions components/suggest/src/benchmarks/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

use std::collections::HashMap;

use crate::{db::SuggestDao, error::Error, rs, Result};
use crate::{error::Error, rs, Result};

/// Remotes settings client for benchmarking
///
Expand Down Expand Up @@ -87,11 +87,7 @@ impl RemoteSettingsBenchmarkClient {
}

impl rs::Client for RemoteSettingsBenchmarkClient {
fn get_records(
&self,
collection: rs::Collection,
_db: &mut SuggestDao,
) -> Result<Vec<rs::Record>> {
fn get_records(&self, collection: rs::Collection) -> Result<Vec<rs::Record>> {
Ok(self
.records
.iter()
Expand Down
24 changes: 21 additions & 3 deletions components/suggest/src/benchmarks/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ use std::{
use tempfile::TempDir;

use crate::{SuggestIngestionConstraints, SuggestStore};
use remote_settings::{RemoteSettingsConfig2, RemoteSettingsContext, RemoteSettingsService};

use std::sync::Arc;

pub mod client;
pub mod geoname;
Expand Down Expand Up @@ -76,8 +79,15 @@ fn new_store() -> SuggestStore {
let (starter_dir, starter_db_path) = starter.get_or_insert_with(|| {
let temp_dir = tempfile::tempdir().unwrap();
let db_path = temp_dir.path().join(unique_db_filename());
let store =
SuggestStore::new(&db_path.to_string_lossy(), None).expect("Error building store");
let rs_config = RemoteSettingsConfig2 {
bucket_name: None,
server: None,
app_context: Some(RemoteSettingsContext::default()),
};
let remote_settings_service =
Arc::new(RemoteSettingsService::new("".to_string(), rs_config).unwrap());
let store = SuggestStore::new(&db_path.to_string_lossy(), remote_settings_service)
.expect("Error building store");
store
.ingest(SuggestIngestionConstraints::all_providers())
.expect("Error during ingestion");
Expand All @@ -86,8 +96,16 @@ fn new_store() -> SuggestStore {
});

let db_path = starter_dir.path().join(unique_db_filename());
let rs_config = RemoteSettingsConfig2 {
bucket_name: None,
server: None,
app_context: Some(RemoteSettingsContext::default()),
};
let remote_settings_service =
Arc::new(RemoteSettingsService::new("".to_string(), rs_config).unwrap());
std::fs::copy(starter_db_path, &db_path).expect("Error copying starter DB file");
SuggestStore::new(&db_path.to_string_lossy(), None).expect("Error building store")
SuggestStore::new(&db_path.to_string_lossy(), remote_settings_service)
.expect("Error building store")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice! Thanks for keeping this code up to date.

}

/// Cleanup the temp directory created for SuggestStore instances used in the benchmarks.
Expand Down
49 changes: 1 addition & 48 deletions components/suggest/src/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@ use std::{cell::OnceCell, path::Path, sync::Arc};

use interrupt_support::{SqlInterruptHandle, SqlInterruptScope};
use parking_lot::{Mutex, MutexGuard};
use remote_settings::RemoteSettingsResponse;
use rusqlite::{
named_params,
types::{FromSql, ToSql},
Connection, OptionalExtension,
Connection,
};
use sql_support::{open_database, repeat_sql_vars, ConnExt};

Expand Down Expand Up @@ -202,52 +201,6 @@ impl<'a> SuggestDao<'a> {
//
// These methods implement CRUD operations

pub fn read_cached_rs_data(&self, collection: &str) -> Option<RemoteSettingsResponse> {
match self.try_read_cached_rs_data(collection) {
Ok(result) => result,
Err(e) => {
// Return None on failure . If the cached data is corrupted, maybe because the
// RemoteSettingsResponse schema changed, then we want to just continue on. This also matches
// the proposed API from #6328, so it should be easier to adapt this code once
// that's merged.
error_support::report_error!("suggest-rs-cache-read", "{e}");
None
}
}
}

pub fn write_cached_rs_data(&mut self, collection: &str, data: &RemoteSettingsResponse) {
if let Err(e) = self.try_write_cached_rs_data(collection, data) {
// Return None on failure for the same reason as in [Self::read_cached_rs_data]
error_support::report_error!("suggest-rs-cache-write", "{e}");
}
}

fn try_read_cached_rs_data(&self, collection: &str) -> Result<Option<RemoteSettingsResponse>> {
let mut stmt = self
.conn
.prepare_cached("SELECT data FROM rs_cache WHERE collection = ?")?;
let data = stmt
.query_row((collection,), |row| row.get::<_, Vec<u8>>(0))
.optional()?;
match data {
Some(data) => Ok(Some(rmp_serde::decode::from_slice(data.as_slice())?)),
None => Ok(None),
}
}

fn try_write_cached_rs_data(
&mut self,
collection: &str,
data: &RemoteSettingsResponse,
) -> Result<()> {
let mut stmt = self
.conn
.prepare_cached("INSERT OR REPLACE INTO rs_cache(collection, data) VALUES(?, ?)")?;
stmt.execute((collection, rmp_serde::encode::to_vec(data)?))?;
Ok(())
}

pub fn get_ingested_records(&self) -> Result<Vec<IngestedRecord>> {
let mut stmt = self
.conn
Expand Down
119 changes: 57 additions & 62 deletions components/suggest/src/rs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,17 @@
//! the new suggestion in their results, and return `Suggestion::T` variants
//! as needed.

use std::fmt;
use std::{fmt, sync::Arc};

use remote_settings::{Attachment, RemoteSettingsRecord};
use serde::{Deserialize, Deserializer};
use remote_settings::{
Attachment, RemoteSettingsClient, RemoteSettingsError, RemoteSettingsRecord,
RemoteSettingsService,
};
use serde::{Deserialize, Deserializer, Serialize};
use serde_json::{Map, Value};

use crate::{
db::SuggestDao, error::Error, provider::SuggestionProvider,
query::full_keywords_to_fts_content, Result,
error::Error, provider::SuggestionProvider, query::full_keywords_to_fts_content, Result,
};

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
Expand Down Expand Up @@ -69,85 +72,56 @@ pub(crate) trait Client {
/// client-side filtering.
///
/// Records that can't be parsed as [SuggestRecord] are ignored.
fn get_records(&self, collection: Collection, dao: &mut SuggestDao) -> Result<Vec<Record>>;
fn get_records(&self, collection: Collection) -> Result<Vec<Record>>;

fn download_attachment(&self, record: &Record) -> Result<Vec<u8>>;
}

/// Implements the [Client] trait using a real remote settings client
pub struct RemoteSettingsClient {
pub struct SuggestRemoteSettingsClient {
// Create a separate client for each collection name
quicksuggest_client: remote_settings::RemoteSettings,
fakespot_client: remote_settings::RemoteSettings,
quicksuggest_client: Arc<RemoteSettingsClient>,
fakespot_client: Arc<RemoteSettingsClient>,
}

impl RemoteSettingsClient {
pub fn new(
server: Option<remote_settings::RemoteSettingsServer>,
bucket_name: Option<String>,
server_url: Option<String>,
) -> Result<Self> {
impl SuggestRemoteSettingsClient {
pub fn new(rs_service: &RemoteSettingsService) -> Result<Self> {
Ok(Self {
quicksuggest_client: remote_settings::RemoteSettings::new(
remote_settings::RemoteSettingsConfig {
server: server.clone(),
bucket_name: bucket_name.clone(),
collection_name: "quicksuggest".to_owned(),
server_url: server_url.clone(),
},
)?,
fakespot_client: remote_settings::RemoteSettings::new(
remote_settings::RemoteSettingsConfig {
server,
bucket_name,
collection_name: "fakespot-suggest-products".to_owned(),
server_url,
},
)?,
quicksuggest_client: rs_service.make_client("quicksuggest".to_owned())?,
fakespot_client: rs_service.make_client("fakespot-suggest-products".to_owned())?,
})
}

fn client_for_collection(&self, collection: Collection) -> &remote_settings::RemoteSettings {
fn client_for_collection(&self, collection: Collection) -> &RemoteSettingsClient {
match collection {
Collection::Fakespot => &self.fakespot_client,
Collection::Quicksuggest => &self.quicksuggest_client,
}
}
}

impl Client for RemoteSettingsClient {
fn get_records(&self, collection: Collection, dao: &mut SuggestDao) -> Result<Vec<Record>> {
// For now, handle the cache manually. Once 6328 is merged, we should be able to delegate
// this to remote_settings.
impl Client for SuggestRemoteSettingsClient {
fn get_records(&self, collection: Collection) -> Result<Vec<Record>> {
let client = self.client_for_collection(collection);
let cache = dao.read_cached_rs_data(collection.name());
let last_modified = match &cache {
Some(response) => response.last_modified,
None => 0,
};
let response = match cache {
None => client.get_records()?,
Some(cache) => remote_settings::cache::merge_cache_and_response(
cache,
client.get_records_since(last_modified)?,
),
};
if last_modified != response.last_modified {
dao.write_cached_rs_data(collection.name(), &response);
client.sync()?;
let response = client.get_records(false);
match response {
Some(r) => Ok(r
.into_iter()
.filter_map(|r| Record::new(r, collection).ok())
.collect()),
None => Err(Error::RemoteSettings(RemoteSettingsError::Other {
reason: "Unable to get records".to_owned(),
})),
}

Ok(response
.records
.into_iter()
.filter_map(|r| Record::new(r, collection).ok())
.collect())
}

fn download_attachment(&self, record: &Record) -> Result<Vec<u8>> {
let converted_record: RemoteSettingsRecord = record.clone().into();
match &record.attachment {
Some(a) => Ok(self
Some(_) => Ok(self
.client_for_collection(record.collection)
.get_attachment(&a.location)?),
.get_attachment(&converted_record)?),
None => Err(Error::MissingAttachment(record.id.to_string())),
}
}
Expand Down Expand Up @@ -181,11 +155,23 @@ impl Record {
}
}

impl From<Record> for RemoteSettingsRecord {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cargo clippy said impl From is preferred over Into

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense, Rust will automatically generate the Into if you define a From.

fn from(record: Record) -> Self {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we get a bug/ticket on file for removing our Record struct?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

makes sense it's more or less the same as RemoteSettingsRecord

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The key difference is the payload field. RemoteSettingsRecord stores a JSON value there, since it doesn't know the specifics of how it's going to be used. suggest::rs::Record stores the deserialized SuggestRecord there. So, if a function inputs suggest::rs::Record it can access the fields more conveniently and it doesn't have to deal with deserialization errors.

It's still a good idea and I'm pretty sure there's a way to remove some code duplication. Maybe RemoteSettingsRecord could be generic over its payload field. It's just not going to be a trivial change.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

created a jira in our maintenance epic for this

RemoteSettingsRecord {
id: record.id.to_string(),
last_modified: record.last_modified,
deleted: false,
attachment: record.attachment.clone(),
fields: record.payload.to_json_map(),
}
}
}

/// A record in the Suggest Remote Settings collection.
///
/// Most Suggest records don't carry inline fields except for `type`.
/// Suggestions themselves are typically stored in each record's attachment.
#[derive(Clone, Debug, Deserialize)]
#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(tag = "type")]
pub(crate) enum SuggestRecord {
#[serde(rename = "icon")]
Expand Down Expand Up @@ -214,6 +200,15 @@ pub(crate) enum SuggestRecord {
Geonames,
}

impl SuggestRecord {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This feels a little goofy just so that we can use Record to get_attachment, but setting fields to Map::new( ) also felt weird

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, slightly goofy but this makes sense to me.

fn to_json_map(&self) -> Map<String, Value> {
match serde_json::to_value(self) {
Ok(Value::Object(map)) => map,
_ => unreachable!(),
}
}
}

/// Enum for the different record types that can be consumed.
/// Extracting this from the serialization enum so that we can
/// extend it to get type metadata.
Expand Down Expand Up @@ -564,7 +559,7 @@ pub(crate) struct DownloadedFakespotSuggestion {
}

/// An exposure suggestion record's inline data
#[derive(Clone, Debug, Deserialize)]
#[derive(Clone, Debug, Deserialize, Serialize)]
pub(crate) struct DownloadedExposureRecord {
pub suggestion_type: String,
}
Expand Down Expand Up @@ -644,11 +639,11 @@ impl FullOrPrefixKeywords<String> {
}

/// Global Suggest configuration data to ingest from a configuration record
#[derive(Clone, Debug, Deserialize)]
#[derive(Clone, Debug, Deserialize, Serialize)]
pub(crate) struct DownloadedGlobalConfig {
pub configuration: DownloadedGlobalConfigInner,
}
#[derive(Clone, Debug, Deserialize)]
#[derive(Clone, Debug, Deserialize, Serialize)]
pub(crate) struct DownloadedGlobalConfigInner {
/// The maximum number of times the user can click "Show less frequently"
/// for a suggestion in the UI.
Expand Down
Loading