From 36281aecb06740eb06abbf24e6e219ba1b1e5e77 Mon Sep 17 00:00:00 2001 From: Nikhil Thorat Date: Fri, 26 Jan 2024 11:27:57 -0500 Subject: [PATCH] Rename USE_TABLE_INDEX => LILAC_USE_TABLE_INDEX. Add LILAC_PROD_MODE. (#1134) Renames USE_TABLE_INDEX to LILAC_USE_TABLE_INDEX, and then makes sure it's backwards compatible. Add LILAC_PROD_MODE (exposed as the LILAC_DISABLE_ERROR_NOTIFICATIONS environment variable), which disables toast error notifications for the public demo, where they make the UI look janky and broken. Also add embeddings to datasets that were missing them. --- lilac/data/dataset_duckdb.py | 8 ++--- lilac/env.py | 14 +++++++- lilac/server.py | 2 ++ lilac_hf_space.yml | 18 +++++++++- .../lib/components/ErrorNotifications.svelte | 33 +++++++++++++++++++ web/blueprint/src/routes/+layout.svelte | 27 +++------------ web/lib/fastapi_client/models/ServerStatus.ts | 1 + 7 files changed, 74 insertions(+), 29 deletions(-) create mode 100644 web/blueprint/src/lib/components/ErrorNotifications.svelte diff --git a/lilac/data/dataset_duckdb.py b/lilac/data/dataset_duckdb.py index 98a31d14b..aa25c2c33 100644 --- a/lilac/data/dataset_duckdb.py +++ b/lilac/data/dataset_duckdb.py @@ -335,7 +335,7 @@ def __init__( self._signal_manifests: list[SignalManifest] = [] self._map_manifests: list[MapManifest] = [] self._label_schemas: dict[str, Schema] = {} - if env('USE_TABLE_INDEX', default=False): + if env('LILAC_USE_TABLE_INDEX', default=False): self.con = duckdb.connect(database=os.path.join(self.dataset_path, DUCKDB_CACHE_FILE)) else: self.con = duckdb.connect(database=':memory:') @@ -521,7 +521,7 @@ def _recompute_joint_table(self, latest_mtime_micro_sec: int) -> DatasetManifest [SOURCE_VIEW_NAME] + [f'LEFT JOIN {escape_col_name(parquet_id)} USING ({ROWID})' for parquet_id in parquet_ids] ) - if env('USE_TABLE_INDEX', default=False): + if env('LILAC_USE_TABLE_INDEX', default=False): self.con.execute( """CREATE TABLE IF NOT EXISTS mtime_cache AS (SELECT CAST(0 AS bigint) AS mtime);""" @@ -572,7 +572,7 @@ def _clear_joint_table_cache(self) -> None: """Clears the 
cache for the joint table.""" self._recompute_joint_table.cache_clear() self._pivot_cache.clear() - if env('USE_TABLE_INDEX', default=False): + if env('LILAC_USE_TABLE_INDEX', default=False): self.con.execute('DROP TABLE IF EXISTS mtime_cache') def _add_map_keys_to_schema(self, path: PathTuple, field: Field, merged_schema: Schema) -> None: @@ -2901,7 +2901,7 @@ def _create_where( # So, we insert a range clause to limit the extent of the index scan. This optimization # works well because nearly all of our queries are sorted by rowid, meaning that min/max # will narrow down the index scan to a small range. - if env('USE_TABLE_INDEX', default=False) and ROWID in select_str: + if env('LILAC_USE_TABLE_INDEX', default=False) and ROWID in select_str: min_row, max_row = min(filter_list_val), max(filter_list_val) filter_query += f' AND {ROWID} BETWEEN {min_row} AND {max_row}' # wrap in parens to isolate from other filters, just in case? diff --git a/lilac/env.py b/lilac/env.py index c09fd469a..1a16c552a 100644 --- a/lilac/env.py +++ b/lilac/env.py @@ -37,7 +37,16 @@ class LilacEnvironment(BaseModel): description='Turn on Lilac debug mode to log queries and timing information.' ) DISABLE_LOGS: str = PydanticField(description='Disable log() statements to the console.') - USE_TABLE_INDEX: str = PydanticField(description='Use persistent tables with rowid indexes.') + USE_TABLE_INDEX: str = PydanticField( + description='Use persistent tables with rowid indexes.' + ' NOTE: This is deprecated in favor of LILAC_USE_TABLE_INDEX.' + ) + LILAC_USE_TABLE_INDEX: str = PydanticField( + description='Use persistent tables with rowid indexes.' + ) + LILAC_DISABLE_ERROR_NOTIFICATIONS: str = PydanticField( + description='Set lilac in production mode. This will disable error messages in the UI.' + ) # API Keys. 
OPENAI_API_KEY: str = PydanticField( @@ -140,6 +149,9 @@ def _init_env() -> None: def env(key: str, default: Optional[Any] = None) -> Any: """Return the value of an environment variable.""" + # For backwards compatibility, shim USE_TABLE_INDEX to LILAC_USE_TABLE_INDEX. + if key == 'USE_TABLE_INDEX': + key = 'LILAC_USE_TABLE_INDEX' return os.environ.get(key, default) diff --git a/lilac/server.py b/lilac/server.py index b6664e021..c374090ac 100644 --- a/lilac/server.py +++ b/lilac/server.py @@ -134,6 +134,7 @@ class ServerStatus(BaseModel): version: str google_analytics_enabled: bool + disable_error_notifications: bool @app.get('/status') @@ -142,6 +143,7 @@ def status() -> ServerStatus: return ServerStatus( version=metadata.version('lilac'), google_analytics_enabled=env('GOOGLE_ANALYTICS_ENABLED', False), + disable_error_notifications=env('LILAC_DISABLE_ERROR_NOTIFICATIONS', False), ) diff --git a/lilac_hf_space.yml b/lilac_hf_space.yml index a6db68735..90b882124 100644 --- a/lilac_hf_space.yml +++ b/lilac_hf_space.yml @@ -134,6 +134,12 @@ datasets: - - conversations - '*' - value + embeddings: + - embedding: gte-small + path: + - conversations + - '*' + - value - namespace: lilac name: UltraChat-200k @@ -159,6 +165,13 @@ datasets: media_paths: - prompt - completion + embeddings: + - embedding: gte-small + path: + - prompt + - embedding: gte-small + path: + - completion ## Eval datasets - namespace: lilac @@ -385,7 +398,10 @@ datasets: - instruction - input - output - + embeddings: + - embedding: gte-small + path: + - instruction signals: - signal_name: text_statistics - signal_name: lang_detection diff --git a/web/blueprint/src/lib/components/ErrorNotifications.svelte b/web/blueprint/src/lib/components/ErrorNotifications.svelte new file mode 100644 index 000000000..f11bfce06 --- /dev/null +++ b/web/blueprint/src/lib/components/ErrorNotifications.svelte @@ -0,0 +1,33 @@ + + +{#if !disableErrorNotifications} + {#each $apiErrors as error} + { + $apiErrors = 
$apiErrors.filter(e => e !== error); + }} + > +
+ +
+
+ {/each} + {#if showError} + + {/if} +{/if} diff --git a/web/blueprint/src/routes/+layout.svelte b/web/blueprint/src/routes/+layout.svelte index 7749e6012..4f0490152 100644 --- a/web/blueprint/src/routes/+layout.svelte +++ b/web/blueprint/src/routes/+layout.svelte @@ -1,8 +1,7 @@