Skip to content

Commit

Permalink
save
Browse files Browse the repository at this point in the history
  • Loading branch information
nsthorat committed Feb 15, 2024
1 parent b91e496 commit f1a0c42
Show file tree
Hide file tree
Showing 9 changed files with 40 additions and 48 deletions.
4 changes: 2 additions & 2 deletions lilac/formats/openai_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class OpenAIJSON(DatasetFormat):
Taken from: https://platform.openai.com/docs/api-reference/chat
"""

name: ClassVar[str] = 'openai_json'
name: ClassVar[str] = 'OpenAI JSON'
data_schema: Schema = schema(
{
'messages': [
Expand Down Expand Up @@ -88,7 +88,7 @@ class OpenAIConversationJSON(DatasetFormat):
Note that here "messages" is "conversation" for support with common datasets.
"""

name: ClassVar[str] = 'openai_conversation_json'
name: ClassVar[str] = 'OpenAI Conversation JSON'
data_schema: Schema = schema(
{
'conversation': [
Expand Down
2 changes: 1 addition & 1 deletion lilac/formats/openchat.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
class OpenChat(DatasetFormat):
"""OpenChat format."""

name: ClassVar[str] = 'openchat'
name: ClassVar[str] = 'OpenChat'
data_schema: Schema = schema(
{
'items': [
Expand Down
2 changes: 1 addition & 1 deletion lilac/formats/sharegpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def _sharegpt_selector(item: Item, conv_from: str) -> str:
class ShareGPT(DatasetFormat):
"""ShareGPT format."""

name: ClassVar[str] = 'sharegpt'
name: ClassVar[str] = 'ShareGPT'
data_schema: Schema = schema(
{
'conversations': [
Expand Down
22 changes: 6 additions & 16 deletions lilac/router_dataset_signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from pydantic import Field as PydanticField

from .auth import UserInfo, get_session_user, get_user_access
from .config import ClusterInputSelectorConfig
from .dataset_format import DatasetFormatInputSelector, get_dataset_format_cls
from .db_manager import get_dataset
from .router_utils import RouteErrorHandler
Expand Down Expand Up @@ -85,7 +84,7 @@ class ClusterOptions(BaseModel):
"""The request for the cluster endpoint."""

input: Optional[Path] = None
input_selector: Optional[ClusterInputSelectorConfig] = None
input_selector: Optional[str] = None

output_path: Optional[Path] = None
use_garden: bool = PydanticField(
Expand All @@ -111,9 +110,6 @@ def cluster(
if not get_user_access(user).dataset.compute_signals:
raise HTTPException(401, 'User does not have access to compute clusters over this dataset.')

if options.input is None and options.input_selector is None:
raise HTTPException(400, 'Either input or input_selector must be provided.')

dataset = get_dataset(namespace, dataset_name)
manifest = dataset.manifest()

Expand All @@ -129,21 +125,15 @@ def cluster(

format_cls = get_dataset_format_cls(dataset_format.name)
if format_cls is None:
raise ValueError(f'Unknown format: {c.input_selector.format}')
raise ValueError(f'Unknown format: {dataset_format.name}')

format = format_cls()
if format != manifest.dataset_format:
raise ValueError(
f'Cluster input format {c.input_selector.format} does not match '
f'dataset format {manifest.dataset_format}'
)

cluster_input = format_cls.input_selectors[c.input_selector.selector]
cluster_input = format_cls.input_selectors[options.input_selector]

task_name = (
f'[{namespace}/{dataset_name}] Clustering using input selector '
f'"{options.input_selector.selector}"'
f'[{namespace}/{dataset_name}] Clustering using input selector ' f'"{options.input_selector}"'
)
else:
raise HTTPException(400, 'Either input or input_selector must be provided.')

task_id = get_task_manager().task_id(name=task_name)

Expand Down
37 changes: 27 additions & 10 deletions web/blueprint/src/lib/components/ComputeClusterModal.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@
</script>

<script lang="ts">
import {clusterMutation, queryFormatSelectors} from '$lib/queries/datasetQueries';
import {
clusterMutation,
queryDatasetManifest,
queryFormatSelectors
} from '$lib/queries/datasetQueries';
import {queryAuthInfo} from '$lib/queries/serverQueries';
import {serializePath, type Path} from '$lilac';
import {
Expand All @@ -39,12 +43,17 @@
$: canComputeRemotely = $authInfo.data?.access.dataset.execute_remotely;
$: formatSelectorsQuery =
options != null ? queryFormatSelectors(options?.namespace, options?.datasetName) : null;
let selectedFormatSelector: string | undefined = undefined;
options != null ? queryFormatSelectors(options.namespace, options.datasetName) : null;
$: datasetManifest =
options != null ? queryDatasetManifest(options.namespace, options.datasetName) : null;
let selectedFormatSelector = 'none';
let formatSelectors: string[] | undefined = undefined;
let outputColumn: string | undefined = undefined;
$: outputColumnRequired = formatSelectors != null;
$: outputColumnRequired =
formatSelectors != null &&
formatSelectors.length > 0 &&
selectedFormatSelector != null &&
selectedFormatSelector != 'none';
$: {
if (options?.output_path != null) {
outputColumn = serializePath(options.output_path);
Expand All @@ -56,7 +65,6 @@
$formatSelectorsQuery != null &&
$formatSelectorsQuery.data != null
) {
selectedFormatSelector = $formatSelectorsQuery.data[0];
formatSelectors = $formatSelectorsQuery.data;
}
}
Expand All @@ -66,13 +74,15 @@
}
function submit() {
if (!options) return;
$clusterQuery.mutate([
options.namespace,
options.datasetName,
{
input: options.input,
input: selectedFormatSelector == null ? options.input : null,
use_garden: options.use_garden,
output_path: outputColumn,
input_selector: selectedFormatSelector,
overwrite: options.overwrite
}
]);
Expand All @@ -87,16 +97,21 @@
<div class="flex max-w-2xl flex-col gap-y-8">
<div>
<FieldSelect
disabled={selectedFormatSelector != null && selectedFormatSelector != 'none'}
filter={f => f.dtype?.type === 'string'}
defaultPath={options.input}
bind:path={options.input}
labelText="Field"
/>
</div>
{#if formatSelectors != null}
{#if formatSelectors != null && formatSelectors.length > 0}
<div>
<div class="label text-s mb-2 font-medium text-gray-700">Selector</div>
<div class="label text-s mb-2 font-medium text-gray-700">
{$datasetManifest?.data?.dataset_manifest.dataset_format?.['format_name']} selector
</div>
<Select hideLabel={true} bind:selected={selectedFormatSelector} required>
<SelectItem value={'none'} text={'None'} />

{#each formatSelectors as formatSelector}
<SelectItem value={formatSelector} text={formatSelector} />
{/each}
Expand All @@ -105,7 +120,9 @@
{/if}
<div>
<div class="label text-s mb-2 font-medium text-gray-700">
{outputColumnRequired ? '*' : ''} Output column
{outputColumnRequired ? '*' : ''} Output column {!outputColumnRequired
? '(Optional)'
: ''}
</div>
<input
required={outputColumnRequired}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
export let defaultPath: Path | undefined = undefined;
export let path: Path | undefined = undefined;
export let disabled = false;
const datasetViewStore = getDatasetViewContext();
Expand Down Expand Up @@ -83,7 +84,7 @@
<div class="label text-s mb-2 font-medium text-gray-700">
{labelText}
</div>
<Select hideLabel={true} {helperText} bind:selected={selectedPath} required>
<Select hideLabel={true} {helperText} bind:selected={selectedPath} required {disabled}>
{#if sourceFields?.length}
<SelectItemGroup label="Source Fields">
{#each sourceFields as field}
Expand Down
1 change: 0 additions & 1 deletion web/lib/fastapi_client/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ export type { AuthenticationInfo } from './models/AuthenticationInfo';
export type { BinaryFilter } from './models/BinaryFilter';
export type { ClusterInfo } from './models/ClusterInfo';
export type { ClusterInputFormatSelectorInfo } from './models/ClusterInputFormatSelectorInfo';
export type { ClusterInputSelectorConfig } from './models/ClusterInputSelectorConfig';
export type { ClusterOptions } from './models/ClusterOptions';
export type { ClusterResponse } from './models/ClusterResponse';
export type { Column } from './models/Column';
Expand Down
13 changes: 0 additions & 13 deletions web/lib/fastapi_client/models/ClusterInputSelectorConfig.ts

This file was deleted.

4 changes: 1 addition & 3 deletions web/lib/fastapi_client/models/ClusterOptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,12 @@
/* tslint:disable */
/* eslint-disable */

import type { ClusterInputSelectorConfig } from './ClusterInputSelectorConfig';

/**
* The request for the cluster endpoint.
*/
export type ClusterOptions = {
input?: (Array<string> | string | null);
input_selector?: (ClusterInputSelectorConfig | null);
input_selector?: (string | null);
output_path?: (Array<string> | string | null);
/**
* Accelerate computation by running remotely on Lilac Garden.
Expand Down

0 comments on commit f1a0c42

Please sign in to comment.