Skip to content

Commit

Permalink
Switch to ruff formatting. (#810)
Browse files: browse the repository at this point in the history
<img width="469" alt="image"
src="https://github.com/lilacai/lilac/assets/1100749/8d3a81a9-9989-4cb0-a220-4721aec535ac">

Compare to head:
<img width="481" alt="image"
src="https://github.com/lilacai/lilac/assets/1100749/147affc7-3ea6-42e5-a63e-59223ac181d7">



This is insanely fast... @charliermarsh good work!
  • Loading branch information
nsthorat authored Nov 1, 2023
1 parent 9d07ea8 commit 7046727
Show file tree
Hide file tree
Showing 136 changed files with 5,236 additions and 5,609 deletions.
1 change: 0 additions & 1 deletion .vscode/extensions.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
"recommendations": [
"ms-python.python",
"ms-python.vscode-pylance",
"ms-python.isort",
"GitHub.vscode-pull-request-github",
"esbenp.prettier-vscode",
"dbaeumer.vscode-eslint",
Expand Down
2 changes: 1 addition & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
},
"[python]": {
"editor.formatOnSave": true,
"editor.defaultFormatter": "charliermarsh.ruff",
"editor.codeActionsOnSave": {
"source.fixAll": false,
"source.organizeImports": true,
Expand All @@ -62,7 +63,6 @@
"eslint.validate": ["typescript", "svelte"],
"python.envFile": "${workspaceFolder}/.venv",
"python.linting.mypyEnabled": true,
"python.formatting.provider": "yapf",
"python.defaultInterpreterPath": "${workspaceFolder}/.venv/bin/python",
"git.enableSmartCommit": true,
"git.confirmSync": false,
Expand Down
9 changes: 7 additions & 2 deletions lilac/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,13 @@
set_default_dataset_cls(DatasetDuckDB)

# Avoids polluting the results of dir(__package__).
del (metadata, register_default_sources, register_default_signals, set_default_dataset_cls,
DatasetDuckDB)
del (
metadata,
register_default_sources,
register_default_signals,
set_default_dataset_cls,
DatasetDuckDB,
)

__all__ = [
'start_server',
Expand Down
15 changes: 12 additions & 3 deletions lilac/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@

class ConceptAuthorizationException(Exception):
"""Authorization exceptions thrown by the concept database."""

pass


class DatasetUserAccess(BaseModel):
"""User access for datasets."""

# Whether the user can compute a signal.
compute_signals: bool
# Whether the user can delete a dataset.
Expand All @@ -33,12 +35,14 @@ class DatasetUserAccess(BaseModel):

class ConceptUserAccess(BaseModel):
"""User access for concepts."""

# Whether the user can delete any concept (not their own).
delete_any_concept: bool


class UserAccess(BaseModel):
"""User access."""

is_admin: bool = False

create_dataset: bool
Expand All @@ -50,6 +54,7 @@ class UserAccess(BaseModel):

class UserInfo(BaseModel):
"""User information."""

id: str
email: str
name: str
Expand All @@ -59,6 +64,7 @@ class UserInfo(BaseModel):

class AuthenticationInfo(BaseModel):
"""Authentication information for the user."""

user: Optional[UserInfo] = None
access: UserAccess
auth_enabled: bool
Expand Down Expand Up @@ -109,7 +115,8 @@ def get_user_access(user_info: Optional[UserInfo]) -> UserAccess:
edit_labels=bool(env('LILAC_AUTH_USER_EDIT_LABELS', False)),
label_all=not bool(env('LILAC_AUTH_USER_DISABLE_LABEL_ALL', False)),
),
concept=ConceptUserAccess(delete_any_concept=False))
concept=ConceptUserAccess(delete_any_concept=False),
)

return UserAccess(
is_admin=is_admin,
Expand All @@ -121,5 +128,7 @@ def get_user_access(user_info: Optional[UserInfo]) -> UserAccess:
update_settings=True,
create_label_type=True,
edit_labels=True,
label_all=True),
concept=ConceptUserAccess(delete_any_concept=True))
label_all=True,
),
concept=ConceptUserAccess(delete_any_concept=True),
)
39 changes: 25 additions & 14 deletions lilac/batch_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
from .utils import chunks, is_primitive


def _deep_flatten(input: Union[Iterator, object],
is_primitive_predicate: Callable[[object], bool]) -> Generator:
def _deep_flatten(
input: Union[Iterator, object], is_primitive_predicate: Callable[[object], bool]
) -> Generator:
"""Flattens a nested iterable."""
if is_primitive_predicate(input):
yield input
Expand All @@ -20,8 +21,9 @@ def _deep_flatten(input: Union[Iterator, object],
yield from _deep_flatten(elem, is_primitive_predicate)


def deep_flatten(input: Union[Iterator, Iterable],
is_primitive_predicate: Callable[[object], bool] = is_primitive) -> Iterator:
def deep_flatten(
input: Union[Iterator, Iterable], is_primitive_predicate: Callable[[object], bool] = is_primitive
) -> Iterator:
"""Flattens a deeply nested iterator.
Primitives and dictionaries are not flattened. The user can also provide a predicate to determine
Expand All @@ -30,8 +32,11 @@ def deep_flatten(input: Union[Iterator, Iterable],
return _deep_flatten(input, is_primitive_predicate)


def _deep_unflatten(flat_input: Iterator[list[object]], original_input: Union[Iterable, object],
is_primitive_predicate: Callable[[object], bool]) -> Union[list, dict]:
def _deep_unflatten(
flat_input: Iterator[list[object]],
original_input: Union[Iterable, object],
is_primitive_predicate: Callable[[object], bool],
) -> Union[list, dict]:
"""Unflattens a deeply flattened iterable according to the original iterable's structure."""
if is_primitive_predicate(original_input):
return next(flat_input)
Expand All @@ -44,9 +49,11 @@ def _deep_unflatten(flat_input: Iterator[list[object]], original_input: Union[It
return [_deep_unflatten(flat_input, orig_elem, is_primitive_predicate) for orig_elem in values]


def deep_unflatten(flat_input: Union[Iterable, Iterator],
original_input: Union[Iterable, object],
is_primitive_predicate: Callable[[object], bool] = is_primitive) -> Generator:
def deep_unflatten(
flat_input: Union[Iterable, Iterator],
original_input: Union[Iterable, object],
is_primitive_predicate: Callable[[object], bool] = is_primitive,
) -> Generator:
"""Unflattens a deeply flattened iterable according to the original iterable's structure."""
flat_input_iter = iter(flat_input)
if isinstance(original_input, Iterable) and not is_primitive_predicate(original_input):
Expand All @@ -72,8 +79,10 @@ def flatten(inputs: Iterable[Iterable[TFlatten]]) -> Iterator[TFlatten]:
TUnflatten = TypeVar('TUnflatten')


def unflatten(flat_inputs: Union[Iterable[TUnflatten], Iterator[TUnflatten]],
original_inputs: Iterable[Iterable[Any]]) -> Iterator[list[TUnflatten]]:
def unflatten(
flat_inputs: Union[Iterable[TUnflatten], Iterator[TUnflatten]],
original_inputs: Iterable[Iterable[Any]],
) -> Iterator[list[TUnflatten]]:
"""Unflattens a flattened iterable according to the original iterable's structure."""
flat_inputs_iter = iter(flat_inputs)
for original_input in original_inputs:
Expand All @@ -84,9 +93,11 @@ def unflatten(flat_inputs: Union[Iterable[TUnflatten], Iterator[TUnflatten]],
TFlatBatchedOutput = TypeVar('TFlatBatchedOutput')


def flat_batched_compute(input: Iterable[Iterable[TFlatBatchedInput]],
f: Callable[[list[TFlatBatchedInput]], Iterable[TFlatBatchedOutput]],
batch_size: int) -> Iterable[Iterable[TFlatBatchedOutput]]:
def flat_batched_compute(
input: Iterable[Iterable[TFlatBatchedInput]],
f: Callable[[list[TFlatBatchedInput]], Iterable[TFlatBatchedOutput]],
batch_size: int,
) -> Iterable[Iterable[TFlatBatchedOutput]]:
"""Flatten the input, batched call f, and return the output unflattened."""
# Tee the input so we can use it twice for the input and output shapes.
input_1, input_2 = itertools.tee(input, 2)
Expand Down
17 changes: 7 additions & 10 deletions lilac/batch_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,11 @@ def f(inputs: Iterable[int]) -> list[int]:


def test_batched_compute_np() -> None:
input = [[np.array([1, 1])], [np.array([2, 2]), np.array([3, 3])],
[np.array([4, 4]), np.array([5, 5])]]
input = [
[np.array([1, 1])],
[np.array([2, 2]), np.array([3, 3])],
[np.array([4, 4]), np.array([5, 5])],
]
batch_size = 2 # Does not evenly split any input

def f(inputs: Iterable[np.ndarray]) -> Iterable[float]:
Expand Down Expand Up @@ -53,10 +56,7 @@ def test_deep_flatten_primitive() -> None:


def test_deep_flatten_np() -> None:
input = [
[np.array([1, 1])],
[np.array([2, 2]), np.array([3, 3])],
]
input = [[np.array([1, 1])], [np.array([2, 2]), np.array([3, 3])]]
result = list(deep_flatten(input))

assert len(result) == 3
Expand Down Expand Up @@ -95,10 +95,7 @@ def test_deep_unflatten_primitive_list() -> None:


def test_deep_unflatten_np() -> None:
input = [
[np.array([1, 1])],
[np.array([2, 2]), np.array([3, 3])],
]
input = [[np.array([1, 1])], [np.array([2, 2]), np.array([3, 3])]]
result = list(deep_unflatten(deep_flatten(input), input))

assert len(result) == 2
Expand Down
60 changes: 38 additions & 22 deletions lilac/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,22 @@
'--host',
help='The host address where the web server will listen to.',
default='127.0.0.1',
type=str)
type=str,
)
@click.option('--port', help='The port number of the web-server', type=int, default=5432)
@click.option(
'--load',
help='Load from the project config upon bootup.',
type=bool,
is_flag=True,
default=False)
'--load', help='Load from the project config upon bootup.', type=bool, is_flag=True, default=False
)
def start(project_dir: str, host: str, port: int, load: bool) -> None:
"""Starts the Lilac web server."""
project_dir = project_dir_from_args(project_dir)
if not dir_is_project(project_dir):
value = str(
click.prompt(
f'Lilac will create a project in `{abspath(project_dir)}`. Do you want to continue? (y/n)',
type=str)).lower()
type=str,
)
).lower()
if value == 'n':
exit()

Expand All @@ -52,7 +52,9 @@ def init_command(project_dir: str) -> None:
value = str(
click.prompt(
f'Lilac will create a project in `{abspath(project_dir)}`. Do you want to continue? (y/n)',
type=str)).lower()
type=str,
)
).lower()
if value == 'n':
exit()

Expand All @@ -66,20 +68,23 @@ def init_command(project_dir: str) -> None:
type=str,
help='[Optional] The path to a json or yml file describing the configuration. '
'The file contents should be an instance of `lilac.Config` or `lilac.DatasetConfig`. '
'When not defined, uses `LILAC_PROJECT_DIR`/lilac.yml.')
'When not defined, uses `LILAC_PROJECT_DIR`/lilac.yml.',
)
@click.option(
'--overwrite',
help='When True, runs all data from scratch, overwriting existing data. When false, only'
'load new datasets, embeddings, and signals.',
type=bool,
is_flag=True,
default=False)
default=False,
)
def load_command(project_dir: str, config_path: str, overwrite: bool) -> None:
"""Load from a project configuration."""
project_dir = project_dir or get_project_dir()
if not project_dir:
raise ValueError(
'--project_dir or the environment variable `LILAC_PROJECT_DIR` must be defined.')
'--project_dir or the environment variable `LILAC_PROJECT_DIR` must be defined.'
)

load(project_dir, config_path, overwrite)

Expand All @@ -100,59 +105,69 @@ def hf_docker_start_command() -> None:
@click.option(
'--project_dir',
help='The project directory to use for the demo. Defaults to `env.LILAC_PROJECT_DIR`.',
type=str)
type=str,
)
@click.option(
'--hf_space',
help='The huggingface space. Should be formatted like `SPACE_ORG/SPACE_NAME`.',
type=str,
required=True)
required=True,
)
@click.option('--dataset', help='The name of a dataset to upload', type=str, multiple=True)
@click.option(
'--make_datasets_public',
help='When true, sets the huggingface datasets uploaded to public. Defaults to false.',
is_flag=True,
default=False)
default=False,
)
@click.option(
'--concept',
help='The name of a concept to upload. By default all lilac/ concepts are uploaded.',
type=str,
multiple=True)
multiple=True,
)
@click.option(
'--skip_cache',
help='Skip uploading the cache files from .cache/lilac which contain cached concept pkl models.',
type=bool,
is_flag=True,
default=False)
default=False,
)
@click.option(
'--skip_data_upload',
help='When true, only uploads the wheel files without any other changes.',
is_flag=True,
default=False)
default=False,
)
@click.option(
'--create_space',
help='When True, creates the HuggingFace space if it doesnt exist. The space will be created '
'with the storage type defined by --hf_space_storage.',
is_flag=True,
default=False)
default=False,
)
@click.option(
'--load_on_space',
help='When True, loads the datasets from your project in the space and does not upload data. '
'NOTE: This could be expensive if your project config locally has embeddings as they will be '
'recomputed in HuggingFace.',
is_flag=True,
default=False)
default=False,
)
@click.option(
'--hf_space_storage',
help='If defined, sets the HuggingFace space persistent storage type. '
'NOTE: This only actually sets the space storage type when creating the space. '
'For more details, see https://huggingface.co/docs/hub/spaces-storage',
type=click.Choice(['small', 'medium', 'large'], case_sensitive=False),
default=None)
default=None,
)
@click.option(
'--hf_token',
help='The HuggingFace access token to use when making datasets private. '
'This can also be set via the `HF_ACCESS_TOKEN` environment flag.',
type=str)
type=str,
)
def deploy_project_command(
project_dir: str,
hf_space: str,
Expand Down Expand Up @@ -185,7 +200,8 @@ def deploy_project_command(
create_space=create_space,
load_on_space=load_on_space,
hf_space_storage=hf_space_storage,
hf_token=hf_token)
hf_token=hf_token,
)


@click.command()
Expand Down
8 changes: 1 addition & 7 deletions lilac/concepts/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,4 @@
from .concept import Example, ExampleIn
from .db_concept import ConceptUpdate, DiskConceptDB, DiskConceptModelDB

__all__ = [
'DiskConceptDB',
'DiskConceptModelDB',
'Example',
'ExampleIn',
'ConceptUpdate',
]
__all__ = ['DiskConceptDB', 'DiskConceptModelDB', 'Example', 'ExampleIn', 'ConceptUpdate']
Loading

0 comments on commit 7046727

Please sign in to comment.