Commit

Add publishing to pip (#460)
dsmilkov authored Jul 20, 2023
1 parent b762af7 commit 34db2e7
Showing 124 changed files with 134 additions and 93 deletions.
2 changes: 1 addition & 1 deletion .coveragerc
@@ -1,5 +1,5 @@
[run]
- source = src
+ source = lilac
omit =
*_test.py
*/__init__.py
4 changes: 2 additions & 2 deletions .github/labeler.yml
@@ -1,6 +1,6 @@
- # Add 'backend' label to any change within src/ but not web* folders
+ # Add 'backend' label to any change within lilac/ but not web* folders
'backend':
-   - any: ['src/**/*']
+   - any: ['lilac/**/*']

# Add 'frontend' label to any change to blueprint folder
'frontend':
2 changes: 1 addition & 1 deletion .github/workflows/python.yml
@@ -2,7 +2,7 @@ on:
pull_request:
types: [opened, reopened, synchronize]
paths:
-   - 'src/**'
+   - 'lilac/**'
- '**/*.py'
- 'poetry.lock'
- 'poetry.toml'
6 changes: 3 additions & 3 deletions .vscode/settings.json
@@ -63,14 +63,14 @@
"git.confirmSync": false,
"git.autofetch": true,
"git.postCommitCommand": "sync",
- "python.testing.pytestArgs": ["src"],
+ "python.testing.pytestArgs": ["lilac"],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"python.analysis.importFormat": "relative",
"python.analysis.indexing": true,
"python.analysis.useLibraryCodeForTypes": true,
- "python.analysis.extraPaths": ["src"],
- "python.analysis.include": ["src"],
+ "python.analysis.extraPaths": ["lilac"],
+ "python.analysis.include": ["lilac"],
"notebook.output.textLineLimit": 200,
"notebook.output.scrolling": true,
"notebook.lineNumbers": "on",
4 changes: 2 additions & 2 deletions Dockerfile
@@ -20,13 +20,13 @@ COPY LICENSE .
COPY /web/blueprint/build ./web/blueprint/build

# Copy python files.
- COPY /src ./src/
+ COPY /lilac ./lilac/

# Copy the data files. We use glob so docker copy won't fail if the directory doesn't exist.
COPY /dat[a] ./data/

CMD [ \
- "gunicorn", "src.server:app", \
+ "gunicorn", "lilac.server:app", \
"--bind", "0.0.0.0:5432", \
"-k", "uvicorn.workers.UvicornWorker" \
]
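For context, a container built from this Dockerfile would typically be run along these lines; the image name below is illustrative and not taken from this commit:

```sh
# Build the image from the repository root (hypothetical tag name).
docker build -t lilac .

# The gunicorn CMD above binds 0.0.0.0:5432, so publish that port.
docker run -p 5432:5432 lilac
```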
11 changes: 10 additions & 1 deletion README.md
@@ -100,6 +100,15 @@ key = ''.join(random.choices(string.ascii_uppercase + string.digits, k=64))
print(f"LILAC_OAUTH_SECRET_KEY='{key}'")
```

+ ### Publishing on pip
+
+ To authenticate, add the `PYPI_TOKEN` to your `.env.local` file. You can get the token from
+ [pypi.org](https://pypi.org/manage/project/lilacai/settings/). To publish, run:
+
+ ```sh
+ ./scripts/publish_pip.sh
+ ```
+
### Configuration

To use various API's, API keys need to be provided. Create a file named `.env.local` in the root, and add variables that are listed in `.env` with your own values.
@@ -129,7 +138,7 @@ Test JavaScript:
Datasets can be ingested entirely from the UI, however if you prefer to use the CLI you can ingest data with the following command:

```sh
- poetry run python -m src.data_loader \
+ poetry run python -m lilac.data_loader \
--dataset_name=$DATASET \
--output_dir=./data/ \
--config_path=./datasets/the_movies_dataset.json
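The publish script itself is not included in this diff. A minimal sketch of what a Poetry-based `scripts/publish_pip.sh` might look like, assuming `PYPI_TOKEN` is stored in `.env.local` as the README above describes:

```sh
#!/bin/bash
# Hypothetical sketch -- the actual scripts/publish_pip.sh is not shown in this commit.
set -e

# Load PYPI_TOKEN from .env.local (assumes a line of the form PYPI_TOKEN=...).
export $(grep PYPI_TOKEN .env.local | xargs)

# Register the token with Poetry, then build and upload the package to PyPI.
poetry config pypi-token.pypi "$PYPI_TOKEN"
poetry publish --build
```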
24 files renamed without changes.
@@ -134,6 +134,7 @@ def func(xs: Iterable[int]) -> Iterable[int]:
out = sparse_to_dense_compute(sparse_input, func)
assert list(out) == [None, 2, 8, None, None, 4, None, 6, None, None]

+
def test_sparse_to_dense_compute_batching() -> None:
sparse_input = iter([None, 1, 7, None, None, 3, None, 5, None, None])

16 files renamed without changes.
2 changes: 1 addition & 1 deletion src/data_loader.py → lilac/data_loader.py
@@ -2,7 +2,7 @@
To run the source loader as a binary directly:
- poetry run python -m src.data_loader \
+ poetry run python -m lilac.data_loader \
--dataset_name=movies_dataset \
--output_dir=./data/ \
--config_path=./datasets/the_movies_dataset.json
12 files renamed without changes.
2 changes: 1 addition & 1 deletion src/make_openapi.py → lilac/make_openapi.py
@@ -1,6 +1,6 @@
"""Writes the openapi.json file to the specified output.
- This is meant to run as a standalone script. It lives in src/ so that we can import the FastAPI app.
+ This is meant to run as a standalone script. It lives in lilac/ so we can import the FastAPI app.
"""
import json

2 files renamed without changes.
2 changes: 1 addition & 1 deletion src/router_data_loader.py → lilac/router_data_loader.py
@@ -2,7 +2,7 @@
To run the source loader as a binary directly:
- poetry run python -m src.datasets.loader \
+ poetry run python -m lilac.datasets.loader \
--dataset_name=$DATASET \
--output_dir=./data/ \
--config_path=./datasets/the_movies_dataset.json
40 files renamed without changes.
20 changes: 6 additions & 14 deletions notebooks/API.ipynb
@@ -12,28 +12,20 @@
"execution_count": 1,
"metadata": {},
"outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/Users/dsmilkov/code/lilac/.venv/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
- " from .autonotebook import tqdm as notebook_tqdm\n"
- ]
- },
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Computing signal \"signal_name='concept_score' embedding='sbert' namespace='lilac' concept_name='legal-termination' draft='main' num_negative_examples=100\" took 0.280s.\n",
- "Wrote signal manifest to ./data/datasets/local/legal-clauses/clause_text/sbert/embedding/lilac/legal-termination/v31/signal_manifest.json\n"
+ "Computing signal \"signal_name='concept_score' embedding='sbert' namespace='lilac' concept_name='legal-termination' draft='main' num_negative_examples=100\" took 0.339s.\n",
+ "Wrote signal manifest to ./data/datasets/local/legal-clauses/clause_text/sbert/embedding/lilac/legal-termination/v33/signal_manifest.json\n"
]
}
],
"source": [
- "from src.db_manager import get_dataset, set_default_dataset_cls\n",
- "from src.data.dataset_duckdb import DatasetDuckDB\n",
- "from src.signals.concept_scorer import ConceptScoreSignal\n",
- "from src.signals.default_signals import register_default_signals\n",
+ "from lilac.db_manager import get_dataset, set_default_dataset_cls\n",
+ "from lilac.data.dataset_duckdb import DatasetDuckDB\n",
+ "from lilac.signals.concept_scorer import ConceptScoreSignal\n",
+ "from lilac.signals.default_signals import register_default_signals\n",
"\n",
"register_default_signals()\n",
"set_default_dataset_cls(DatasetDuckDB)\n",
6 changes: 3 additions & 3 deletions notebooks/Sentiment.ipynb
@@ -265,7 +265,7 @@
}
],
"source": [
- "from src.embeddings.openai import OpenAI\n",
+ "from lilac.embeddings.openai import OpenAI\n",
"\n",
"signal = OpenAI()\n",
"signal.setup()\n",
@@ -318,7 +318,7 @@
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.metrics import roc_auc_score, f1_score\n",
- "from src.utils import DebugTimer\n",
+ "from lilac.utils import DebugTimer\n",
"\n",
"model = LogisticRegression(\n",
" class_weight='balanced', C=30, tol=1e-5, warm_start=True, max_iter=10_000)\n",
@@ -356,7 +356,7 @@
"metadata": {},
"outputs": [],
"source": [
- "from src.concepts.concept import Concept, Example\n",
+ "from lilac.concepts.concept import Concept, Example\n",
"\n",
"\n",
"def save_concept(positive_sentiment):\n",
4 changes: 2 additions & 2 deletions notebooks/Toxicity.ipynb
@@ -181,7 +181,7 @@
"source": [
"from sklearn.metrics import roc_auc_score\n",
"\n",
- "from src.utils import DebugTimer\n",
+ "from lilac.utils import DebugTimer\n",
"\n",
"SAMPLE_SIZE_PER_GROUP = 512\n",
"LABEL_TYPE = 'obscene'\n",
@@ -214,7 +214,7 @@
"metadata": {},
"outputs": [],
"source": [
- "from src.concepts.concept import Concept, Example\n",
+ "from lilac.concepts.concept import Concept, Example\n",
"\n",
"data = {}\n",
"\n",
79 changes: 47 additions & 32 deletions poetry.lock

Some generated files are not rendered by default.

19 changes: 10 additions & 9 deletions pyproject.toml
@@ -1,10 +1,12 @@
[tool.poetry]
authors = ["Lilac AI Inc. <[email protected]>"]
- description = "Index your dataset"
- name = "lilac"
- packages = [{include = "src"}]
+ description = "Organize unstructured data"
+ license = "Apache-2.0"
+ name = "lilacai"
+ repository = "https://github.com/lilacai/lilac"
+ packages = [{include = "lilac"}]
readme = "README.md"
- version = "0.0.1"
+ version = "0.0.6"

[tool.poetry.dependencies]

Expand All @@ -27,9 +29,9 @@ pillow = "^9.3.0" # Image processing.
psutil = "^5.9.5"
pyarrow = "^9.0.0"
pydantic = "^1.10.11"
- python = "~3.9"
+ python = ">=3.9,<4.0"
python-dotenv = "^1.0.0"
- requests = "^2.28.1"
+ requests = "^2"
scikit-learn = "^1.3.0"
tenacity = "^8.2.2"
tqdm = "^4.65.0"
@@ -64,7 +66,6 @@ regex = "^2023.6.3"
# For language detection.
langdetect = {version = "^1.0.9", optional = true}

-
[tool.poetry.extras]
all = [
"cohere",
@@ -137,11 +138,11 @@ exclude = [
"**/node_modules",
"**/__pycache__",
]
- include = ["src"]
+ include = ["lilac"]

[tool.ruff]
line-length = 100
- src = ["src"]
+ src = ["lilac"]

# Enable Pyflakes `F`, pycodestyle "W" and `E`, "Q" quotes, "I" imports.
fix = true
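Note that the distribution name on PyPI becomes `lilacai` while the importable package directory stays `lilac` (`packages = [{include = "lilac"}]`), so the install and import names differ. A quick sanity check, assuming the package is published as configured above:

```sh
# The PyPI distribution is "lilacai"; the Python module it installs is "lilac".
pip install lilacai
python -c "import lilac"
```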
2 changes: 1 addition & 1 deletion pytest.ini
@@ -1,7 +1,7 @@
# Filter deprecation warnings from subpackages so they don't spam the console when testing.
[pytest]
testpaths =
-   src
+   lilac
filterwarnings =
ignore::DeprecationWarning:bokeh.*:
ignore::DeprecationWarning:google.api_core.*:
6 changes: 3 additions & 3 deletions run_server_dev.sh
@@ -10,15 +10,15 @@ npm run dev --workspace web/blueprint -- --open &
pid[2]=$!

# Run the node server.
- poetry run uvicorn src.server:app --reload --port 5432 --host 0.0.0.0 \
-   --reload-dir src &
+ poetry run uvicorn lilac.server:app --reload --port 5432 --host 0.0.0.0 \
+   --reload-dir lilac &
pid[1]=$!

poetry run watchmedo shell-command \
--patterns="*.py" \
--recursive \
--command='poetry run python -m scripts.make_fastapi_client' \
-   ./src &
+   ./lilac &
pid[0]=$!

# When control+c is pressed, kill all process ids.