-
Notifications
You must be signed in to change notification settings - Fork 96
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Check in the python and frontend code.
- Loading branch information
0 parents
commit 0ae41c0
Showing
114 changed files
with
24,201 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
[run] | ||
source = src | ||
omit = | ||
*_test.py | ||
*/__init__.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
{ | ||
"extends": [ | ||
"eslint:recommended", | ||
// https://khalilstemmler.com/blogs/typescript/eslint-for-typescript/ | ||
"plugin:@typescript-eslint/eslint-recommended", | ||
"plugin:@typescript-eslint/recommended", | ||
"prettier" | ||
], | ||
"parser": "@babel/eslint-parser", | ||
"plugins": ["prettier"], | ||
"rules": { | ||
"prettier/prettier": ["error"], | ||
"max-len": ["error", {"code": 100}] | ||
}, | ||
"ignorePatterns": ["**/node_modules/**/*", "**/dist/**/*"], | ||
"parserOptions": { | ||
"requireConfigFile": false | ||
}, | ||
"overrides": [ | ||
{ | ||
"files": ["*.ts", "*.tsx"], | ||
"parser": "@typescript-eslint/parser", | ||
"plugins": ["@typescript-eslint"], | ||
// See https://github.com/typescript-eslint/typescript-eslint/blob/main/docs/linting/TROUBLESHOOTING.md#i-get-errors-from-the-no-undef-rule-about-global-variables-not-being-defined-even-though-there-are-no-typescript-errors | ||
"rules": { | ||
"no-undef": "off", | ||
"@typescript-eslint/no-non-null-assertion": "off" | ||
} | ||
} | ||
], | ||
"env": { | ||
"browser": true, | ||
"node": true | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
on: | ||
pull_request: | ||
types: [opened, reopened, synchronize] | ||
|
||
jobs: | ||
build: | ||
runs-on: ubuntu-latest | ||
|
||
steps: | ||
- uses: actions/checkout@v3 | ||
- name: Install poetry | ||
run: pipx install poetry | ||
- uses: actions/setup-python@v4 | ||
with: | ||
python-version: '3.9.13' | ||
cache: 'poetry' | ||
- name: Install dependencies | ||
run: | | ||
./scripts/setup_py.sh | ||
- name: Lint python | ||
run: | | ||
poetry run ./scripts/lint_py.sh | ||
- name: Test python | ||
run: | | ||
poetry run ./scripts/test_py.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
on: | ||
pull_request: | ||
types: [opened, reopened, synchronize] | ||
|
||
jobs: | ||
build: | ||
runs-on: ubuntu-latest | ||
strategy: | ||
matrix: | ||
node-version: [16.x] | ||
|
||
steps: | ||
- uses: actions/checkout@v3 | ||
- name: Use Node.js ${{ matrix.node-version }} | ||
uses: actions/setup-node@v3 | ||
with: | ||
node-version: ${{ matrix.node-version }} | ||
cache: 'npm' | ||
cache-dependency-path: | | ||
package-lock.json | ||
server/package-lock.json | ||
web/package-lock.json | ||
- name: Install dependencies | ||
run: | | ||
./scripts/setup_ts.sh | ||
- name: Lint and build typescript | ||
run: | | ||
./scripts/lint_ts.sh | ||
- name: 'Test typescript' | ||
run: | | ||
./scripts/test_ts.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Python. | ||
.mypy_cache/ | ||
.venv/ | ||
__pycache__/ | ||
.coverage | ||
cloned_repos/ | ||
.pytest_cache/ | ||
py_coverage_html/ | ||
*.deps.txt | ||
|
||
# Mac OS. | ||
.DS_Store | ||
|
||
# Node. | ||
node_modules/ | ||
dist/ | ||
|
||
# Data. | ||
gcs_cache/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{ | ||
"singleQuote": true, | ||
"printWidth": 100, | ||
"bracketSpacing": false | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
3.9.13 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
[style] | ||
based_on_style = google | ||
column_limit = 100 | ||
indent_width = 2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
{ | ||
// See https://go.microsoft.com/fwlink/?LinkId=827846 to learn about workspace recommendations. | ||
// Extension identifier format: ${publisher}.${name}. Example: vscode.csharp | ||
// List of extensions which should be recommended for users of this workspace. | ||
"recommendations": [ | ||
"ms-python.python", | ||
"ms-python.vscode-pylance", | ||
"GitHub.vscode-pull-request-github", | ||
"esbenp.prettier-vscode", | ||
"dbaeumer.vscode-eslint", | ||
"clinyong.vscode-css-modules", | ||
"csstools.postcss", | ||
"bungcip.better-toml", | ||
"breadnaught.vscode-ignore", | ||
"MS-vsliveshare.vsliveshare", | ||
"charliermarsh.ruff" | ||
], | ||
// List of extensions recommended by VS Code that should not be recommended for users of this workspace. | ||
"unwantedRecommendations": [] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
{ | ||
"search.exclude": { | ||
"**/node_modules": true, | ||
"**/.venv/": true, | ||
"**/.mypy_cache/": true, | ||
"**/dist/": true, | ||
"**/yarn.lock": true, | ||
"**/poetry.lock": true, | ||
"**/package-lock.json": true | ||
}, | ||
"files.trimTrailingWhitespace": true, | ||
"editor.formatOnSave": true, | ||
"editor.tabSize": 2, | ||
"editor.insertSpaces": true, | ||
"editor.rulers": [100], | ||
"files.insertFinalNewline": true, | ||
"editor.detectIndentation": false, | ||
"editor.wrappingIndent": "none", | ||
"files.exclude": { | ||
"**/__pycache__": true, | ||
"**/.venv/": true, | ||
"**/.mypy_cache/": true, | ||
"**/node_modules": true | ||
}, | ||
"typescript.tsdk": "./node_modules/typescript/lib", | ||
"[typescript]": { | ||
"editor.defaultFormatter": "esbenp.prettier-vscode" | ||
}, | ||
"[typescriptreact]": { | ||
"editor.defaultFormatter": "esbenp.prettier-vscode" | ||
}, | ||
"[html]": { | ||
"editor.defaultFormatter": "esbenp.prettier-vscode" | ||
}, | ||
"[json]": { | ||
"editor.defaultFormatter": "esbenp.prettier-vscode" | ||
}, | ||
"[jsonc]": { | ||
"editor.defaultFormatter": "esbenp.prettier-vscode" | ||
}, | ||
"[python]": { | ||
"editor.formatOnSave": true, | ||
"editor.codeActionsOnSave": { | ||
"source.fixAll": true, | ||
"source.organizeImports": true | ||
} | ||
}, | ||
"eslint.workingDirectories": ["auto"], | ||
"eslint.validate": ["typescript"], | ||
"python.envFile": "${workspaceFolder}/.venv", | ||
"python.linting.mypyEnabled": true, | ||
"python.formatting.provider": "yapf", | ||
"python.defaultInterpreterPath": "${workspaceFolder}/.venv/bin/python", | ||
"git.enableSmartCommit": true, | ||
"git.confirmSync": false, | ||
"git.autofetch": true, | ||
"git.postCommitCommand": "sync", | ||
"python.testing.pytestArgs": ["src"], | ||
"python.testing.unittestEnabled": false, | ||
"python.testing.pytestEnabled": true, | ||
"python.analysis.importFormat": "relative", | ||
"python.analysis.indexing": true, | ||
"python.analysis.useLibraryCodeForTypes": true, | ||
"python.analysis.extraPaths": ["src"], | ||
"python.analysis.include": ["src"] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
# Lilac | ||
|
||
### Dev setup | ||
|
||
Before you start developing, install the following tools: | ||
|
||
- [Install Xcode and sign the license](https://apps.apple.com/us/app/xcode/id497799835?mt=12) (MacOS) | ||
- [Xcode command line tools](https://mac.install.guide/commandlinetools/4.html) (MacOS) | ||
- [homebrew](https://brew.sh/) (MacOS) | ||
- [pyenv](https://github.com/pyenv/pyenv) (Python version management) | ||
- [Current python version](./.python-version) | ||
- [Python Poetry](https://pypi.org/project/poetry/) | ||
- [GitHub CLI](https://cli.github.com/) | ||
- [VSCode](https://code.visualstudio.com/) | ||
- [DuckDB CLI](https://duckdb.org/docs/installation/index) | ||
- [Refined GitHub extension](https://github.com/refined-github/refined-github) | ||
|
||
### Setup environment | ||
|
||
```sh | ||
./scripts/setup.sh | ||
``` | ||
|
||
### Source | ||
|
||
The source ingests user data and converts it to parquet files. | ||
|
||
#### Ingesting data | ||
|
||
To run the `source` locally as a binary: | ||
|
||
```sh | ||
poetry run python -m src.datasets.dataset_loader \ | ||
--dataset_name=$DATASET \ | ||
--output_dir=./gcs_cache/ \ | ||
--config_path=./datasets/the_movies_dataset.json | ||
``` | ||
|
||
### Web Server | ||
|
||
#### Development | ||
|
||
To run the web server in dev mode with fast edit-refresh: | ||
|
||
```sh | ||
./run_server_dev.sh | ||
``` | ||
|
||
#### Testing | ||
|
||
Run all the presubmits: | ||
|
||
```sh | ||
./scripts/presubmit.sh | ||
``` | ||
|
||
Test python: | ||
|
||
```sh | ||
./scripts/test_py.sh | ||
``` | ||
|
||
Test TypeScript: | ||
|
||
```sh | ||
./scripts/test_ts.sh | ||
``` | ||
|
||
### Troubleshooting | ||
|
||
#### pyenv install not working on M1 | ||
|
||
If your pyenv does not work on M1 machines after installing Xcode, you may need to reinstall the Xcode command line tools. [Stack Overflow Link](https://stackoverflow.com/questions/65778888/pyenv-configure-error-c-compiler-cannot-create-executables) | ||
|
||
#### No module named `_lzma` | ||
|
||
Follow instructions from [pyenv](https://github.com/pyenv/pyenv/wiki#suggested-build-environment): | ||
|
||
- Uninstall python via `pyenv uninstall` | ||
- Run `brew install openssl readline sqlite3 xz zlib tcl-tk` | ||
- Reinstall python via `pyenv install` | ||
|
||
To reinstall the Xcode command line tools (see "pyenv install not working on M1" above): | ||
|
||
```sh | ||
$ sudo rm -rf /Library/Developer/CommandLineTools | ||
$ xcode-select --install | ||
``` | ||
|
||
#### Installing TensorFlow on M1 | ||
|
||
M1/M2 chips need a special TF installation. These steps are taken from the official | ||
[Apple docs](https://developer.apple.com/metal/tensorflow-plugin/): | ||
|
||
1. Click [here](https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-arm64.sh) to download the Miniforge (Conda) installer | ||
2. Run: | ||
|
||
``` | ||
chmod +x ~/Downloads/Miniforge3-MacOSX-arm64.sh | ||
sh ~/Downloads/Miniforge3-MacOSX-arm64.sh | ||
source ~/miniforge3/bin/activate | ||
``` | ||
|
||
3. Install the TensorFlow `2.9.0` dependencies: `conda install -c apple tensorflow-deps=2.9.0` | ||
|
||
#### Too many open files on MacOS | ||
|
||
When downloading and pre-processing TFDS datasets, you might get a `too many open files` | ||
error. To fix this, increase [the max open files limit](https://superuser.com/a/1679740). | ||
|
||
### Relevant projects | ||
|
||
- [Voxel51](https://voxel51.com/docs/fiftyone/) | ||
- Open-source tool for visualizing datasets. | ||
- [Fastdup](https://github.com/visual-layer/fastdup) | ||
- Easily manage, clean & curate Visual Data | ||
- Can scale to 400M images (low cost: can process 12M images on a $1 cloud machine budget) | ||
- Enterprise version coming soon: https://www.visual-layer.com/ | ||
- [Scale Nucleus](https://scale.com/nucleus) | ||
- Allows people to upload ML data, label it via UI or by sending it to a pool of labelers. Allows people to train a model and analyze the performance. Nucleus is one of their products for browsing data. Valuation is around $10B! | ||
- [Labelbox](https://labelbox.com/) | ||
- Another largish company, similar to Scale. $190M in funding. | ||
- [Deepnote](https://deepnote.com/) | ||
- Python notebook for data. Integrates with BigQuery, GCS, Snowflake, MySQL, PostgreSQL, and another 20+ sources. | ||
- [Tadviewer](https://www.tadviewer.com/) | ||
- Open source desktop app for browsing CSV, Parquet, SQLite and DuckDB data. | ||
- [Datasette](https://datasette.io/) | ||
- Open source. Similar to Tadviewer. See [a related twitter thread](https://twitter.com/simonw/status/1572285367382061057?s=46&t=6Rc-qn2_pufUx7hwG7z_PQ) from the creator. | ||
- [VisiData](https://www.visidata.org/) | ||
- Open source. Similar to Tadviewer and Datasette, but for the terminal. | ||
- [Amazon Sagemaker Data Wrangler](https://aws.amazon.com/sagemaker/data-wrangler) | ||
- Build a pipeline for preparing data for ML. | ||
- [bit.io](https://bit.io/) | ||
- Drag and drop a file to get a Postgres database and a short link you can share with people. You can also paste a URL to data on the web. | ||
- [Top companies in data technology](https://www.valuer.ai/blog/top-companies-in-data-technology) |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
{ | ||
"source_name": "csv", | ||
"filepaths": [ | ||
"gs://lilac-data-us-east1/datasets/csv_datasets/the_movies_dataset/the_movies_dataset.csv" | ||
] | ||
} |
Oops, something went wrong.