Document classification with PyTorch. This repository was made using the practicalAI boilerplate template.
cd src
virtualenv -p python3 venv
source venv/bin/activate
python3 setup.py develop
python3 -m pytest tests
gunicorn --log-level ERROR --workers 4 --timeout 60 --graceful-timeout 30 --bind 0.0.0.0:5000 --access-logfile - --error-logfile - --reload wsgi
tensorboard --logdir="tensorboard" --port=6006
docker build -t document-classification:latest -f Dockerfile .
docker run -d -p 5000:5000 --name document-classification document-classification:latest
- Training
POST /train
curl --request POST \
--url http://localhost:5000/document-classification/train \
--header "Content-Type: application/json" \
--data '{
"config_file": "training.json"
}'
- Inference
POST /predict
curl --request POST \
--url http://localhost:5000/document-classification/predict/latest \
--header "Content-Type: application/json" \
--data '{
"X": "Global warming is inevitable, scientists warn."
}'
- Python package
from api.utils import predict
X = "Global warming is inevitable, scientists warn."
prediction = predict(experiment_id="latest", X=X)["data"]["prediction"]
>>> print (prediction)
[{'y': 'Sci/Tech', 'probability': 0.6540133357048035}, {'y': 'Business', 'probability': 0.339420884847641}, {'y': 'World', 'probability': 0.003702996065840125}, {'y': 'Sports', 'probability': 0.002862769179046154}]
- Health check
GET /api
curl --request GET \
--url http://localhost:5000/document-classification
- Training
POST /train
curl --request POST \
--url http://localhost:5000/document-classification/train \
--header "Content-Type: application/json" \
--data '{
"config_file": "training.json"
}'
- Inference
POST /predict
curl --request POST \
--url http://localhost:5000/document-classification/predict/latest \
--header "Content-Type: application/json" \
--data '{
"X": "Global warming is inevitable, scientists warn."
}'
- List of experiments
GET /experiments
curl --request GET \
--url http://localhost:5000/document-classification/experiments
- Experiment info
GET /info/<experiment_id>
curl --request GET \
--url http://localhost:5000/document-classification/info
- Get classes for a model
GET /classes/<experiment_id>
curl --request GET \
--url http://localhost:5000/document-classification/classes
- Delete an experiment
GET /delete/<experiment_id>
curl --request GET \
--url http://localhost:5000/document-classification/delete/2019-03-14T01:05:49.989428_fafe6eb4-462f-11e9-bfe0-f0189887caab
document-classification/
├── src/
|   ├── api/ - holds all API scripts
|   |   ├── endpoints.py - API endpoint definitions
|   |   └── utils.py - utility functions for endpoints
|   ├── configs/ - configuration files
|   |   ├── logging.json - logger configuration
|   |   └── training.json - training configuration
|   ├── datasets/ - directory to hold datasets
|   |   └── news.csv - data file
|   ├── document_classification/ - ML files
|   |   ├── dataset.py - dataset
|   |   ├── model.py - model functions
|   |   ├── utils.py - utility functions
|   |   ├── vectorizer.py - vectorize the processed data
|   |   └── vocabulary.py - vocabulary to vectorize data
|   ├── tests/ - tests
|   |   ├── e2e/ - integration tests
|   |   └── unit/ - unit tests
|   ├── application.py - application script
|   ├── config.py - application configuration
|   ├── requirements.txt - python package requirements
|   ├── setup.py - custom package setup
|   └── wsgi.py - application initialization
├── .dockerignore - dockerignore file
├── .gitignore - gitignore file
├── Dockerfile - Dockerfile for the application
├── CODE_OF_CONDUCT.md - code of conduct
├── CODEOWNERS - code owner assignments
├── LICENSE - license description
└── README.md - repository readme