Skip to content

Commit

Permalink
Release v1.0.0 (#272)
Browse files Browse the repository at this point in the history
* ✨ Add min_count parameter to the base_processor.
* ✨ Add `disable_auto_summary` config.
* ✨ Allows custom average types, logs to an array for easy access to the last epoch.
* 💥 Introducing breaking changes, rename package name to kashgari.
* 🔊 Adding migration guide.
  • Loading branch information
BrikerMan authored Oct 18, 2019
1 parent 7d7c819 commit 822169e
Show file tree
Hide file tree
Showing 13 changed files with 190 additions and 35 deletions.
18 changes: 9 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,17 +73,17 @@ There are also articles and posts that illustrate how to use Kashgari:

### Requirements and Installation

🎉🎉🎉 We renamed the tf.keras version as **kashgari-tf** 🎉🎉🎉
🎉🎉🎉 We renamed again for consistency and clarity. From now on, it is all `kashgari`. 🎉🎉🎉

The project is based on TensorFlow 1.14.0 and Python 3.6+, because it is 2019 and type hinting is cool.
The project is based on Python 3.6+, because it is 2019 and type hinting is cool.

```bash
pip install kashgari-tf
# CPU
pip install tensorflow==1.14.0
# GPU
pip install tensorflow-gpu==1.14.0
```
| Backend | pypi version | desc |
| ---------------- | -------------------------------------- | --------------- |
| TensorFlow 2.x | `pip install 'kashgari>=2.0.0'` | coming soon |
| TensorFlow 1.14+ | `pip install 'kashgari>=1.0.0,<2.0.0'` | current version |
| Keras | `pip install 'kashgari<1.0.0'` | legacy version |

[Find more info about the name change.](https://github.com/BrikerMan/Kashgari/releases/tag/v1.0.0)

### Example Usage

Expand Down
4 changes: 3 additions & 1 deletion kashgari/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,6 @@
from kashgari import utils
from kashgari import callbacks

import tensorflow as tf
from kashgari import migeration

migeration.show_migration_guide()
14 changes: 6 additions & 8 deletions kashgari/callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@

class EvalCallBack(keras.callbacks.Callback):

def __init__(self, kash_model: BaseModel, valid_x, valid_y, step=5, batch_size=256):
def __init__(self, kash_model: BaseModel, valid_x, valid_y,
step=5, batch_size=256, average='weighted'):
"""
Evaluate callback, calculate precision, recall and f1
Args:
Expand All @@ -32,9 +33,8 @@ def __init__(self, kash_model: BaseModel, valid_x, valid_y, step=5, batch_size=2
self.valid_y = valid_y
self.step = step
self.batch_size = batch_size
self.logs = {}

self.average = 'weighted'
self.average = average
self.logs = []

def on_epoch_end(self, epoch, logs=None):
if (epoch + 1) % self.step == 0:
Expand All @@ -51,15 +51,13 @@ def on_epoch_end(self, epoch, logs=None):
recall = metrics.recall_score(y_true, y_pred, average=self.average)
f1 = metrics.f1_score(y_true, y_pred, average=self.average)

self.logs[epoch] = {
self.logs.append({
'precision': precision,
'recall': recall,
'f1': f1
}
})
print(f"\nepoch: {epoch} precision: {precision:.6f}, recall: {recall:.6f}, f1: {f1:.6f}")


if __name__ == "__main__":
print("Hello world")
config_path = '/Users/brikerman/Desktop/python/Kashgari/tests/test-data/bert/bert_config.json'
check_point_path = '/Users/brikerman/Desktop/python/Kashgari/tests/test-data/bert/bert_model.ckpt'
3 changes: 2 additions & 1 deletion kashgari/embeddings/gpt_2_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ def _build_model(self, **kwargs):
model = gpt2.load_trained_model_from_checkpoint(config_path,
checkpoint_path,
self.sequence_length)
model.summary()
if not kashgari.config.disable_auto_summary:
model.summary()
self.embed_model = model
# if self.token_count == 0:
# logging.debug('need to build after build_word2idx')
Expand Down
1 change: 1 addition & 0 deletions kashgari/macros.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class Config(object):

def __init__(self):
self._use_cudnn_cell = False
self.disable_auto_summary = False

if tf.test.is_gpu_available(cuda_only=True):
logging.warning("CUDA GPU available, you can set `kashgari.config.use_cudnn_cell = True` to use CuDNNCell. "
Expand Down
43 changes: 43 additions & 0 deletions kashgari/migeration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# encoding: utf-8

# author: BrikerMan
# contact: [email protected]
# blog: https://eliyar.biz

# file: migration.py
# time: 2:31 下午
import subprocess
import logging


# Banner text that show_migration_guide() logs as a warning when a
# `kashgari-tf` entry is found in the `pip freeze` output.
guide = """
╭─────────────────────────────────────────────────────────────────────────╮
│ ◎ ○ ○ ░░░░░░░░░░░░░░░░░░░░░ Important Message ░░░░░░░░░░░░░░░░░░░░░░░░│
├─────────────────────────────────────────────────────────────────────────┤
│ │
│ We renamed again for consistency and clarity. │
│ From now on, it is all `kashgari`. │
│ Changelog: https://github.com/BrikerMan/Kashgari/releases/tag/v1.0.0 │
│ │
│ | Backend | pypi version | desc | │
│ | ---------------- | -------------- | -------------- | │
│ | TensorFlow 2.x | kashgari 2.x.x | coming soon | │
│ | TensorFlow 1.14+ | kashgari 1.x.x | | │
│ | Keras | kashgari 0.x.x | legacy version | │
│ │
╰─────────────────────────────────────────────────────────────────────────╯
"""


def show_migration_guide(requirements=None):
    """Log the rename banner if the legacy ``kashgari-tf`` package is installed.

    Args:
        requirements: Optional ``pip freeze``-style text, one requirement per
            line. When ``None`` (the default), the text is obtained by running
            ``pip freeze`` through the shell.

    Returns:
        None. The only effect is a ``logging.warning`` call with ``guide``
        when a ``kashgari-tf==<version>`` line is present.
    """
    if requirements is None:
        requirements = subprocess.getoutput("pip freeze")

    for package in requirements.splitlines():
        # ``partition`` never raises. Unpacking ``split('==')`` into two names
        # fails with ValueError on lines that contain more than one ``==``
        # (e.g. direct-URL requirements), which would break ``import kashgari``.
        package_name, separator, _ = package.partition('==')
        if separator and package_name == 'kashgari-tf':
            logging.warning(guide)
            # One banner is enough; no need to scan the remaining lines.
            break


if __name__ == "__main__":
    show_migration_guide()
    print("hello, world")
7 changes: 4 additions & 3 deletions kashgari/processors/base_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ def __init__(self, **kwargs):

self.sequence_length = kwargs.get('sequence_length', None)

self.min_count = kwargs.get('min_count', 3)

def info(self):
return {
'class_name': self.__class__.__name__,
Expand All @@ -65,13 +67,12 @@ def info(self):
def analyze_corpus(self,
corpus: Union[List[List[str]]],
labels: Union[List[List[str]], List[str]],
min_count: int = 3,
force: bool = False):
rec_len = sorted([len(seq) for seq in corpus])[int(0.95 * len(corpus))]
self.dataset_info['RECOMMEND_LEN'] = rec_len

if len(self.token2idx) == 0 or force:
self._build_token_dict(corpus, min_count)
self._build_token_dict(corpus, self.min_count)
if len(self.label2idx) == 0 or force:
self._build_label_dict(labels)

Expand Down Expand Up @@ -103,7 +104,7 @@ def _build_token_dict(self, corpus: List[List[str]], min_count: int = 3):
token2count = collections.OrderedDict(sorted_token2count)

for token, token_count in token2count.items():
if token not in token2idx and token_count > min_count:
if token not in token2idx and token_count >= min_count:
token2idx[token] = len(token2idx)

self.token2idx = token2idx
Expand Down
3 changes: 2 additions & 1 deletion kashgari/tasks/base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,8 @@ def compile_model(self, **kwargs):
kwargs['metrics'] = ['accuracy']

self.tf_model.compile(**kwargs)
self.tf_model.summary()
if not kashgari.config.disable_auto_summary:
self.tf_model.summary()

def predict(self,
x_data,
Expand Down
2 changes: 1 addition & 1 deletion kashgari/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@
# file: __version__.py.py
# time: 2019-05-20 16:32

__version__ = '0.5.4'
__version__ = '1.0.0'
26 changes: 26 additions & 0 deletions mkdocs/docs/about/release-notes.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,31 @@ pip show kashgari-tf

## Current Release

### [1.0.0] - 2019.10.18

Unfortunately, we have to change the package name for clarity and consistency. Here is the new naming style.

| Backend | pypi version | desc |
| ---------------- | -------------- | -------------- |
| TensorFlow 2.x | kashgari 2.x.x | coming soon |
| TensorFlow 1.14+ | kashgari 1.x.x | |
| Keras | kashgari 0.x.x | legacy version |

Here is how the existing versions change:

| Supported Backend | Kashgari Versions | Kashgari-tf Version |
| ----------------- | ----------------- | ------------------- |
| TensorFlow 2.x | kashgari 2.x.x | - |
| TensorFlow 1.14+ | kashgari 1.0.1 | - |
| TensorFlow 1.14+ | kashgari 1.0.0 | 0.5.5 |
| TensorFlow 1.14+ | - | 0.5.4 |
| TensorFlow 1.14+ | - | 0.5.3 |
| TensorFlow 1.14+ | - | 0.5.2 |
| TensorFlow 1.14+ | - | 0.5.1 |
| Keras (legacy) | kashgari 0.2.6 | - |
| Keras (legacy) | kashgari 0.2.5 | - |
| Keras (legacy) | kashgari 0.x.x | - |

### [0.5.4] - 2019.09.30

- ✨ Add shuffle parameter to fit function ([#249])
Expand Down Expand Up @@ -113,6 +138,7 @@ pip show kashgari-tf
- fix classification model evaluate result output
- change test settings

[1.0.0]: https://github.com/BrikerMan/Kashgari/compare/v0.5.4...v1.0.0
[0.5.4]: https://github.com/BrikerMan/Kashgari/compare/v0.5.3...v0.5.4
[0.5.3]: https://github.com/BrikerMan/Kashgari/compare/v0.5.2...v0.5.3
[0.5.2]: https://github.com/BrikerMan/Kashgari/compare/v0.5.1...v0.5.2
Expand Down
18 changes: 9 additions & 9 deletions mkdocs/docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,17 +64,17 @@ There are also articles and posts that illustrate how to use Kashgari:

### Requirements and Installation

🎉🎉🎉 We renamed the tf.keras version as **kashgari-tf** 🎉🎉🎉
🎉🎉🎉 We renamed again for consistency and clarity. From now on, it is all `kashgari`. 🎉🎉🎉

The project is based on TenorFlow 1.14.0 and Python 3.6+, because it is 2019 and type hints is cool.
The project is based on Python 3.6+, because it is 2019 and type hinting is cool.

```bash
pip install kashgari-tf
# CPU
pip install tensorflow==1.14.0
# GPU
pip install tensorflow-gpu==1.14.0
```
| Backend | pypi version | desc |
| ---------------- | -------------------------------------- | --------------- |
| TensorFlow 2.x | `pip install 'kashgari>=2.0.0'` | coming soon |
| TensorFlow 1.14+ | `pip install 'kashgari>=1.0.0,<2.0.0'` | current version |
| Keras | `pip install 'kashgari<1.0.0'` | legacy version |

[Find more info about the name change.](https://github.com/BrikerMan/Kashgari/releases/tag/v1.0.0)

### Example Usage

Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def find_version(*file_paths):
raise RuntimeError("Unable to find version string.")


__name__ = 'kashgari-tf'
__name__ = 'kashgari'
__author__ = "BrikerMan"
__copyright__ = "Copyright 2018, BrikerMan"
__credits__ = []
Expand Down Expand Up @@ -79,4 +79,4 @@ def find_version(*file_paths):
)

if __name__ == "__main__":
print("Hello world")
print("Hello world")
82 changes: 82 additions & 0 deletions setup.tf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#!/usr/bin/env python
# encoding: utf-8
"""
@author: BrikerMan
@contact: [email protected]
@blog: https://eliyar.biz
@version: 1.0
@license: Apache Licence
@file: setup.py
@time: 2019-01-24 16:42
"""
import codecs
import os
import pathlib
import re

from setuptools import find_packages, setup

# Directory containing this setup script; read() and the README lookup
# resolve their paths against it.
HERE = pathlib.Path(__file__).parent


def read(*parts):
    """Return the text of the file located at ``HERE`` joined with ``parts``.

    Args:
        *parts: Path components, relative to the directory of this script.

    Returns:
        The file content as a ``str``, decoded as UTF-8.
    """
    # Decode explicitly as UTF-8: without an encoding the file is decoded with
    # the platform default, which can fail or mis-decode non-ASCII source
    # headers on non-UTF-8 locales.
    with open(os.path.join(HERE, *parts), 'r', encoding='utf-8') as fp:
        return fp.read()


def find_version(*file_paths):
    """Extract the ``__version__`` string from the file at ``file_paths``.

    Args:
        *file_paths: Path components forwarded to ``read``.

    Returns:
        The text between the quotes of the first ``__version__ = '...'``
        assignment that starts a line.

    Raises:
        RuntimeError: If no such assignment is found.
    """
    pattern = re.compile(r"^__version__ = ['\"]([^'\"]*)['\"]", re.M)
    found = pattern.search(read(*file_paths))
    if found is None:
        raise RuntimeError("Unable to find version string.")
    return found.group(1)


# Distribution metadata passed to setup() below.
# NOTE(review): assigning to ``__name__`` rebinds the module's own name, so any
# later ``if __name__ == "__main__"`` check in this script can never be true.
__name__ = 'kashgari-tf'
__author__ = "BrikerMan"
__copyright__ = "Copyright 2018, BrikerMan"
__credits__ = []
__license__ = "Apache License 2.0"
__maintainer__ = "BrikerMan"
__email__ = "[email protected]"

__url__ = 'https://github.com/BrikerMan/Kashgari'
__description__ = 'Simple, Keras-powered multilingual NLP framework,' \
                  ' allows you to build your models in 5 minutes for named entity recognition (NER),' \
                  ' part-of-speech tagging (PoS) and text classification tasks. ' \
                  'Includes BERT, GPT-2 and word2vec embedding.'

__version__ = '0.5.5'
README = (HERE / "README.md").read_text(encoding='utf-8')

# Resolve requirements.txt next to this script (not the current working
# directory) so the build works from any cwd, and drop blank lines and
# comments, which are not valid requirement specifiers.
with codecs.open(str(HERE / 'requirements.txt'), 'r', 'utf8') as reader:
    install_requires = [
        requirement
        for requirement in (line.strip() for line in reader)
        if requirement and not requirement.startswith('#')
    ]

# Register the distribution with setuptools using the metadata defined above.
setup(
    name=__name__,
    version=__version__,
    description=__description__,
    # '>=3.6' rather than '>3.6': the strict form rejects Python 3.6.0 even
    # though 3.6 is the documented minimum and is listed in the classifiers.
    python_requires='>=3.6',
    long_description=README,
    long_description_content_type="text/markdown",
    author=__author__,
    author_email=__email__,
    url=__url__,
    packages=find_packages(exclude=('tests',)),
    install_requires=install_requires,
    include_package_data=True,
    license=__license__,
    classifiers=[
        'License :: OSI Approved :: Apache Software License',
        # 'Programming Language :: Python',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: Implementation :: CPython',
        'Programming Language :: Python :: Implementation :: PyPy'
    ],
)

# NOTE(review): ``__name__`` is rebound to the distribution name earlier in
# this script, so this guard does not fire when the file is run directly.
if __name__ == "__main__":
    print("Hello world")

0 comments on commit 822169e

Please sign in to comment.