Skip to content

Commit

Permalink
Merge pull request #213 from ku-nlp/dev
Browse files Browse the repository at this point in the history
v2.4.0
  • Loading branch information
omukazu authored Jul 18, 2024
2 parents a95d045 + 7ded0e0 commit 4f60532
Show file tree
Hide file tree
Showing 57 changed files with 1,942 additions and 1,958 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
POETRY_PYTHON: ${{ steps.setup-python.outputs.python-path }}
run: |
poetry env use $POETRY_PYTHON
poetry install --no-interaction --without dev,test
poetry install --no-interaction --without dev,test --no-cache
- name: Build KWJA
run: |
poetry build
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
- name: Add path for Python packages
run: echo "$HOME/.local/bin" >> $GITHUB_PATH
- name: Install dependencies
run: poetry install --no-interaction --only main
run: poetry install --no-interaction --only main --no-cache
- name: Build package
run: poetry build

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
uses: actions/checkout@v4
- name: Create Release
id: create_release
uses: softprops/action-gh-release@v1
uses: softprops/action-gh-release@v2
if: startsWith(github.ref, 'refs/tags/')
with:
body: |
Expand Down
7 changes: 2 additions & 5 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,12 @@ jobs:
echo "$HOME/.local/bin" >> $GITHUB_PATH
- name: Install dependencies
run: |
poetry config virtualenvs.create false
poetry install --no-interaction --without dev
poetry install --no-interaction --without dev --no-cache
- name: Run tests
run: |
poetry run pytest --cov=./ --cov-report=xml
env:
XDG_CACHE_HOME: ${{ github.workspace }}/.cache
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN }}
files: ./coverage.xml
Expand Down
18 changes: 9 additions & 9 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,42 @@ default_language_version:
python: python3.10
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
rev: v4.6.0
hooks:
- id: end-of-file-fixer
- id: trailing-whitespace
- id: check-yaml
- id: check-toml
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 23.11.0
rev: 24.4.0
hooks:
- id: black
- repo: https://github.com/PyCQA/flake8
rev: 6.1.0
rev: 7.0.0
hooks:
- id: flake8
additional_dependencies: [Flake8-pyproject]
- repo: https://github.com/PyCQA/isort
rev: 5.12.0
rev: 5.13.2
hooks:
- id: isort
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.7.0
rev: v1.9.0
hooks:
- id: mypy
additional_dependencies:
- rhoknp==1.6.0
- hydra-core==1.3.2
- torch==2.1.1
- torchmetrics==1.2.0
- transformers==4.34.1
- torch==2.2.0
- torchmetrics==1.3.0
- transformers==4.38.2
- tokenizers
- wandb
- typer
- types-PyYAML
- cohesion-tools==0.5.7
- repo: https://github.com/asottile/pyupgrade
rev: v3.15.0
rev: v3.15.2
hooks:
- id: pyupgrade
args:
Expand Down
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [v2.4.0] - 2024-07-18
### Added
- Introduce a special token for whitespace so that it is handled as is.

### Changed
- Change the default value of `num_beams` from 3 to 1.
- Refactor seq2seq module to make it slightly faster.

### Removed
- Remove normalization of whitespace to "␣".

## [v2.3.0] - 2024-02-01
### Added
- Support Python 3.12.
Expand Down
1 change: 1 addition & 0 deletions configs/char_module.debug.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ compile: ${oc.env:COMPILE,false}
ignore_hparams_on_save: false

# constants
special_tokens: [" "]
hparams_to_ignore_on_save:
- project
- work_dir
Expand Down
1 change: 1 addition & 0 deletions configs/char_module.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ compile: ${oc.env:COMPILE,false}
ignore_hparams_on_save: false

# constants
special_tokens: [" "]
hparams_to_ignore_on_save:
- project
- work_dir
Expand Down
1 change: 1 addition & 0 deletions configs/datamodule/base/char.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@ denormalize_probability: ${denormalize_probability}
tokenizer:
_target_: transformers.AutoTokenizer.from_pretrained
pretrained_model_name_or_path: ${encoder.pretrained_model_name_or_path}
additional_special_tokens: ${special_tokens}
do_word_tokenize: false
_convert_: all
4 changes: 3 additions & 1 deletion configs/datamodule/predict/char_inference.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,7 @@ defaults:
- _self_

_target_: kwja.datamodule.datasets.CharInferenceDataset
texts: []
doc_id_prefix: null
# texts and raw_text_file are mutually exclusive
texts: []
raw_text_file: null
3 changes: 1 addition & 2 deletions configs/datamodule/predict/seq2seq_inference.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,4 @@ defaults:
- _self_

_target_: kwja.datamodule.datasets.Seq2SeqInferenceDataset
texts: []
doc_id_prefix: null
juman_file: null
2 changes: 2 additions & 0 deletions configs/datamodule/predict/typo_inference.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,6 @@ defaults:
- _self_

_target_: kwja.datamodule.datasets.TypoInferenceDataset
# texts and raw_text_file are mutually exclusive
texts: []
raw_text_file: null
6 changes: 3 additions & 3 deletions configs/eval.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ dependency_topk: 4
discourse_threshold: 0.0

# environment dependent settings
num_workers: -1
num_workers: ${oc.env:NUM_WORKERS,0}
devices: ${oc.env:DEVICES,1}
max_batches_per_device: 4
compile: false
max_batches_per_device: ${oc.env:MAX_BATCHES_PER_DEVICE,2}
compile: ${oc.env:COMPILE,false}
4 changes: 2 additions & 2 deletions configs/seq2seq_module.debug.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ do_predict_after_train: false
checkpoint_path: ""

# For decoding settings
use_forced_decoding: true
use_surf_forced_decoding: true
decoding:
max_length: ${max_tgt_length}
num_beams: 3
num_beams: 1

# set monitor and mode for early_stopping and model_checkpoint
monitor: valid/seq2seq_loss
Expand Down
4 changes: 2 additions & 2 deletions configs/seq2seq_module.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ do_predict_after_train: false
checkpoint_path: ""

# For decoding settings
use_forced_decoding: true
use_surf_forced_decoding: true
decoding:
max_length: ${max_tgt_length}
num_beams: 3
num_beams: 1

# set monitor and mode for early_stopping and model_checkpoint
monitor: valid/seq2seq_loss
Expand Down
2 changes: 1 addition & 1 deletion configs/typo_module.debug.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ compile: ${oc.env:COMPILE,false}
ignore_hparams_on_save: false

# constants
special_tokens: ["<k>", "<d>", "<_>", "<dummy>"]
special_tokens: ["<k>", "<d>", "<_>", "<dummy>", " "]
hparams_to_ignore_on_save:
- project
- work_dir
Expand Down
2 changes: 1 addition & 1 deletion configs/typo_module.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ compile: ${oc.env:COMPILE,false}
ignore_hparams_on_save: false

# constants
special_tokens: ["<k>", "<d>", "<_>", "<dummy>"]
special_tokens: ["<k>", "<d>", "<_>", "<dummy>", " "]
hparams_to_ignore_on_save:
- project
- work_dir
Expand Down
2 changes: 1 addition & 1 deletion configs/word_module.debug.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ compile: ${oc.env:COMPILE,false}
ignore_hparams_on_save: false

# constants
special_tokens: ["[著者]", "[読者]", "[不特定:人]", "[不特定:物]", "[NULL]", "[NA]", "[ROOT]"]
special_tokens: ["[著者]", "[読者]", "[不特定:人]", "[不特定:物]", "[NULL]", "[NA]", "[ROOT]", " "]
hparams_to_ignore_on_save:
- project
- work_dir
Expand Down
2 changes: 1 addition & 1 deletion configs/word_module.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ compile: ${oc.env:COMPILE,false}
ignore_hparams_on_save: false

# constants
special_tokens: ["[著者]", "[読者]", "[不特定:人]", "[不特定:物]", "[NULL]", "[NA]", "[ROOT]"]
special_tokens: ["[著者]", "[読者]", "[不特定:人]", "[不特定:物]", "[NULL]", "[NA]", "[ROOT]", " "]
hparams_to_ignore_on_save:
- project
- work_dir
Expand Down
Loading

0 comments on commit 4f60532

Please sign in to comment.