diff --git a/README.md b/README.md index 9106a96..4328491 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ This repository contains an unofficial `CodeBLEU` implementation that supports `Linux`, `MacOS` (incl. M-series) and `Windows`. It is available through `PyPI` and the `evaluate` library. -Available for: `Python`, `C`, `C#`, `C++`, `Java`, `JavaScript`, `PHP`, `Go`, `Ruby`. +Available for: `Python`, `C`, `C#`, `C++`, `Java`, `JavaScript`, `PHP`, `Go`, `Ruby`, `Rust`. --- diff --git a/evaluate_app/README.md b/evaluate_app/README.md index b99d0cc..90a6f15 100644 --- a/evaluate_app/README.md +++ b/evaluate_app/README.md @@ -16,7 +16,7 @@ pinned: false This repository contains an unofficial `CodeBLEU` implementation that supports `Linux`, `MacOS` and `Windows`. It is available through `PyPI` and the `evaluate` library. -Available for: `Python`, `C`, `C#`, `C++`, `Java`, `JavaScript`, `PHP`, `Go`, `Ruby`. +Available for: `Python`, `C`, `C#`, `C++`, `Java`, `JavaScript`, `PHP`, `Go`, `Ruby`, `Rust`. --- diff --git a/evaluate_app/codebleu.py b/evaluate_app/codebleu.py index 7af0ccd..b7facbb 100644 --- a/evaluate_app/codebleu.py +++ b/evaluate_app/codebleu.py @@ -41,7 +41,7 @@ should be a string with tokens separated by spaces. references: list of reference for each prediction. Each reference should be a string with tokens separated by spaces. - language: programming language in ['java','js','c_sharp','php','c','python','cpp']. Please note that, due to the way Datasets works, the number of entities in the language array must match the number of entries in the predictions and references arrays, but only the first value from the languages array will be used. This means that you will not be able to compute a metric for different langauges at the same time, but mst do them as sequential calls to CodeBleu. + language: programming language in ['java','js','c_sharp','php','c','python','cpp','go','ruby','rust']. Please note that, due to the way Datasets works, the number of entities in the language array must match the number of entries in the predictions and references arrays, but only the first value from the languages array will be used. This means that you will not be able to compute a metric for different langauges at the same time, but mst do them as sequential calls to CodeBleu. weights: tuple of 4 floats to use as weights for scores. Defaults to (0.25, 0.25, 0.25, 0.25). Returns: codebleu: resulting `CodeBLEU` score, diff --git a/tests/test_codebleu.py b/tests/test_codebleu.py index 7a39cd0..65936a9 100644 --- a/tests/test_codebleu.py +++ b/tests/test_codebleu.py @@ -41,12 +41,13 @@ def test_exact_match_works_for_all_langs(lang: str) -> None: ("php", ["function foo ( x ) { return x }"], ["function bar ( y ) {\n return y\n}"]), ("go", ["func foo ( x ) { return x }"], ["func bar ( y ) {\n return y\n}"]), ("ruby", ["def foo ( x ) :\n return x"], ["def bar ( y ) :\n return y"]), + ("rust", ["fn foo ( x ) -> i32 { x }"], ["fn bar ( y ) -> i32 { y }"]), ], ) def test_simple_cases_work_for_all_langs(lang: str, predictions: List[Any], references: List[Any]) -> None: result = calc_codebleu(references, predictions, lang) logging.debug(result) - assert result["codebleu"] == pytest.approx(0.6, 0.05) + assert result["codebleu"] == pytest.approx(0.6, 0.1) def test_error_when_lang_not_supported() -> None: diff --git a/use.py b/use.py deleted file mode 100644 index 42f1552..0000000 --- a/use.py +++ /dev/null @@ -1,25 +0,0 @@ -from codebleu import calc_codebleu - -#prediction = "def add ( a , b ) :\n return a + b" -#reference = "def sum ( first , second ) :\n return second + first" -#result = calc_codebleu([reference], [prediction], lang="python", weights=(0.25, 0.25, 0.25, 0.25), tokenizer=None) -#print(result) - -# { -# 'codebleu': 0.5537, -# 'ngram_match_score': 0.1041, -# 'weighted_ngram_match_score': 0.1109, -# 'syntax_match_score': 1.0, -# 'dataflow_match_score': 1.0 -# } - -# prediction = "void add (int a ,int b ) {\n return a + b;}" -# reference = "void sum ( int first , int second ) {\n return second + first;}" -# result = calc_codebleu([reference], [prediction], lang="c", weights=(0.25, 0.25, 0.25, 0.25), tokenizer=None) -# print(result) - -prediction = "fn add ( a , b )->i8 {\n a + b}" -reference = "fn sum ( first , second )->i8 {\n second + first}" -result = calc_codebleu([reference], [prediction], lang="rust", weights=(0.25, 0.25, 0.25, 0.25), tokenizer=None) -print(result) -