Adds docs and bibliography for hard monotonic attention.
kylebgorman committed Jun 9, 2024
1 parent e65e498 commit 020fe48
Showing 3 changed files with 26 additions and 6 deletions.
6 changes: 6 additions & 0 deletions README.md
@@ -197,6 +197,12 @@ additional flags. Supported values for `--arch` are:
- `attentive_lstm`: This is an LSTM decoder with LSTM encoders (by default)
and an attention mechanism. The initial hidden state is treated as a learned
parameter.
- `hard_attention_lstm`: This is an LSTM encoder/decoder modeling generation
as a Markov process. By default, it assumes a non-monotonic progression over
the source string, but with `--enforce_monotonic` the model must progress
over each source character in order. A non-zero value of
`--attention_context` (default: `0`) widens the context window for
conditioning state transitions to include one or more previous states (see
the sketch after this list).
- `lstm`: This is an LSTM decoder with LSTM encoders (by default); in lieu of
an attention mechanism, the last non-padding hidden state of the encoder is
concatenated with the decoder hidden state.
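
For readers unfamiliar with hard monotonic attention, here is a minimal sketch,
not Yoyodyne's actual implementation, of the dynamic program that exact hard
monotonic attention (Wu & Cotterell 2019) uses to marginalize over monotone
alignments. The function name `monotonic_nll`, the tensor shapes, and the
uniform initial alignment distribution are illustrative assumptions.

```python
# Minimal sketch, NOT Yoyodyne's implementation: marginalizing over hard
# monotonic alignments with the forward algorithm (cf. Wu & Cotterell 2019).
# All names and tensor shapes below are illustrative assumptions.
import math

import torch


def monotonic_nll(
    emit_log_probs: torch.Tensor,   # (T, S, V): log P(y_t | a_t = j).
    trans_log_probs: torch.Tensor,  # (T, S, S): log P(a_t = j | a_{t-1} = i).
    target: torch.Tensor,           # (T,): gold target symbol indices.
) -> torch.Tensor:
    """Negative log-likelihood of `target`, summed over monotone alignments.

    Transition entries with j < i are assumed to be -inf; that masking is
    what enforces a left-to-right (monotonic) progression over the source.
    """
    T, S, _ = emit_log_probs.size()
    # Emission log-probability of the gold symbol at each (t, j): (T, S).
    gold_emit = emit_log_probs.gather(
        2, target.view(T, 1, 1).expand(T, S, 1)
    ).squeeze(2)
    # Assumption: uniform distribution over the initial alignment position.
    log_alpha = gold_emit[0] - math.log(S)
    for t in range(1, T):
        # alpha_t(j) = P(y_t | j) * sum_i alpha_{t-1}(i) P(j | i), in log space.
        log_alpha = gold_emit[t] + torch.logsumexp(
            log_alpha.unsqueeze(1) + trans_log_probs[t], dim=0
        )
    return -torch.logsumexp(log_alpha, dim=0)


if __name__ == "__main__":
    T, S, V = 5, 7, 11  # Target length, source length, vocabulary size.
    torch.manual_seed(0)
    emit = torch.randn(T, S, V).log_softmax(-1)
    # Monotonicity mask: transitions to earlier source positions are banned.
    mono = torch.triu(torch.ones(S, S, dtype=torch.bool))
    trans = torch.randn(T, S, S).masked_fill(~mono, -float("inf")).log_softmax(-1)
    print(monotonic_nll(emit, trans, torch.randint(V, (T,))))
```

In the model itself the emission and transition distributions would be
parameterized by the LSTM encoder/decoder rather than random tensors;
`--enforce_monotonic` corresponds roughly to the masking shown above, and a
non-zero `--attention_context` widens what the transition distribution is
conditioned on.
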
1 change: 1 addition & 0 deletions pyproject.toml
@@ -29,6 +29,7 @@ keywords = [
]
dependencies = [
"maxwell >= 0.2.4",
"numpy >= 1.24.3",
"pytorch-lightning >= 1.7.0, < 2.0.0",
"torch >= 2.0.0",
"wandb >= 0.15.3",
25 changes: 19 additions & 6 deletions yoyodyne.bib
@@ -1,11 +1,11 @@
@inproceedings{Luong:15,
@inproceedings{Luong:EtAl:15,
author = {Luong, Minh-Thang and Pham, Hieu and Manning, Christopher D.},
year = {2015},
title = {Effective approaches to attention-based neural machine translation},
booktitle = {Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing},
pages = {1412–1421}}

@inproceedings{Faruqui:16,
@inproceedings{Faruqui:EtAl:16,
author = {Faruqui, Manaal and Tsvetkov, Yulia and Neubig, Graham and Dyer, Chris},
year = {2016},
title = {Morphological inflection generation using character sequence to sequence learning},
@@ -19,31 +19,44 @@ @inproceedings{Kann:Schuetze:16
booktitle = {Proceedings of the 14th SIGMORPHON Workshop on Computational Research in Phonetics, Phonology, and Morphology},
pages = {62-70}}

@inproceedings{See:17,
@inproceedings{See:EtAl:17,
author = {See, Abigail and Liu, Peter J. and Manning, Christopher D.},
year = {2017},
title = {Get to the point: summarization with pointer-generator networks},
booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
pages = {1073-1083}}

@inproceedings{Vaswani:17,
@inproceedings{Vaswani:EtAl:17,
author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and Kaiser, Łukasz and Polosukhin, Illia},
year = {2017},
title = {Attention is all you need},
booktitle = {Advances in Neural Information Processing Systems 30},
pages = {5998-6008}}

@inproceedings{Sharma:18,
@inproceedings{Sharma:EtAl:18,
author = {Sharma, Abhishek and Katrapati, Ganesh and Sharma, Dipti Misra},
year = {2018},
title = {{IIT(BHU)-IIITH} at {CoNLL}--{SIGMORPHON} 2018 shared task on universal morphological reinflection},
booktitle = {Proceedings of the {CoNLL}--{SIGMORPHON 2018} Shared Task: Universal Morphological Reinflection},
pages = {105-111}}

@inproceedings{Wu:21,
@inproceedings{Wu:Cotterell:19,
author = {Wu, Shijie and Cotterell, Ryan},
title = {Exact hard monotonic attention for character-level transduction},
booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics},
year = {2019},
pages = {1530-1537}}

@inproceedings{Wu:EtAl:21,
author = {Wu, Shijie and Cotterell, Ryan and Hulden, Mans},
year = {2021},
title = {Applying the transformer to character-level transductions},
booktitle = {Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume},
pages = {1901-1907}}

@inproceedings{Wiemerslage:EtAl:24,
title = {Quantifying the hyperparameter sensitivity of neural networks for character-level sequence-to-sequence tasks},
author = {Wiemerslage, Adam and Gorman, Kyle and von der Wense, Katharina},
booktitle = {Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)},
year = {2024},
pages = {674-689}}
