diff --git a/content/publication/acl-2017-association-linguistics/cite.bib b/content/publication/acl-2017-association-linguistics/cite.bib new file mode 100644 index 0000000..c47d1af --- /dev/null +++ b/content/publication/acl-2017-association-linguistics/cite.bib @@ -0,0 +1,11 @@ +@proceedings{acl-2017-association-linguistics, + address = {Vancouver, Canada}, + doi = {10.18653/v1/P17-2}, + editor = {Barzilay, Regina and +Kan, Min-Yen}, + month = {July}, + publisher = {Association for Computational Linguistics}, + title = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)}, + url = {https://aclanthology.org/P17-2000}, + year = {2017} +} diff --git a/content/publication/acl-2017-association-linguistics/index.md b/content/publication/acl-2017-association-linguistics/index.md new file mode 100644 index 0000000..b03138f --- /dev/null +++ b/content/publication/acl-2017-association-linguistics/index.md @@ -0,0 +1,16 @@ +--- +title: 'Proceedings of the 55th Annual Meeting of the Association for Computational + Linguistics (Volume 2: Short Papers)' +authors: +- Regina Barzilay +- Min-Yen Kan +date: '2017-07-01' +publishDate: '2024-07-11T07:40:56.389063Z' +publication_types: +- book +publication: '*Association for Computational Linguistics*' +doi: 10.18653/v1/P17-2 +links: +- name: URL + url: https://aclanthology.org/P17-2000 +--- diff --git a/content/publication/bhola-etal-2020-retrieving/cite.bib b/content/publication/bhola-etal-2020-retrieving/cite.bib new file mode 100644 index 0000000..d75806b --- /dev/null +++ b/content/publication/bhola-etal-2020-retrieving/cite.bib @@ -0,0 +1,19 @@ +@inproceedings{bhola-etal-2020-retrieving, + abstract = {We introduce a deep learning model to learn the set of enumerated job skills associated with a job description. 
In our analysis of a large-scale government job portal mycareersfuture.sg, we observe that as much as 65% of job descriptions miss describing a significant number of relevant skills. Our model addresses this task from the perspective of an extreme multi-label classification (XMLC) problem, where descriptions are the evidence for the binary relevance of thousands of individual skills. Building upon the current state-of-the-art language modeling approaches such as BERT, we show our XMLC method improves on an existing baseline solution by over 9% and 7% absolute improvements in terms of recall and normalized discounted cumulative gain. We further show that our approach effectively addresses the missing skills problem, and helps in recovering relevant skills that were missed out in the job postings by taking into account the structured semantic representation of skills and their co-occurrences through a Correlation Aware Bootstrapping process. We further show that our approach, to ensure the BERT-XMLC model accounts for structured semantic representation of skills and their co-occurrences through a Correlation Aware Bootstrapping process, effectively addresses the missing skills problem, and helps in recovering relevant skills that were missed out in the job postings. 
To facilitate future research and replication of our work, we have made the dataset and the implementation of our model publicly available.}, + address = {Barcelona, Spain (Online)}, + author = {Bhola, Akshay and +Halder, Kishaloy and +Prasad, Animesh and +Kan, Min-Yen}, + booktitle = {Proceedings of the 28th International Conference on Computational Linguistics}, + doi = {10.18653/v1/2020.coling-main.513}, + editor = {Scott, Donia and +Bel, Nuria and +Zong, Chengqing}, + month = {December}, + pages = {5832--5842}, + publisher = {International Committee on Computational Linguistics}, + title = {Retrieving Skills from Job Descriptions: A Language Model Based Extreme Multi-label Classification Framework}, + url = {https://aclanthology.org/2020.coling-main.513}, + year = {2020} +} diff --git a/content/publication/bhola-etal-2020-retrieving/index.md b/content/publication/bhola-etal-2020-retrieving/index.md new file mode 100644 index 0000000..7795f4c --- /dev/null +++ b/content/publication/bhola-etal-2020-retrieving/index.md @@ -0,0 +1,37 @@ +--- +title: 'Retrieving Skills from Job Descriptions: A Language Model Based Extreme Multi-label + Classification Framework' +authors: +- Akshay Bhola +- Kishaloy Halder +- Animesh Prasad +- Min-Yen Kan +date: '2020-12-01' +publishDate: '2024-07-11T07:40:56.291153Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 28th International Conference on Computational Linguistics*' +doi: 10.18653/v1/2020.coling-main.513 +abstract: We introduce a deep learning model to learn the set of enumerated job skills + associated with a job description. In our analysis of a large-scale government job + portal mycareersfuture.sg, we observe that as much as 65% of job descriptions miss + describing a significant number of relevant skills. 
Our model addresses this task + from the perspective of an extreme multi-label classification (XMLC) problem, where + descriptions are the evidence for the binary relevance of thousands of individual + skills. Building upon the current state-of-the-art language modeling approaches + such as BERT, we show our XMLC method improves on an existing baseline solution + by over 9% and 7% absolute improvements in terms of recall and normalized discounted + cumulative gain. We further show that our approach effectively addresses the missing + skills problem, and helps in recovering relevant skills that were missed out in + the job postings by taking into account the structured semantic representation of + skills and their co-occurrences through a Correlation Aware Bootstrapping process. + We further show that our approach, to ensure the BERT-XMLC model accounts for structured + semantic representation of skills and their co-occurrences through a Correlation + Aware Bootstrapping process, effectively addresses the missing skills problem, and + helps in recovering relevant skills that were missed out in the job postings. To + facilitate future research and replication of our work, we have made the dataset + and the implementation of our model publicly available. +links: +- name: URL + url: https://aclanthology.org/2020.coling-main.513 +--- diff --git a/content/publication/bird-etal-2008-acl/cite.bib b/content/publication/bird-etal-2008-acl/cite.bib new file mode 100644 index 0000000..77787d9 --- /dev/null +++ b/content/publication/bird-etal-2008-acl/cite.bib @@ -0,0 +1,27 @@ +@inproceedings{bird-etal-2008-acl, + abstract = {The ACL Anthology is a digital archive of conference and journal papers in natural language processing and computational linguistics. Its primary purpose is to serve as a reference repository of research results, but we believe that it can also be an object of study and a platform for research in its own right. 
We describe an enriched and standardized reference corpus derived from the ACL Anthology that can be used for research in scholarly document processing. This corpus, which we call the ACL Anthology Reference Corpus (ACL ARC), brings together the recent activities of a number of research groups around the world. Our goal is to make the corpus widely available, and to encourage other researchers to use it as a standard testbed for experiments in both bibliographic and bibliometric research.}, + address = {Marrakech, Morocco}, + author = {Bird, Steven and +Dale, Robert and +Dorr, Bonnie and +Gibson, Bryan and +Joseph, Mark and +Kan, Min-Yen and +Lee, Dongwon and +Powley, Brett and +Radev, Dragomir and +Tan, Yee Fan}, + booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC'08)}, + editor = {Calzolari, Nicoletta and +Choukri, Khalid and +Maegaard, Bente and +Mariani, Joseph and +Odijk, Jan and +Piperidis, Stelios and +Tapias, Daniel}, + month = {May}, + publisher = {European Language Resources Association (ELRA)}, + title = {The ACL Anthology Reference Corpus: A Reference Dataset for Bibliographic Research in Computational Linguistics}, + url = {http://www.lrec-conf.org/proceedings/lrec2008/pdf/445_paper.pdf}, + year = {2008} +} diff --git a/content/publication/bird-etal-2008-acl/index.md b/content/publication/bird-etal-2008-acl/index.md new file mode 100644 index 0000000..adc4baa --- /dev/null +++ b/content/publication/bird-etal-2008-acl/index.md @@ -0,0 +1,32 @@ +--- +title: 'The ACL Anthology Reference Corpus: A Reference Dataset for Bibliographic + Research in Computational Linguistics' +authors: +- Steven Bird +- Robert Dale +- Bonnie Dorr +- Bryan Gibson +- Mark Joseph +- Min-Yen Kan +- Dongwon Lee +- Brett Powley +- Dragomir Radev +- Yee Fan Tan +date: '2008-05-01' +publishDate: '2024-07-11T07:40:56.554775Z' +publication_types: +- paper-conference +publication: "*Proceedings of the Sixth International Conference 
on Language Resources + and Evaluation (LREC'08)*" +abstract: The ACL Anthology is a digital archive of conference and journal papers + in natural language processing and computational linguistics. Its primary purpose + is to serve as a reference repository of research results, but we believe that it + can also be an object of study and a platform for research in its own right. We + describe an enriched and standardized reference corpus derived from the ACL Anthology + that can be used for research in scholarly document processing. This corpus, which + we call the ACL Anthology Reference Corpus (ACL ARC), brings together the recent + activities of a number of research groups around the world. Our goal is to make + the corpus widely available, and to encourage other researchers to use it as a standard + testbed for experiments in both bibliographic and bibliometric research. +url_pdf: http://www.lrec-conf.org/proceedings/lrec2008/pdf/445_paper.pdf +--- diff --git a/content/publication/bysani-kan-2012-integrating/cite.bib b/content/publication/bysani-kan-2012-integrating/cite.bib new file mode 100644 index 0000000..b20d1a8 --- /dev/null +++ b/content/publication/bysani-kan-2012-integrating/cite.bib @@ -0,0 +1,13 @@ +@inproceedings{bysani-kan-2012-integrating, + address = {Jeju Island, Korea}, + author = {Bysani, Praveen and +Kan, Min-Yen}, + booktitle = {Proceedings of the ACL-2012 Special Workshop on Rediscovering 50 Years of Discoveries}, + editor = {Banchs, Rafael E.}, + month = {July}, + pages = {83--87}, + publisher = {Association for Computational Linguistics}, + title = {Integrating User-Generated Content in the ACL Anthology}, + url = {https://aclanthology.org/W12-3209}, + year = {2012} +} diff --git a/content/publication/bysani-kan-2012-integrating/index.md b/content/publication/bysani-kan-2012-integrating/index.md new file mode 100644 index 0000000..36ad406 --- /dev/null +++ b/content/publication/bysani-kan-2012-integrating/index.md @@ -0,0 +1,15 @@ +--- 
+title: Integrating User-Generated Content in the ACL Anthology +authors: +- Praveen Bysani +- Min-Yen Kan +date: '2012-07-01' +publishDate: '2024-07-11T07:40:56.444716Z' +publication_types: +- paper-conference +publication: '*Proceedings of the ACL-2012 Special Workshop on Rediscovering 50 Years + of Discoveries*' +links: +- name: URL + url: https://aclanthology.org/W12-3209 +--- diff --git a/content/publication/cao-etal-2020-expertise/cite.bib b/content/publication/cao-etal-2020-expertise/cite.bib new file mode 100644 index 0000000..5f265cc --- /dev/null +++ b/content/publication/cao-etal-2020-expertise/cite.bib @@ -0,0 +1,22 @@ +@inproceedings{cao-etal-2020-expertise, + abstract = {The curse of knowledge can impede communication between experts and laymen. We propose a new task of expertise style transfer and contribute a manually annotated dataset with the goal of alleviating such cognitive biases. Solving this task not only simplifies the professional language, but also improves the accuracy and expertise level of laymen descriptions using simple words. This is a challenging task, unaddressed in previous work, as it requires the models to have expert intelligence in order to modify text with a deep understanding of domain knowledge and structures. We establish the benchmark performance of five state-of-the-art models for style transfer and text simplification. The results demonstrate a significant gap between machine and human performance. We also discuss the challenges of automatic evaluation, to provide insights into future research directions. 
The dataset is publicly available at https://srhthu.github.io/expertise-style-transfer/.}, + address = {Online}, + author = {Cao, Yixin and +Shui, Ruihao and +Pan, Liangming and +Kan, Min-Yen and +Liu, Zhiyuan and +Chua, Tat-Seng}, + booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics}, + doi = {10.18653/v1/2020.acl-main.100}, + editor = {Jurafsky, Dan and +Chai, Joyce and +Schluter, Natalie and +Tetreault, Joel}, + month = {July}, + pages = {1061--1071}, + publisher = {Association for Computational Linguistics}, + title = {Expertise Style Transfer: A New Task Towards Better Communication between Experts and Laymen}, + url = {https://aclanthology.org/2020.acl-main.100}, + year = {2020} +} diff --git a/content/publication/cao-etal-2020-expertise/index.md b/content/publication/cao-etal-2020-expertise/index.md new file mode 100644 index 0000000..184bc62 --- /dev/null +++ b/content/publication/cao-etal-2020-expertise/index.md @@ -0,0 +1,33 @@ +--- +title: 'Expertise Style Transfer: A New Task Towards Better Communication between + Experts and Laymen' +authors: +- Yixin Cao +- Ruihao Shui +- Liangming Pan +- Min-Yen Kan +- Zhiyuan Liu +- Tat-Seng Chua +date: '2020-07-01' +publishDate: '2024-07-11T07:40:56.298355Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 58th Annual Meeting of the Association for Computational + Linguistics*' +doi: 10.18653/v1/2020.acl-main.100 +abstract: The curse of knowledge can impede communication between experts and laymen. + We propose a new task of expertise style transfer and contribute a manually annotated + dataset with the goal of alleviating such cognitive biases. Solving this task not + only simplifies the professional language, but also improves the accuracy and expertise + level of laymen descriptions using simple words. 
This is a challenging task, unaddressed + in previous work, as it requires the models to have expert intelligence in order + to modify text with a deep understanding of domain knowledge and structures. We + establish the benchmark performance of five state-of-the-art models for style transfer + and text simplification. The results demonstrate a significant gap between machine + and human performance. We also discuss the challenges of automatic evaluation, to + provide insights into future research directions. The dataset is publicly available + at https://srhthu.github.io/expertise-style-transfer/. +links: +- name: URL + url: https://aclanthology.org/2020.acl-main.100 +--- diff --git a/content/publication/chandrasekaran-kan-2018-countering/cite.bib b/content/publication/chandrasekaran-kan-2018-countering/cite.bib new file mode 100644 index 0000000..e564431 --- /dev/null +++ b/content/publication/chandrasekaran-kan-2018-countering/cite.bib @@ -0,0 +1,18 @@ +@inproceedings{chandrasekaran-kan-2018-countering, + abstract = {We systematically confirm that instructors are strongly influenced by the user interface presentation of Massive Online Open Course (MOOC) discussion forums. In a large scale dataset, we conclusively show that instructor interventions exhibit strong position bias, as measured by the position where the thread appeared on the user interface at the time of intervention. We measure and remove this bias, enabling unbiased statistical modelling and evaluation. 
We show that our de-biased classifier improves predicting interventions over the state-of-the-art on courses with sufficient number of interventions by 8.2% in F1 and 24.4% in recall on average.}, + address = {Melbourne, Australia}, + author = {Chandrasekaran, Muthu Kumar and +Kan, Min-Yen}, + booktitle = {Proceedings of the 5th Workshop on Natural Language Processing Techniques for Educational Applications}, + doi = {10.18653/v1/W18-3720}, + editor = {Tseng, Yuen-Hsien and +Chen, Hsin-Hsi and +Ng, Vincent and +Komachi, Mamoru}, + month = {July}, + pages = {135--142}, + publisher = {Association for Computational Linguistics}, + title = {Countering Position Bias in Instructor Interventions in MOOC Discussion Forums}, + url = {https://aclanthology.org/W18-3720}, + year = {2018} +} diff --git a/content/publication/chandrasekaran-kan-2018-countering/index.md b/content/publication/chandrasekaran-kan-2018-countering/index.md new file mode 100644 index 0000000..96700b9 --- /dev/null +++ b/content/publication/chandrasekaran-kan-2018-countering/index.md @@ -0,0 +1,24 @@ +--- +title: Countering Position Bias in Instructor Interventions in MOOC Discussion Forums +authors: +- Muthu Kumar Chandrasekaran +- Min-Yen Kan +date: '2018-07-01' +publishDate: '2024-07-11T07:40:56.347855Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 5th Workshop on Natural Language Processing Techniques + for Educational Applications*' +doi: 10.18653/v1/W18-3720 +abstract: We systematically confirm that instructors are strongly influenced by the + user interface presentation of Massive Online Open Course (MOOC) discussion forums. + In a large scale dataset, we conclusively show that instructor interventions exhibit + strong position bias, as measured by the position where the thread appeared on the + user interface at the time of intervention. We measure and remove this bias, enabling + unbiased statistical modelling and evaluation. 
We show that our de-biased classifier + improves predicting interventions over the state-of-the-art on courses with sufficient + number of interventions by 8.2% in F1 and 24.4% in recall on average. +links: +- name: URL + url: https://aclanthology.org/W18-3720 +--- diff --git a/content/publication/chen-etal-2015-interactive/cite.bib b/content/publication/chen-etal-2015-interactive/cite.bib new file mode 100644 index 0000000..3eb7c92 --- /dev/null +++ b/content/publication/chen-etal-2015-interactive/cite.bib @@ -0,0 +1,20 @@ +@inproceedings{chen-etal-2015-interactive, + address = {Beijing, China}, + author = {Chen, Tao and +Zheng, Naijia and +Zhao, Yue and +Chandrasekaran, Muthu Kumar and +Kan, Min-Yen}, + booktitle = {Proceedings of the 2nd Workshop on Natural Language Processing Techniques for Educational Applications}, + doi = {10.18653/v1/W15-4406}, + editor = {Chen, Hsin-Hsi and +Tseng, Yuen-Hsien and +Matsumoto, Yuji and +Wong, Lung Hsiang}, + month = {July}, + pages = {34--42}, + publisher = {Association for Computational Linguistics}, + title = {Interactive Second Language Learning from News Websites}, + url = {https://aclanthology.org/W15-4406}, + year = {2015} +} diff --git a/content/publication/chen-etal-2015-interactive/index.md b/content/publication/chen-etal-2015-interactive/index.md new file mode 100644 index 0000000..921265b --- /dev/null +++ b/content/publication/chen-etal-2015-interactive/index.md @@ -0,0 +1,19 @@ +--- +title: Interactive Second Language Learning from News Websites +authors: +- Tao Chen +- Naijia Zheng +- Yue Zhao +- Muthu Kumar Chandrasekaran +- Min-Yen Kan +date: '2015-07-01' +publishDate: '2024-07-11T07:40:56.401303Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 2nd Workshop on Natural Language Processing Techniques + for Educational Applications*' +doi: 10.18653/v1/W15-4406 +links: +- name: URL + url: https://aclanthology.org/W15-4406 +--- diff --git 
a/content/publication/councill-etal-2008-parscit/cite.bib b/content/publication/councill-etal-2008-parscit/cite.bib new file mode 100644 index 0000000..88d06a2 --- /dev/null +++ b/content/publication/councill-etal-2008-parscit/cite.bib @@ -0,0 +1,20 @@ +@inproceedings{councill-etal-2008-parscit, + abstract = {We describe ParsCit, a freely available, open-source implementation of a reference string parsing package. At the core of ParsCit is a trained conditional random field (CRF) model used to label the token sequences in the reference string. A heuristic model wraps this core with added functionality to identify reference strings from a plain text file, and to retrieve the citation contexts. The package comes with utilities to run it as a web service or as a standalone utility. We compare ParsCit on three distinct reference string datasets and show that it compares well with other previously published work.}, + address = {Marrakech, Morocco}, + author = {Councill, Isaac and +Giles, C. Lee and +Kan, Min-Yen}, + booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC'08)}, + editor = {Calzolari, Nicoletta and +Choukri, Khalid and +Maegaard, Bente and +Mariani, Joseph and +Odijk, Jan and +Piperidis, Stelios and +Tapias, Daniel}, + month = {May}, + publisher = {European Language Resources Association (ELRA)}, + title = {ParsCit: an Open-source CRF Reference String Parsing Package}, + url = {http://www.lrec-conf.org/proceedings/lrec2008/pdf/166_paper.pdf}, + year = {2008} +} diff --git a/content/publication/councill-etal-2008-parscit/index.md b/content/publication/councill-etal-2008-parscit/index.md new file mode 100644 index 0000000..1b346a9 --- /dev/null +++ b/content/publication/councill-etal-2008-parscit/index.md @@ -0,0 +1,22 @@ +--- +title: 'ParsCit: an Open-source CRF Reference String Parsing Package' +authors: +- Isaac Councill +- C. 
Lee Giles +- Min-Yen Kan +date: '2008-05-01' +publishDate: '2024-07-11T07:40:56.561520Z' +publication_types: +- paper-conference +publication: "*Proceedings of the Sixth International Conference on Language Resources + and Evaluation (LREC'08)*" +abstract: We describe ParsCit, a freely available, open-source implementation of a + reference string parsing package. At the core of ParsCit is a trained conditional + random field (CRF) model used to label the token sequences in the reference string. + A heuristic model wraps this core with added functionality to identify reference + strings from a plain text file, and to retrieve the citation contexts. The package + comes with utilities to run it as a web service or as a standalone utility. We compare + ParsCit on three distinct reference string datasets and show that it compares well + with other previously published work. +url_pdf: http://www.lrec-conf.org/proceedings/lrec2008/pdf/166_paper.pdf +--- diff --git a/content/publication/elmacioglu-etal-2007-psnus/cite.bib b/content/publication/elmacioglu-etal-2007-psnus/cite.bib new file mode 100644 index 0000000..e8215c0 --- /dev/null +++ b/content/publication/elmacioglu-etal-2007-psnus/cite.bib @@ -0,0 +1,18 @@ +@inproceedings{elmacioglu-etal-2007-psnus, + address = {Prague, Czech Republic}, + author = {Elmacioglu, Ergin and +Tan, Yee Fan and +Yan, Su and +Kan, Min-Yen and +Lee, Dongwon}, + booktitle = {Proceedings of the Fourth International Workshop on Semantic Evaluations (SemEval-2007)}, + editor = {Agirre, Eneko and +Màrquez, Lluís and +Wicentowski, Richard}, + month = {June}, + pages = {268--271}, + publisher = {Association for Computational Linguistics}, + title = {PSNUS: Web People Name Disambiguation by Simple Clustering with Rich Features}, + url = {https://aclanthology.org/S07-1058}, + year = {2007} +} diff --git a/content/publication/elmacioglu-etal-2007-psnus/index.md b/content/publication/elmacioglu-etal-2007-psnus/index.md new file mode 100644 index 
0000000..f61cad8 --- /dev/null +++ b/content/publication/elmacioglu-etal-2007-psnus/index.md @@ -0,0 +1,18 @@ +--- +title: 'PSNUS: Web People Name Disambiguation by Simple Clustering with Rich Features' +authors: +- Ergin Elmacioglu +- Yee Fan Tan +- Su Yan +- Min-Yen Kan +- Dongwon Lee +date: '2007-06-01' +publishDate: '2024-07-11T07:40:56.579914Z' +publication_types: +- paper-conference +publication: '*Proceedings of the Fourth International Workshop on Semantic Evaluations + (SemEval-2007)*' +links: +- name: URL + url: https://aclanthology.org/S07-1058 +--- diff --git a/content/publication/gildea-etal-2018-acl/cite.bib b/content/publication/gildea-etal-2018-acl/cite.bib new file mode 100644 index 0000000..09ae4cf --- /dev/null +++ b/content/publication/gildea-etal-2018-acl/cite.bib @@ -0,0 +1,21 @@ +@inproceedings{gildea-etal-2018-acl, + abstract = {The Association of Computational Linguistic's Anthology is the open source archive, and the main source for computational linguistics and natural language processing's scientific literature. The ACL Anthology is currently maintained exclusively by community volunteers and has to be available and up-to-date at all times. We first discuss the current, open source approach used to achieve this, and then discuss how the planned use of Docker images will improve the Anthology's long-term stability. This change will make it easier for researchers to utilize Anthology data for experimentation. We believe the ACL community can directly benefit from the extension-friendly architecture of the Anthology. We end by issuing an open challenge of reviewer matching we encourage the community to rally towards.}, + address = {Melbourne, Australia}, + author = {Gildea, Daniel and +Kan, Min-Yen and +Madnani, Nitin and +Teichmann, Christoph and +Villalba, Martín}, + booktitle = {Proceedings of Workshop for NLP Open Source Software (NLP-OSS)}, + doi = {10.18653/v1/W18-2504}, + editor = {Park, Eunjeong L. 
and +Hagiwara, Masato and +Milajevs, Dmitrijs and +Tan, Liling}, + month = {July}, + pages = {23--28}, + publisher = {Association for Computational Linguistics}, + title = {The ACL Anthology: Current State and Future Directions}, + url = {https://aclanthology.org/W18-2504}, + year = {2018} +} diff --git a/content/publication/gildea-etal-2018-acl/index.md b/content/publication/gildea-etal-2018-acl/index.md new file mode 100644 index 0000000..ebeaabc --- /dev/null +++ b/content/publication/gildea-etal-2018-acl/index.md @@ -0,0 +1,28 @@ +--- +title: 'The ACL Anthology: Current State and Future Directions' +authors: +- Daniel Gildea +- Min-Yen Kan +- Nitin Madnani +- Christoph Teichmann +- Martín Villalba +date: '2018-07-01' +publishDate: '2024-07-11T07:40:56.354411Z' +publication_types: +- paper-conference +publication: '*Proceedings of Workshop for NLP Open Source Software (NLP-OSS)*' +doi: 10.18653/v1/W18-2504 +abstract: The Association of Computational Linguistic's Anthology is the open source + archive, and the main source for computational linguistics and natural language + processing's scientific literature. The ACL Anthology is currently maintained exclusively + by community volunteers and has to be available and up-to-date at all times. We + first discuss the current, open source approach used to achieve this, and then discuss + how the planned use of Docker images will improve the Anthology's long-term stability. + This change will make it easier for researchers to utilize Anthology data for experimentation. + We believe the ACL community can directly benefit from the extension-friendly architecture + of the Anthology. We end by issuing an open challenge of reviewer matching we encourage + the community to rally towards. 
+links: +- name: URL + url: https://aclanthology.org/W18-2504 +--- diff --git a/content/publication/halder-etal-2017-modeling/cite.bib b/content/publication/halder-etal-2017-modeling/cite.bib new file mode 100644 index 0000000..f6bdf6d --- /dev/null +++ b/content/publication/halder-etal-2017-modeling/cite.bib @@ -0,0 +1,18 @@ +@inproceedings{halder-etal-2017-modeling, + abstract = {Patients turn to Online Health Communities not only for information on specific conditions but also for emotional support. Previous research has indicated that the progression of emotional status can be studied through the linguistic patterns of an individual's posts. We analyze a real-world dataset from the Mental Health section of HealthBoards.com. Estimated from the word usages in their posts, we find that the emotional progress across patients vary widely. We study the problem of predicting a patient's emotional status in the future from her past posts and we propose a Recurrent Neural Network (RNN) based architecture to address it. We find that the future emotional status can be predicted with reasonable accuracy given her historical posts and participation features. Our evaluation results demonstrate the efficacy of our proposed architecture, by outperforming state-of-the-art approaches with over 0.13 reduction in Mean Absolute Error.}, + address = {Copenhagen, Denmark}, + author = {Halder, Kishaloy and +Poddar, Lahari and +Kan, Min-Yen}, + booktitle = {Proceedings of the 8th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis}, + doi = {10.18653/v1/W17-5217}, + editor = {Balahur, Alexandra and +Mohammad, Saif M. 
and +van der Goot, Erik}, + month = {September}, + pages = {127--135}, + publisher = {Association for Computational Linguistics}, + title = {Modeling Temporal Progression of Emotional Status in Mental Health Forum: A Recurrent Neural Net Approach}, + url = {https://aclanthology.org/W17-5217}, + year = {2017} +} diff --git a/content/publication/halder-etal-2017-modeling/index.md b/content/publication/halder-etal-2017-modeling/index.md new file mode 100644 index 0000000..53f4e0c --- /dev/null +++ b/content/publication/halder-etal-2017-modeling/index.md @@ -0,0 +1,30 @@ +--- +title: 'Modeling Temporal Progression of Emotional Status in Mental Health Forum: + A Recurrent Neural Net Approach' +authors: +- Kishaloy Halder +- Lahari Poddar +- Min-Yen Kan +date: '2017-09-01' +publishDate: '2024-07-11T07:40:56.375219Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 8th Workshop on Computational Approaches to Subjectivity, + Sentiment and Social Media Analysis*' +doi: 10.18653/v1/W17-5217 +abstract: Patients turn to Online Health Communities not only for information on specific + conditions but also for emotional support. Previous research has indicated that + the progression of emotional status can be studied through the linguistic patterns + of an individual's posts. We analyze a real-world dataset from the Mental Health + section of HealthBoards.com. Estimated from the word usages in their posts, we find + that the emotional progress across patients vary widely. We study the problem of + predicting a patient's emotional status in the future from her past posts and we + propose a Recurrent Neural Network (RNN) based architecture to address it. We find + that the future emotional status can be predicted with reasonable accuracy given + her historical posts and participation features. 
Our evaluation results demonstrate + the efficacy of our proposed architecture, by outperforming state-of-the-art approaches + with over 0.13 reduction in Mean Absolute Error. +links: +- name: URL + url: https://aclanthology.org/W17-5217 +--- diff --git a/content/publication/halder-etal-2019-predicting/cite.bib b/content/publication/halder-etal-2019-predicting/cite.bib new file mode 100644 index 0000000..ec8cf42 --- /dev/null +++ b/content/publication/halder-etal-2019-predicting/cite.bib @@ -0,0 +1,18 @@ +@inproceedings{halder-etal-2019-predicting, + abstract = {Users participate in online discussion forums to learn from others and share their knowledge with the community. They often start a thread with a question or by sharing their new findings on a certain topic. We find that, unlike Community Question Answering, where questions are mostly factoid based, the threads in a forum are often open-ended (e.g., asking for recommendations from others) without a single correct answer. In this paper, we address the task of identifying helpful posts in a forum thread to help users comprehend long running discussion threads, which often contain repetitive or irrelevant posts. We propose a recurrent neural network based architecture to model (i) the relevance of a post regarding the original post starting the thread and (ii) the novelty it brings to the discussion, compared to the previous posts in the thread. 
Experimental results on different types of online forum datasets show that our model significantly outperforms the state-of-the-art neural network models for text classification.}, + address = {Minneapolis, Minnesota}, + author = {Halder, Kishaloy and +Kan, Min-Yen and +Sugiyama, Kazunari}, + booktitle = {Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)}, + doi = {10.18653/v1/N19-1318}, + editor = {Burstein, Jill and +Doran, Christy and +Solorio, Thamar}, + month = {June}, + pages = {3148--3157}, + publisher = {Association for Computational Linguistics}, + title = {Predicting Helpful Posts in Open-Ended Discussion Forums: A Neural Architecture}, + url = {https://aclanthology.org/N19-1318}, + year = {2019} +} diff --git a/content/publication/halder-etal-2019-predicting/index.md b/content/publication/halder-etal-2019-predicting/index.md new file mode 100644 index 0000000..de5cf85 --- /dev/null +++ b/content/publication/halder-etal-2019-predicting/index.md @@ -0,0 +1,31 @@ +--- +title: 'Predicting Helpful Posts in Open-Ended Discussion Forums: A Neural Architecture' +authors: +- Kishaloy Halder +- Min-Yen Kan +- Kazunari Sugiyama +date: '2019-06-01' +publishDate: '2024-07-11T07:40:56.327069Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 2019 Conference of the North American Chapter of + the Association for Computational Linguistics: Human Language Technologies, Volume + 1 (Long and Short Papers)*' +doi: 10.18653/v1/N19-1318 +abstract: Users participate in online discussion forums to learn from others and share + their knowledge with the community. They often start a thread with a question or + by sharing their new findings on a certain topic. 
We find that, unlike Community + Question Answering, where questions are mostly factoid based, the threads in a forum + are often open-ended (e.g., asking for recommendations from others) without a single + correct answer. In this paper, we address the task of identifying helpful posts + in a forum thread to help users comprehend long running discussion threads, which + often contain repetitive or irrelevant posts. We propose a recurrent neural network + based architecture to model (i) the relevance of a post regarding the original post + starting the thread and (ii) the novelty it brings to the discussion, compared to + the previous posts in the thread. Experimental results on different types of online + forum datasets show that our model significantly outperforms the state-of-the-art + neural network models for text classification. +links: +- name: URL + url: https://aclanthology.org/N19-1318 +--- diff --git a/content/publication/hoang-etal-2009-examination/cite.bib b/content/publication/hoang-etal-2009-examination/cite.bib new file mode 100644 index 0000000..c39ec7c --- /dev/null +++ b/content/publication/hoang-etal-2009-examination/cite.bib @@ -0,0 +1,17 @@ +@inproceedings{hoang-etal-2009-examination, + address = {Singapore}, + author = {Hoang, Hung Huu and +Kim, Su Nam and +Kan, Min-Yen}, + booktitle = {Proceedings of the Workshop on Multiword Expressions: Identification, Interpretation, Disambiguation and Applications (MWE 2009)}, + editor = {Anastasiou, Dimitra and +Hashimoto, Chikara and +Nakov, Preslav and +Kim, Su Nam}, + month = {August}, + pages = {31--39}, + publisher = {Association for Computational Linguistics}, + title = {A re-examination of lexical association measures}, + url = {https://aclanthology.org/W09-2905}, + year = {2009} +} diff --git a/content/publication/hoang-etal-2009-examination/index.md b/content/publication/hoang-etal-2009-examination/index.md new file mode 100644 index 0000000..0fdea75 --- /dev/null +++ 
b/content/publication/hoang-etal-2009-examination/index.md @@ -0,0 +1,16 @@ +--- +title: A re-examination of lexical association measures +authors: +- Hung Huu Hoang +- Su Nam Kim +- Min-Yen Kan +date: '2009-08-01' +publishDate: '2024-07-11T07:40:56.530100Z' +publication_types: +- paper-conference +publication: '*Proceedings of the Workshop on Multiword Expressions: Identification, + Interpretation, Disambiguation and Applications (MWE 2009)*' +links: +- name: URL + url: https://aclanthology.org/W09-2905 +--- diff --git a/content/publication/hoang-kan-2010-towards/cite.bib b/content/publication/hoang-kan-2010-towards/cite.bib new file mode 100644 index 0000000..870bd89 --- /dev/null +++ b/content/publication/hoang-kan-2010-towards/cite.bib @@ -0,0 +1,14 @@ +@inproceedings{hoang-kan-2010-towards, + address = {Beijing, China}, + author = {Hoang, Cong Duy Vu and +Kan, Min-Yen}, + booktitle = {Coling 2010: Posters}, + editor = {Huang, Chu-Ren and +Jurafsky, Dan}, + month = {August}, + pages = {427--435}, + publisher = {Coling 2010 Organizing Committee}, + title = {Towards Automated Related Work Summarization}, + url = {https://aclanthology.org/C10-2049}, + year = {2010} +} diff --git a/content/publication/hoang-kan-2010-towards/index.md b/content/publication/hoang-kan-2010-towards/index.md new file mode 100644 index 0000000..610441a --- /dev/null +++ b/content/publication/hoang-kan-2010-towards/index.md @@ -0,0 +1,14 @@ +--- +title: Towards Automated Related Work Summarization +authors: +- Cong Duy Vu Hoang +- Min-Yen Kan +date: '2010-08-01' +publishDate: '2024-07-11T07:40:56.499873Z' +publication_types: +- paper-conference +publication: '*Coling 2010: Posters*' +links: +- name: URL + url: https://aclanthology.org/C10-2049 +--- diff --git a/content/publication/jiang-etal-2018-identifying/cite.bib b/content/publication/jiang-etal-2018-identifying/cite.bib new file mode 100644 index 0000000..9526671 --- /dev/null +++ 
b/content/publication/jiang-etal-2018-identifying/cite.bib @@ -0,0 +1,18 @@ +@inproceedings{jiang-etal-2018-identifying, + abstract = {Identifying emergent research trends is a key issue for both primary researchers as well as secondary research managers. Such processes can uncover the historical development of an area, and yield insight on developing topics. We propose an embedded trend detection framework for this task which incorporates our bijunctive hypothesis that important phrases are written by important authors within a field and vice versa. By ranking both author and phrase information in a multigraph, our method jointly determines key phrases and authoritative authors. We represent this intermediate output as phrasal embeddings, and feed this to a recurrent neural network (RNN) to compute trend scores that identify research trends. Over two large datasets of scientific articles, we demonstrate that our approach successfully detects past trends from the field, outperforming baselines based solely on text centrality or citation.}, + address = {Santa Fe, New Mexico, USA}, + author = {Jiang, Shenhao and +Prasad, Animesh and +Kan, Min-Yen and +Sugiyama, Kazunari}, + booktitle = {Proceedings of the 27th International Conference on Computational Linguistics}, + editor = {Bender, Emily M. 
and +Derczynski, Leon and +Isabelle, Pierre}, + month = {August}, + pages = {259--269}, + publisher = {Association for Computational Linguistics}, + title = {Identifying Emergent Research Trends by Key Authors and Phrases}, + url = {https://aclanthology.org/C18-1022}, + year = {2018} +} diff --git a/content/publication/jiang-etal-2018-identifying/index.md b/content/publication/jiang-etal-2018-identifying/index.md new file mode 100644 index 0000000..e7db811 --- /dev/null +++ b/content/publication/jiang-etal-2018-identifying/index.md @@ -0,0 +1,28 @@ +--- +title: Identifying Emergent Research Trends by Key Authors and Phrases +authors: +- Shenhao Jiang +- Animesh Prasad +- Min-Yen Kan +- Kazunari Sugiyama +date: '2018-08-01' +publishDate: '2024-07-11T07:40:56.368249Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 27th International Conference on Computational Linguistics*' +abstract: Identifying emergent research trends is a key issue for both primary researchers + as well as secondary research managers. Such processes can uncover the historical + development of an area, and yield insight on developing topics. We propose an embedded + trend detection framework for this task which incorporates our bijunctive hypothesis + that important phrases are written by important authors within a field and vice + versa. By ranking both author and phrase information in a multigraph, our method + jointly determines key phrases and authoritative authors. We represent this intermediate + output as phrasal embeddings, and feed this to a recurrent neural network (RNN) + to compute trend scores that identify research trends. Over two large datasets of + scientific articles, we demonstrate that our approach successfully detects past + trends from the field, outperforming baselines based solely on text centrality or + citation. 
+links: +- name: URL + url: https://aclanthology.org/C18-1022 +--- diff --git a/content/publication/jin-etal-2013-mining/cite.bib b/content/publication/jin-etal-2013-mining/cite.bib new file mode 100644 index 0000000..3817a6a --- /dev/null +++ b/content/publication/jin-etal-2013-mining/cite.bib @@ -0,0 +1,19 @@ +@inproceedings{jin-etal-2013-mining, + address = {Seattle, Washington, USA}, + author = {Jin, Yiping and +Kan, Min-Yen and +Ng, Jun-Ping and +He, Xiangnan}, + booktitle = {Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing}, + editor = {Yarowsky, David and +Baldwin, Timothy and +Korhonen, Anna and +Livescu, Karen and +Bethard, Steven}, + month = {October}, + pages = {780--790}, + publisher = {Association for Computational Linguistics}, + title = {Mining Scientific Terms and their Definitions: A Study of the ACL Anthology}, + url = {https://aclanthology.org/D13-1073}, + year = {2013} +} diff --git a/content/publication/jin-etal-2013-mining/index.md b/content/publication/jin-etal-2013-mining/index.md new file mode 100644 index 0000000..07ad51a --- /dev/null +++ b/content/publication/jin-etal-2013-mining/index.md @@ -0,0 +1,17 @@ +--- +title: 'Mining Scientific Terms and their Definitions: A Study of the ACL Anthology' +authors: +- Yiping Jin +- Min-Yen Kan +- Jun-Ping Ng +- Xiangnan He +date: '2013-10-01' +publishDate: '2024-07-11T07:40:56.438607Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 2013 Conference on Empirical Methods in Natural + Language Processing*' +links: +- name: URL + url: https://aclanthology.org/D13-1073 +--- diff --git a/content/publication/kan-2015-keywords/cite.bib b/content/publication/kan-2015-keywords/cite.bib new file mode 100644 index 0000000..e4c010d --- /dev/null +++ b/content/publication/kan-2015-keywords/cite.bib @@ -0,0 +1,16 @@ +@inproceedings{kan-2015-keywords, + address = {Beijing, China}, + author = {Kan, Min-Yen}, + booktitle = {Proceedings of the ACL 2015 
Workshop on Novel Computational Approaches to Keyphrase Extraction}, + doi = {10.18653/v1/W15-3601}, + editor = {Gollapalli, Sujatha Das and +Caragea, Cornelia and +Li, Xiaoli and +Giles, C. Lee}, + month = {July}, + pages = {1}, + publisher = {Association for Computational Linguistics}, + title = {Keywords, phrases, clauses and sentences: topicality, indicativeness and informativeness at scales}, + url = {https://aclanthology.org/W15-3601}, + year = {2015} +} diff --git a/content/publication/kan-2015-keywords/index.md b/content/publication/kan-2015-keywords/index.md new file mode 100644 index 0000000..09138bc --- /dev/null +++ b/content/publication/kan-2015-keywords/index.md @@ -0,0 +1,16 @@ +--- +title: 'Keywords, phrases, clauses and sentences: topicality, indicativeness and informativeness + at scales' +authors: +- Min-Yen Kan +date: '2015-07-01' +publishDate: '2024-07-11T07:40:56.407539Z' +publication_types: +- paper-conference +publication: '*Proceedings of the ACL 2015 Workshop on Novel Computational Approaches + to Keyphrase Extraction*' +doi: 10.18653/v1/W15-3601 +links: +- name: URL + url: https://aclanthology.org/W15-3601 +--- diff --git a/content/publication/kan-etal-1998-linear/cite.bib b/content/publication/kan-etal-1998-linear/cite.bib new file mode 100644 index 0000000..f0d6790 --- /dev/null +++ b/content/publication/kan-etal-1998-linear/cite.bib @@ -0,0 +1,9 @@ +@inproceedings{kan-etal-1998-linear, + author = {Kan, Min-Yen and +Klavans, Judith L. 
and +McKeown, Kathleen R.}, + booktitle = {Sixth Workshop on Very Large Corpora}, + title = {Linear Segmentation and Segment Significance}, + url = {https://aclanthology.org/W98-1123}, + year = {1998} +} diff --git a/content/publication/kan-etal-1998-linear/index.md b/content/publication/kan-etal-1998-linear/index.md new file mode 100644 index 0000000..161d6bd --- /dev/null +++ b/content/publication/kan-etal-1998-linear/index.md @@ -0,0 +1,15 @@ +--- +title: Linear Segmentation and Segment Significance +authors: +- Min-Yen Kan +- Judith L. Klavans +- Kathleen R. McKeown +date: '1998-01-01' +publishDate: '2024-07-11T07:40:56.632433Z' +publication_types: +- paper-conference +publication: '*Sixth Workshop on Very Large Corpora*' +links: +- name: URL + url: https://aclanthology.org/W98-1123 +--- diff --git a/content/publication/kan-etal-2001-applying/cite.bib b/content/publication/kan-etal-2001-applying/cite.bib new file mode 100644 index 0000000..f5b920a --- /dev/null +++ b/content/publication/kan-etal-2001-applying/cite.bib @@ -0,0 +1,14 @@ +@inproceedings{kan-etal-2001-applying, + address = {Toulouse, France}, + author = {Kan, Min-Yen and +McKeown, Kathleen R. and +Klavans, Judith L.}, + booktitle = {Proceedings of the ACL 2001 Eighth European Workshop on Natural Language Generation (EWNLG)}, + editor = {Horacek, Helmut and +Nicolov, Nicolas and +Wanner, Leo}, + publisher = {Association for Computational Linguistics}, + title = {Applying Natural Language Generation to Indicative Summarization}, + url = {https://aclanthology.org/W01-0813}, + year = {2001} +} diff --git a/content/publication/kan-etal-2001-applying/index.md b/content/publication/kan-etal-2001-applying/index.md new file mode 100644 index 0000000..d7ff2ce --- /dev/null +++ b/content/publication/kan-etal-2001-applying/index.md @@ -0,0 +1,16 @@ +--- +title: Applying Natural Language Generation to Indicative Summarization +authors: +- Min-Yen Kan +- Kathleen R. McKeown +- Judith L. 
Klavans +date: '2001-01-01' +publishDate: '2024-07-11T07:40:56.622249Z' +publication_types: +- paper-conference +publication: '*Proceedings of the ACL 2001 Eighth European Workshop on Natural Language + Generation (EWNLG)*' +links: +- name: URL + url: https://aclanthology.org/W01-0813 +--- diff --git a/content/publication/kan-etal-2002-using/cite.bib b/content/publication/kan-etal-2002-using/cite.bib new file mode 100644 index 0000000..0b972b7 --- /dev/null +++ b/content/publication/kan-etal-2002-using/cite.bib @@ -0,0 +1,14 @@ +@inproceedings{kan-etal-2002-using, + address = {Las Palmas, Canary Islands - Spain}, + author = {Kan, Min-Yen and +Klavans, Judith L. and +McKeown, Kathleen R.}, + booktitle = {Proceedings of the Third International Conference on Language Resources and Evaluation (LREC′02)}, + editor = {González Rodrı́guez, Manuel and +Suarez Araujo, Carmen Paz}, + month = {May}, + publisher = {European Language Resources Association (ELRA)}, + title = {Using the Annotated Bibliography as a Resource for Indicative Summarization}, + url = {http://www.lrec-conf.org/proceedings/lrec2002/pdf/7.pdf}, + year = {2002} +} diff --git a/content/publication/kan-etal-2002-using/index.md b/content/publication/kan-etal-2002-using/index.md new file mode 100644 index 0000000..d463bda --- /dev/null +++ b/content/publication/kan-etal-2002-using/index.md @@ -0,0 +1,14 @@ +--- +title: Using the Annotated Bibliography as a Resource for Indicative Summarization +authors: +- Min-Yen Kan +- Judith L. Klavans +- Kathleen R. 
McKeown +date: '2002-05-01' +publishDate: '2024-07-11T07:40:56.616248Z' +publication_types: +- paper-conference +publication: '*Proceedings of the Third International Conference on Language Resources + and Evaluation (LREC′02)*' +url_pdf: http://www.lrec-conf.org/proceedings/lrec2002/pdf/7.pdf +--- diff --git a/content/publication/kan-mckeown-2002-corpus/cite.bib b/content/publication/kan-mckeown-2002-corpus/cite.bib new file mode 100644 index 0000000..7724024 --- /dev/null +++ b/content/publication/kan-mckeown-2002-corpus/cite.bib @@ -0,0 +1,13 @@ +@inproceedings{kan-mckeown-2002-corpus, + address = {Harriman, New York, USA}, + author = {Kan, Min-Yen and +McKeown, Kathleen R.}, + booktitle = {Proceedings of the International Natural Language Generation Conference}, + editor = {McKeown, Kathleen}, + month = {July}, + pages = {1--8}, + publisher = {Association for Computational Linguistics}, + title = {Corpus-trained Text Generation for Summarization}, + url = {https://aclanthology.org/W02-2101}, + year = {2002} +} diff --git a/content/publication/kan-mckeown-2002-corpus/index.md b/content/publication/kan-mckeown-2002-corpus/index.md new file mode 100644 index 0000000..de4c1b4 --- /dev/null +++ b/content/publication/kan-mckeown-2002-corpus/index.md @@ -0,0 +1,14 @@ +--- +title: Corpus-trained Text Generation for Summarization +authors: +- Min-Yen Kan +- Kathleen R. 
McKeown +date: '2002-07-01' +publishDate: '2024-07-11T07:40:56.610197Z' +publication_types: +- paper-conference +publication: '*Proceedings of the International Natural Language Generation Conference*' +links: +- name: URL + url: https://aclanthology.org/W02-2101 +--- diff --git a/content/publication/kim-etal-2009-extracting/cite.bib b/content/publication/kim-etal-2009-extracting/cite.bib new file mode 100644 index 0000000..eddf76c --- /dev/null +++ b/content/publication/kim-etal-2009-extracting/cite.bib @@ -0,0 +1,14 @@ +@inproceedings{kim-etal-2009-extracting, + address = {Sydney, Australia}, + author = {Kim, Su Nam and +Baldwin, Timothy and +Kan, Min-Yen}, + booktitle = {Proceedings of the Australasian Language Technology Association Workshop 2009}, + editor = {Pizzato, Luiz Augusto and +Schwitter, Rolf}, + month = {December}, + pages = {94--98}, + title = {Extracting Domain-Specific Words - A Statistical Approach}, + url = {https://aclanthology.org/U09-1013}, + year = {2009} +} diff --git a/content/publication/kim-etal-2009-extracting/index.md b/content/publication/kim-etal-2009-extracting/index.md new file mode 100644 index 0000000..c4f75e6 --- /dev/null +++ b/content/publication/kim-etal-2009-extracting/index.md @@ -0,0 +1,16 @@ +--- +title: Extracting Domain-Specific Words - A Statistical Approach +authors: +- Su Nam Kim +- Timothy Baldwin +- Min-Yen Kan +date: '2009-12-01' +publishDate: '2024-07-11T07:40:56.536274Z' +publication_types: +- paper-conference +publication: '*Proceedings of the Australasian Language Technology Association Workshop + 2009*' +links: +- name: URL + url: https://aclanthology.org/U09-1013 +--- diff --git a/content/publication/kim-etal-2010-evaluating/cite.bib b/content/publication/kim-etal-2010-evaluating/cite.bib new file mode 100644 index 0000000..e93eb27 --- /dev/null +++ b/content/publication/kim-etal-2010-evaluating/cite.bib @@ -0,0 +1,15 @@ +@inproceedings{kim-etal-2010-evaluating, + address = {Beijing, China}, + author = {Kim, 
Su Nam and +Baldwin, Timothy and +Kan, Min-Yen}, + booktitle = {Proceedings of the 23rd International Conference on Computational Linguistics (Coling 2010)}, + editor = {Huang, Chu-Ren and +Jurafsky, Dan}, + month = {August}, + pages = {572--580}, + publisher = {Coling 2010 Organizing Committee}, + title = {Evaluating N-gram based Evaluation Metrics for Automatic Keyphrase Extraction}, + url = {https://aclanthology.org/C10-1065}, + year = {2010} +} diff --git a/content/publication/kim-etal-2010-evaluating/index.md b/content/publication/kim-etal-2010-evaluating/index.md new file mode 100644 index 0000000..e5a8159 --- /dev/null +++ b/content/publication/kim-etal-2010-evaluating/index.md @@ -0,0 +1,16 @@ +--- +title: Evaluating N-gram based Evaluation Metrics for Automatic Keyphrase Extraction +authors: +- Su Nam Kim +- Timothy Baldwin +- Min-Yen Kan +date: '2010-08-01' +publishDate: '2024-07-11T07:40:56.505844Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 23rd International Conference on Computational Linguistics + (Coling 2010)*' +links: +- name: URL + url: https://aclanthology.org/C10-1065 +--- diff --git a/content/publication/kim-etal-2010-semeval/cite.bib b/content/publication/kim-etal-2010-semeval/cite.bib new file mode 100644 index 0000000..1c33576 --- /dev/null +++ b/content/publication/kim-etal-2010-semeval/cite.bib @@ -0,0 +1,16 @@ +@inproceedings{kim-etal-2010-semeval, + address = {Uppsala, Sweden}, + author = {Kim, Su Nam and +Medelyan, Olena and +Kan, Min-Yen and +Baldwin, Timothy}, + booktitle = {Proceedings of the 5th International Workshop on Semantic Evaluation}, + editor = {Erk, Katrin and +Strapparava, Carlo}, + month = {July}, + pages = {21--26}, + publisher = {Association for Computational Linguistics}, + title = {SemEval-2010 Task 5 : Automatic Keyphrase Extraction from Scientific Articles}, + url = {https://aclanthology.org/S10-1004}, + year = {2010} +} diff --git 
a/content/publication/kim-etal-2010-semeval/index.md b/content/publication/kim-etal-2010-semeval/index.md new file mode 100644 index 0000000..a76c90b --- /dev/null +++ b/content/publication/kim-etal-2010-semeval/index.md @@ -0,0 +1,16 @@ +--- +title: 'SemEval-2010 Task 5 : Automatic Keyphrase Extraction from Scientific Articles' +authors: +- Su Nam Kim +- Olena Medelyan +- Min-Yen Kan +- Timothy Baldwin +date: '2010-07-01' +publishDate: '2024-07-11T07:40:56.487584Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 5th International Workshop on Semantic Evaluation*' +links: +- name: URL + url: https://aclanthology.org/S10-1004 +--- diff --git a/content/publication/kim-kan-2009-examining/cite.bib b/content/publication/kim-kan-2009-examining/cite.bib new file mode 100644 index 0000000..359245c --- /dev/null +++ b/content/publication/kim-kan-2009-examining/cite.bib @@ -0,0 +1,16 @@ +@inproceedings{kim-kan-2009-examining, + address = {Singapore}, + author = {Kim, Su Nam and +Kan, Min-Yen}, + booktitle = {Proceedings of the Workshop on Multiword Expressions: Identification, Interpretation, Disambiguation and Applications (MWE 2009)}, + editor = {Anastasiou, Dimitra and +Hashimoto, Chikara and +Nakov, Preslav and +Kim, Su Nam}, + month = {August}, + pages = {9--16}, + publisher = {Association for Computational Linguistics}, + title = {Re-examining Automatic Keyphrase Extraction Approaches in Scientific Articles}, + url = {https://aclanthology.org/W09-2902}, + year = {2009} +} diff --git a/content/publication/kim-kan-2009-examining/index.md b/content/publication/kim-kan-2009-examining/index.md new file mode 100644 index 0000000..656dd4a --- /dev/null +++ b/content/publication/kim-kan-2009-examining/index.md @@ -0,0 +1,15 @@ +--- +title: Re-examining Automatic Keyphrase Extraction Approaches in Scientific Articles +authors: +- Su Nam Kim +- Min-Yen Kan +date: '2009-08-01' +publishDate: '2024-07-11T07:40:56.524050Z' +publication_types: +- 
paper-conference +publication: '*Proceedings of the Workshop on Multiword Expressions: Identification, + Interpretation, Disambiguation and Applications (MWE 2009)*' +links: +- name: URL + url: https://aclanthology.org/W09-2902 +--- diff --git a/content/publication/klavans-kan-1998-role-verbs/cite.bib b/content/publication/klavans-kan-1998-role-verbs/cite.bib new file mode 100644 index 0000000..dcde309 --- /dev/null +++ b/content/publication/klavans-kan-1998-role-verbs/cite.bib @@ -0,0 +1,13 @@ +@inproceedings{klavans-kan-1998-role-verbs, + address = {Montreal, Quebec, Canada}, + author = {Klavans, Judith L. and +Kan, Min-Yen}, + booktitle = {36th Annual Meeting of the Association for Computational Linguistics and 17th International Conference on Computational Linguistics, Volume 1}, + doi = {10.3115/980845.980959}, + month = {August}, + pages = {680--686}, + publisher = {Association for Computational Linguistics}, + title = {Role of Verbs in Document Analysis}, + url = {https://aclanthology.org/P98-1112}, + year = {1998} +} diff --git a/content/publication/klavans-kan-1998-role-verbs/index.md b/content/publication/klavans-kan-1998-role-verbs/index.md new file mode 100644 index 0000000..a8357e2 --- /dev/null +++ b/content/publication/klavans-kan-1998-role-verbs/index.md @@ -0,0 +1,16 @@ +--- +title: Role of Verbs in Document Analysis +authors: +- Judith L. 
Klavans +- Min-Yen Kan +date: '1998-08-01' +publishDate: '2024-07-11T07:40:56.643091Z' +publication_types: +- paper-conference +publication: '*36th Annual Meeting of the Association for Computational Linguistics + and 17th International Conference on Computational Linguistics, Volume 1*' +doi: 10.3115/980845.980959 +links: +- name: URL + url: https://aclanthology.org/P98-1112 +--- diff --git a/content/publication/lei-etal-2018-sequicity/cite.bib b/content/publication/lei-etal-2018-sequicity/cite.bib new file mode 100644 index 0000000..54d0742 --- /dev/null +++ b/content/publication/lei-etal-2018-sequicity/cite.bib @@ -0,0 +1,20 @@ +@inproceedings{lei-etal-2018-sequicity, + abstract = {Existing solutions to task-oriented dialogue systems follow pipeline designs which introduces architectural complexity and fragility. We propose a novel, holistic, extendable framework based on a single sequence-to-sequence (seq2seq) model which can be optimized with supervised or reinforcement learning. A key contribution is that we design text spans named belief spans to track dialogue believes, allowing task-oriented dialogue systems to be modeled in a seq2seq way. Based on this, we propose a simplistic Two Stage CopyNet instantiation which emonstrates good scalability: significantly reducing model complexity in terms of number of parameters and training time by a magnitude. 
It significantly outperforms state-of-the-art pipeline-based methods on large datasets and retains a satisfactory entity match rate on out-of-vocabulary (OOV) cases where pipeline-designed competitors totally fail.}, + address = {Melbourne, Australia}, + author = {Lei, Wenqiang and +Jin, Xisen and +Kan, Min-Yen and +Ren, Zhaochun and +He, Xiangnan and +Yin, Dawei}, + booktitle = {Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, + doi = {10.18653/v1/P18-1133}, + editor = {Gurevych, Iryna and +Miyao, Yusuke}, + month = {July}, + pages = {1437--1447}, + publisher = {Association for Computational Linguistics}, + title = {Sequicity: Simplifying Task-oriented Dialogue Systems with Single Sequence-to-Sequence Architectures}, + url = {https://aclanthology.org/P18-1133}, + year = {2018} +} diff --git a/content/publication/lei-etal-2018-sequicity/index.md b/content/publication/lei-etal-2018-sequicity/index.md new file mode 100644 index 0000000..d00d5c1 --- /dev/null +++ b/content/publication/lei-etal-2018-sequicity/index.md @@ -0,0 +1,32 @@ +--- +title: 'Sequicity: Simplifying Task-oriented Dialogue Systems with Single Sequence-to-Sequence + Architectures' +authors: +- Wenqiang Lei +- Xisen Jin +- Min-Yen Kan +- Zhaochun Ren +- Xiangnan He +- Dawei Yin +date: '2018-07-01' +publishDate: '2024-07-11T07:40:56.361300Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 56th Annual Meeting of the Association for Computational + Linguistics (Volume 1: Long Papers)*' +doi: 10.18653/v1/P18-1133 +abstract: 'Existing solutions to task-oriented dialogue systems follow pipeline designs + which introduces architectural complexity and fragility. We propose a novel, holistic, + extendable framework based on a single sequence-to-sequence (seq2seq) model which + can be optimized with supervised or reinforcement learning. 
A key contribution is + that we design text spans named belief spans to track dialogue believes, allowing + task-oriented dialogue systems to be modeled in a seq2seq way. Based on this, we + propose a simplistic Two Stage CopyNet instantiation which emonstrates good scalability: + significantly reducing model complexity in terms of number of parameters and training + time by a magnitude. It significantly outperforms state-of-the-art pipeline-based + methods on large datasets and retains a satisfactory entity match rate on out-of-vocabulary + (OOV) cases where pipeline-designed competitors totally fail.' +links: +- name: URL + url: https://aclanthology.org/P18-1133 +--- diff --git a/content/publication/lei-etal-2020-examining/cite.bib b/content/publication/lei-etal-2020-examining/cite.bib new file mode 100644 index 0000000..7777c7e --- /dev/null +++ b/content/publication/lei-etal-2020-examining/cite.bib @@ -0,0 +1,23 @@ +@inproceedings{lei-etal-2020-examining, + abstract = {In existing sophisticated text-to-SQL models, schema linking is often considered as a simple, minor component, belying its importance. By providing a schema linking corpus based on the Spider text-to-SQL dataset, we systematically study the role of schema linking. We also build a simple BERT-based baseline, called Schema-Linking SQL (SLSQL) to perform a data-driven study. We find when schema linking is done well, SLSQL demonstrates good performance on Spider despite its structural simplicity. Many remaining errors are attributable to corpus noise. This suggests schema linking is the crux for the current text-to-SQL task. 
Our analytic studies provide insights on the characteristics of schema linking for future developments of text-to-SQL tasks.}, + address = {Online}, + author = {Lei, Wenqiang and +Wang, Weixin and +Ma, Zhixin and +Gan, Tian and +Lu, Wei and +Kan, Min-Yen and +Chua, Tat-Seng}, + booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)}, + doi = {10.18653/v1/2020.emnlp-main.564}, + editor = {Webber, Bonnie and +Cohn, Trevor and +He, Yulan and +Liu, Yang}, + month = {November}, + pages = {6943--6954}, + publisher = {Association for Computational Linguistics}, + title = {Re-examining the Role of Schema Linking in Text-to-SQL}, + url = {https://aclanthology.org/2020.emnlp-main.564}, + year = {2020} +} diff --git a/content/publication/lei-etal-2020-examining/index.md b/content/publication/lei-etal-2020-examining/index.md new file mode 100644 index 0000000..f5d759f --- /dev/null +++ b/content/publication/lei-etal-2020-examining/index.md @@ -0,0 +1,31 @@ +--- +title: Re-examining the Role of Schema Linking in Text-to-SQL +authors: +- Wenqiang Lei +- Weixin Wang +- Zhixin Ma +- Tian Gan +- Wei Lu +- Min-Yen Kan +- Tat-Seng Chua +date: '2020-11-01' +publishDate: '2024-07-11T07:40:56.269384Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 2020 Conference on Empirical Methods in Natural + Language Processing (EMNLP)*' +doi: 10.18653/v1/2020.emnlp-main.564 +abstract: In existing sophisticated text-to-SQL models, schema linking is often considered + as a simple, minor component, belying its importance. By providing a schema linking + corpus based on the Spider text-to-SQL dataset, we systematically study the role + of schema linking. We also build a simple BERT-based baseline, called Schema-Linking + SQL (SLSQL) to perform a data-driven study. We find when schema linking is done + well, SLSQL demonstrates good performance on Spider despite its structural simplicity. 
+ Many remaining errors are attributable to corpus noise. This suggests schema linking + is the crux for the current text-to-SQL task. Our analytic studies provide insights + on the characteristics of schema linking for future developments of text-to-SQL + tasks. +links: +- name: URL + url: https://aclanthology.org/2020.emnlp-main.564 +--- diff --git a/content/publication/li-etal-2020-molweni/cite.bib b/content/publication/li-etal-2020-molweni/cite.bib new file mode 100644 index 0000000..ff5f44e --- /dev/null +++ b/content/publication/li-etal-2020-molweni/cite.bib @@ -0,0 +1,23 @@ +@inproceedings{li-etal-2020-molweni, + abstract = {Research into the area of multiparty dialog has grown considerably over recent years. We present the Molweni dataset, a machine reading comprehension (MRC) dataset with discourse structure built over multiparty dialog. Molweni′s source samples from the Ubuntu Chat Corpus, including 10,000 dialogs comprising 88,303 utterances. We annotate 30,066 questions on this corpus, including both answerable and unanswerable questions. Molweni also uniquely contributes discourse dependency annotations in a modified Segmented Discourse Representation Theory (SDRT; Asher et al., 2016) style for all of its multiparty dialogs, contributing large-scale (78,245 annotated discourse relations) data to bear on the task of multiparty dialog discourse parsing. 
Our experiments show that Molweni is a challenging dataset for current MRC models: BERT-wwm, a current, strong SQuAD 2.0 performer, achieves only 67.7% F1 on Molweni's questions, a 20+% significant drop as compared against its SQuAD 2.0 performance.}, + address = {Barcelona, Spain (Online)}, + author = {Li, Jiaqi and +Liu, Ming and +Kan, Min-Yen and +Zheng, Zihao and +Wang, Zekun and +Lei, Wenqiang and +Liu, Ting and +Qin, Bing}, + booktitle = {Proceedings of the 28th International Conference on Computational Linguistics}, + doi = {10.18653/v1/2020.coling-main.238}, + editor = {Scott, Donia and +Bel, Nuria and +Zong, Chengqing}, + month = {December}, + pages = {2642--2652}, + publisher = {International Committee on Computational Linguistics}, + title = {Molweni: A Challenge Multiparty Dialogues-based Machine Reading Comprehension Dataset with Discourse Structure}, + url = {https://aclanthology.org/2020.coling-main.238}, + year = {2020} +} diff --git a/content/publication/li-etal-2020-molweni/index.md b/content/publication/li-etal-2020-molweni/index.md new file mode 100644 index 0000000..0e2fd85 --- /dev/null +++ b/content/publication/li-etal-2020-molweni/index.md @@ -0,0 +1,35 @@ +--- +title: 'Molweni: A Challenge Multiparty Dialogues-based Machine Reading Comprehension + Dataset with Discourse Structure' +authors: +- Jiaqi Li +- Ming Liu +- Min-Yen Kan +- Zihao Zheng +- Zekun Wang +- Wenqiang Lei +- Ting Liu +- Bing Qin +date: '2020-12-01' +publishDate: '2024-07-11T07:40:56.284002Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 28th International Conference on Computational Linguistics*' +doi: 10.18653/v1/2020.coling-main.238 +abstract: 'Research into the area of multiparty dialog has grown considerably over + recent years. We present the Molweni dataset, a machine reading comprehension (MRC) + dataset with discourse structure built over multiparty dialog. 
Molweni''s source + samples from the Ubuntu Chat Corpus, including 10,000 dialogs comprising 88,303 + utterances. We annotate 30,066 questions on this corpus, including both answerable + and unanswerable questions. Molweni also uniquely contributes discourse dependency + annotations in a modified Segmented Discourse Representation Theory (SDRT; Asher + et al., 2016) style for all of its multiparty dialogs, contributing large-scale + (78,245 annotated discourse relations) data to bear on the task of multiparty dialog + discourse parsing. Our experiments show that Molweni is a challenging dataset for + current MRC models: BERT-wwm, a current, strong SQuAD 2.0 performer, achieves only + 67.7% F1 on Molweni''s questions, a 20+% significant drop as compared against its + SQuAD 2.0 performance.' +links: +- name: URL + url: https://aclanthology.org/2020.coling-main.238 +--- diff --git a/content/publication/lin-etal-2009-recognizing/cite.bib b/content/publication/lin-etal-2009-recognizing/cite.bib new file mode 100644 index 0000000..5b42002 --- /dev/null +++ b/content/publication/lin-etal-2009-recognizing/cite.bib @@ -0,0 +1,15 @@ +@inproceedings{lin-etal-2009-recognizing, + address = {Singapore}, + author = {Lin, Ziheng and +Kan, Min-Yen and +Ng, Hwee Tou}, + booktitle = {Proceedings of the 2009 Conference on Empirical Methods in Natural Language Processing}, + editor = {Koehn, Philipp and +Mihalcea, Rada}, + month = {August}, + pages = {343--351}, + publisher = {Association for Computational Linguistics}, + title = {Recognizing Implicit Discourse Relations in the Penn Discourse Treebank}, + url = {https://aclanthology.org/D09-1036}, + year = {2009} +} diff --git a/content/publication/lin-etal-2009-recognizing/index.md b/content/publication/lin-etal-2009-recognizing/index.md new file mode 100644 index 0000000..de9f526 --- /dev/null +++ b/content/publication/lin-etal-2009-recognizing/index.md @@ -0,0 +1,16 @@ +--- +title: Recognizing Implicit Discourse Relations in the Penn 
Discourse Treebank +authors: +- Ziheng Lin +- Min-Yen Kan +- Hwee Tou Ng +date: '2009-08-01' +publishDate: '2024-07-11T07:40:56.548673Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 2009 Conference on Empirical Methods in Natural + Language Processing*' +links: +- name: URL + url: https://aclanthology.org/D09-1036 +--- diff --git a/content/publication/lin-etal-2010-extracting/cite.bib b/content/publication/lin-etal-2010-extracting/cite.bib new file mode 100644 index 0000000..bac74a4 --- /dev/null +++ b/content/publication/lin-etal-2010-extracting/cite.bib @@ -0,0 +1,19 @@ +@inproceedings{lin-etal-2010-extracting, + address = {Los Angeles, California, USA}, + author = {Lin, Sein and +Ng, Jun-Ping and +Pradhan, Shreyasee and +Shah, Jatin and +Pietrobon, Ricardo and +Kan, Min-Yen}, + booktitle = {Proceedings of the NAACL HLT 2010 Second Louhi Workshop on Text and Data Mining of Health Documents}, + editor = {Dalianis, Hercules and +Hassel, Martin and +Nilsson, Gunnar}, + month = {June}, + pages = {90--95}, + publisher = {Association for Computational Linguistics}, + title = {Extracting Formulaic and Free Text Clinical Research Articles Metadata using Conditional Random Fields}, + url = {https://aclanthology.org/W10-1114}, + year = {2010} +} diff --git a/content/publication/lin-etal-2010-extracting/index.md b/content/publication/lin-etal-2010-extracting/index.md new file mode 100644 index 0000000..670f221 --- /dev/null +++ b/content/publication/lin-etal-2010-extracting/index.md @@ -0,0 +1,20 @@ +--- +title: Extracting Formulaic and Free Text Clinical Research Articles Metadata using + Conditional Random Fields +authors: +- Sein Lin +- Jun-Ping Ng +- Shreyasee Pradhan +- Jatin Shah +- Ricardo Pietrobon +- Min-Yen Kan +date: '2010-06-01' +publishDate: '2024-07-11T07:40:56.481260Z' +publication_types: +- paper-conference +publication: '*Proceedings of the NAACL HLT 2010 Second Louhi Workshop on Text and + Data Mining of Health Documents*' 
+links: +- name: URL + url: https://aclanthology.org/W10-1114 +--- diff --git a/content/publication/lin-etal-2011-automatically/cite.bib b/content/publication/lin-etal-2011-automatically/cite.bib new file mode 100644 index 0000000..ea76fc9 --- /dev/null +++ b/content/publication/lin-etal-2011-automatically/cite.bib @@ -0,0 +1,16 @@ +@inproceedings{lin-etal-2011-automatically, + address = {Portland, Oregon, USA}, + author = {Lin, Ziheng and +Ng, Hwee Tou and +Kan, Min-Yen}, + booktitle = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies}, + editor = {Lin, Dekang and +Matsumoto, Yuji and +Mihalcea, Rada}, + month = {June}, + pages = {997--1006}, + publisher = {Association for Computational Linguistics}, + title = {Automatically Evaluating Text Coherence Using Discourse Relations}, + url = {https://aclanthology.org/P11-1100}, + year = {2011} +} diff --git a/content/publication/lin-etal-2011-automatically/index.md b/content/publication/lin-etal-2011-automatically/index.md new file mode 100644 index 0000000..86bb256 --- /dev/null +++ b/content/publication/lin-etal-2011-automatically/index.md @@ -0,0 +1,16 @@ +--- +title: Automatically Evaluating Text Coherence Using Discourse Relations +authors: +- Ziheng Lin +- Hwee Tou Ng +- Min-Yen Kan +date: '2011-06-01' +publishDate: '2024-07-11T07:40:56.475209Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 49th Annual Meeting of the Association for Computational + Linguistics: Human Language Technologies*' +links: +- name: URL + url: https://aclanthology.org/P11-1100 +--- diff --git a/content/publication/lin-etal-2012-combining/cite.bib b/content/publication/lin-etal-2012-combining/cite.bib new file mode 100644 index 0000000..b85149a --- /dev/null +++ b/content/publication/lin-etal-2012-combining/cite.bib @@ -0,0 +1,19 @@ +@inproceedings{lin-etal-2012-combining, + address = {Jeju Island, Korea}, + author = {Lin, Ziheng and +Liu, Chang 
and +Ng, Hwee Tou and +Kan, Min-Yen}, + booktitle = {Proceedings of the 50th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, + editor = {Li, Haizhou and +Lin, Chin-Yew and +Osborne, Miles and +Lee, Gary Geunbae and +Park, Jong C.}, + month = {July}, + pages = {1006--1014}, + publisher = {Association for Computational Linguistics}, + title = {Combining Coherence Models and Machine Translation Evaluation Metrics for Summarization Evaluation}, + url = {https://aclanthology.org/P12-1106}, + year = {2012} +} diff --git a/content/publication/lin-etal-2012-combining/index.md b/content/publication/lin-etal-2012-combining/index.md new file mode 100644 index 0000000..7fa32f1 --- /dev/null +++ b/content/publication/lin-etal-2012-combining/index.md @@ -0,0 +1,18 @@ +--- +title: Combining Coherence Models and Machine Translation Evaluation Metrics for Summarization + Evaluation +authors: +- Ziheng Lin +- Chang Liu +- Hwee Tou Ng +- Min-Yen Kan +date: '2012-07-01' +publishDate: '2024-07-11T07:40:56.456833Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 50th Annual Meeting of the Association for Computational + Linguistics (Volume 1: Long Papers)*' +links: +- name: URL + url: https://aclanthology.org/P12-1106 +--- diff --git a/content/publication/lin-kan-2007-timestamped/cite.bib b/content/publication/lin-kan-2007-timestamped/cite.bib new file mode 100644 index 0000000..5cce4c0 --- /dev/null +++ b/content/publication/lin-kan-2007-timestamped/cite.bib @@ -0,0 +1,15 @@ +@inproceedings{lin-kan-2007-timestamped, + address = {Rochester, NY, USA}, + author = {Lin, Ziheng and +Kan, Min-Yen}, + booktitle = {Proceedings of the Second Workshop on TextGraphs: Graph-Based Algorithms for Natural Language Processing}, + editor = {Biemann, Chris and +Matveeva, Irina and +Mihalcea, Rada and +Radev, Dragomir}, + pages = {25--32}, + publisher = {Association for Computational Linguistics}, + title = {Timestamped Graphs: 
Evolutionary Models of Text for Multi-Document Summarization}, + url = {https://aclanthology.org/W07-0204}, + year = {2007} +} diff --git a/content/publication/lin-kan-2007-timestamped/index.md b/content/publication/lin-kan-2007-timestamped/index.md new file mode 100644 index 0000000..a96882e --- /dev/null +++ b/content/publication/lin-kan-2007-timestamped/index.md @@ -0,0 +1,15 @@ +--- +title: 'Timestamped Graphs: Evolutionary Models of Text for Multi-Document Summarization' +authors: +- Ziheng Lin +- Min-Yen Kan +date: '2007-01-01' +publishDate: '2024-07-11T07:40:56.573882Z' +publication_types: +- paper-conference +publication: '*Proceedings of the Second Workshop on TextGraphs: Graph-Based Algorithms + for Natural Language Processing*' +links: +- name: URL + url: https://aclanthology.org/W07-0204 +--- diff --git a/content/publication/luong-etal-2010-hybrid/cite.bib b/content/publication/luong-etal-2010-hybrid/cite.bib new file mode 100644 index 0000000..b0f7251 --- /dev/null +++ b/content/publication/luong-etal-2010-hybrid/cite.bib @@ -0,0 +1,15 @@ +@inproceedings{luong-etal-2010-hybrid, + address = {Cambridge, MA}, + author = {Luong, Minh-Thang and +Nakov, Preslav and +Kan, Min-Yen}, + booktitle = {Proceedings of the 2010 Conference on Empirical Methods in Natural Language Processing}, + editor = {Li, Hang and +Màrquez, Lluís}, + month = {October}, + pages = {148--157}, + publisher = {Association for Computational Linguistics}, + title = {A Hybrid Morpheme-Word Representation for Machine Translation of Morphologically Rich Languages}, + url = {https://aclanthology.org/D10-1015}, + year = {2010} +} diff --git a/content/publication/luong-etal-2010-hybrid/index.md b/content/publication/luong-etal-2010-hybrid/index.md new file mode 100644 index 0000000..4c8f266 --- /dev/null +++ b/content/publication/luong-etal-2010-hybrid/index.md @@ -0,0 +1,17 @@ +--- +title: A Hybrid Morpheme-Word Representation for Machine Translation of Morphologically + Rich Languages 
+authors: +- Minh-Thang Luong +- Preslav Nakov +- Min-Yen Kan +date: '2010-10-01' +publishDate: '2024-07-11T07:40:56.493708Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 2010 Conference on Empirical Methods in Natural + Language Processing*' +links: +- name: URL + url: https://aclanthology.org/D10-1015 +--- diff --git a/content/publication/luong-kan-2010-enhancing/cite.bib b/content/publication/luong-kan-2010-enhancing/cite.bib new file mode 100644 index 0000000..88ad20c --- /dev/null +++ b/content/publication/luong-kan-2010-enhancing/cite.bib @@ -0,0 +1,14 @@ +@inproceedings{luong-kan-2010-enhancing, + address = {Beijing, China}, + author = {Luong, Minh-Thang and +Kan, Min-Yen}, + booktitle = {Proceedings of the 23rd International Conference on Computational Linguistics (Coling 2010)}, + editor = {Huang, Chu-Ren and +Jurafsky, Dan}, + month = {August}, + pages = {743--751}, + publisher = {Coling 2010 Organizing Committee}, + title = {Enhancing Morphological Alignment for Translating Highly Inflected Languages}, + url = {https://aclanthology.org/C10-1084}, + year = {2010} +} diff --git a/content/publication/luong-kan-2010-enhancing/index.md b/content/publication/luong-kan-2010-enhancing/index.md new file mode 100644 index 0000000..15004dc --- /dev/null +++ b/content/publication/luong-kan-2010-enhancing/index.md @@ -0,0 +1,15 @@ +--- +title: Enhancing Morphological Alignment for Translating Highly Inflected Languages +authors: +- Minh-Thang Luong +- Min-Yen Kan +date: '2010-08-01' +publishDate: '2024-07-11T07:40:56.511952Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 23rd International Conference on Computational Linguistics + (Coling 2010)*' +links: +- name: URL + url: https://aclanthology.org/C10-1084 +--- diff --git a/content/publication/ng-etal-2012-exploiting/cite.bib b/content/publication/ng-etal-2012-exploiting/cite.bib new file mode 100644 index 0000000..3e9142c --- /dev/null +++ 
b/content/publication/ng-etal-2012-exploiting/cite.bib @@ -0,0 +1,17 @@ +@inproceedings{ng-etal-2012-exploiting, + address = {Mumbai, India}, + author = {Ng, Jun-Ping and +Bysani, Praveen and +Lin, Ziheng and +Kan, Min-Yen and +Tan, Chew-Lim}, + booktitle = {Proceedings of COLING 2012}, + editor = {Kay, Martin and +Boitet, Christian}, + month = {December}, + pages = {2093--2108}, + publisher = {The COLING 2012 Organizing Committee}, + title = {Exploiting Category-Specific Information for Multi-Document Summarization}, + url = {https://aclanthology.org/C12-1128}, + year = {2012} +} diff --git a/content/publication/ng-etal-2012-exploiting/index.md b/content/publication/ng-etal-2012-exploiting/index.md new file mode 100644 index 0000000..465a839 --- /dev/null +++ b/content/publication/ng-etal-2012-exploiting/index.md @@ -0,0 +1,17 @@ +--- +title: Exploiting Category-Specific Information for Multi-Document Summarization +authors: +- Jun-Ping Ng +- Praveen Bysani +- Ziheng Lin +- Min-Yen Kan +- Chew-Lim Tan +date: '2012-12-01' +publishDate: '2024-07-11T07:40:56.463025Z' +publication_types: +- paper-conference +publication: '*Proceedings of COLING 2012*' +links: +- name: URL + url: https://aclanthology.org/C12-1128 +--- diff --git a/content/publication/ng-etal-2013-exploiting/cite.bib b/content/publication/ng-etal-2013-exploiting/cite.bib new file mode 100644 index 0000000..e81b232 --- /dev/null +++ b/content/publication/ng-etal-2013-exploiting/cite.bib @@ -0,0 +1,22 @@ +@inproceedings{ng-etal-2013-exploiting, + address = {Seattle, Washington, USA}, + author = {Ng, Jun-Ping and +Kan, Min-Yen and +Lin, Ziheng and +Feng, Wei and +Chen, Bin and +Su, Jian and +Tan, Chew-Lim}, + booktitle = {Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing}, + editor = {Yarowsky, David and +Baldwin, Timothy and +Korhonen, Anna and +Livescu, Karen and +Bethard, Steven}, + month = {October}, + pages = {12--23}, + publisher = {Association for Computational 
Linguistics}, + title = {Exploiting Discourse Analysis for Article-Wide Temporal Classification}, + url = {https://aclanthology.org/D13-1002}, + year = {2013} +} diff --git a/content/publication/ng-etal-2013-exploiting/index.md b/content/publication/ng-etal-2013-exploiting/index.md new file mode 100644 index 0000000..62e8307 --- /dev/null +++ b/content/publication/ng-etal-2013-exploiting/index.md @@ -0,0 +1,20 @@ +--- +title: Exploiting Discourse Analysis for Article-Wide Temporal Classification +authors: +- Jun-Ping Ng +- Min-Yen Kan +- Ziheng Lin +- Wei Feng +- Bin Chen +- Jian Su +- Chew-Lim Tan +date: '2013-10-01' +publishDate: '2024-07-11T07:40:56.432328Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 2013 Conference on Empirical Methods in Natural + Language Processing*' +links: +- name: URL + url: https://aclanthology.org/D13-1002 +--- diff --git a/content/publication/ng-etal-2014-exploiting/cite.bib b/content/publication/ng-etal-2014-exploiting/cite.bib new file mode 100644 index 0000000..98d6472 --- /dev/null +++ b/content/publication/ng-etal-2014-exploiting/cite.bib @@ -0,0 +1,17 @@ +@inproceedings{ng-etal-2014-exploiting, + address = {Baltimore, Maryland}, + author = {Ng, Jun-Ping and +Chen, Yan and +Kan, Min-Yen and +Li, Zhoujun}, + booktitle = {Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, + doi = {10.3115/v1/P14-1087}, + editor = {Toutanova, Kristina and +Wu, Hua}, + month = {June}, + pages = {923--933}, + publisher = {Association for Computational Linguistics}, + title = {Exploiting Timelines to Enhance Multi-document Summarization}, + url = {https://aclanthology.org/P14-1087}, + year = {2014} +} diff --git a/content/publication/ng-etal-2014-exploiting/index.md b/content/publication/ng-etal-2014-exploiting/index.md new file mode 100644 index 0000000..ad3436c --- /dev/null +++ b/content/publication/ng-etal-2014-exploiting/index.md @@ -0,0 +1,18 @@ +--- 
+title: Exploiting Timelines to Enhance Multi-document Summarization +authors: +- Jun-Ping Ng +- Yan Chen +- Min-Yen Kan +- Zhoujun Li +date: '2014-06-01' +publishDate: '2024-07-11T07:40:56.413715Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 52nd Annual Meeting of the Association for Computational + Linguistics (Volume 1: Long Papers)*' +doi: 10.3115/v1/P14-1087 +links: +- name: URL + url: https://aclanthology.org/P14-1087 +--- diff --git a/content/publication/ng-kan-2012-improved/cite.bib b/content/publication/ng-kan-2012-improved/cite.bib new file mode 100644 index 0000000..f6c87c8 --- /dev/null +++ b/content/publication/ng-kan-2012-improved/cite.bib @@ -0,0 +1,14 @@ +@inproceedings{ng-kan-2012-improved, + address = {Mumbai, India}, + author = {Ng, Jun-Ping and +Kan, Min-Yen}, + booktitle = {Proceedings of COLING 2012}, + editor = {Kay, Martin and +Boitet, Christian}, + month = {December}, + pages = {2109--2124}, + publisher = {The COLING 2012 Organizing Committee}, + title = {Improved Temporal Relation Classification using Dependency Parses and Selective Crowdsourced Annotations}, + url = {https://aclanthology.org/C12-1129}, + year = {2012} +} diff --git a/content/publication/ng-kan-2012-improved/index.md b/content/publication/ng-kan-2012-improved/index.md new file mode 100644 index 0000000..1ffbc04 --- /dev/null +++ b/content/publication/ng-kan-2012-improved/index.md @@ -0,0 +1,15 @@ +--- +title: Improved Temporal Relation Classification using Dependency Parses and Selective + Crowdsourced Annotations +authors: +- Jun-Ping Ng +- Min-Yen Kan +date: '2012-12-01' +publishDate: '2024-07-11T07:40:56.469115Z' +publication_types: +- paper-conference +publication: '*Proceedings of COLING 2012*' +links: +- name: URL + url: https://aclanthology.org/C12-1129 +--- diff --git a/content/publication/nguyen-etal-2018-treatment/cite.bib b/content/publication/nguyen-etal-2018-treatment/cite.bib new file mode 100644 index 0000000..6ea923b --- 
/dev/null +++ b/content/publication/nguyen-etal-2018-treatment/cite.bib @@ -0,0 +1,19 @@ +@inproceedings{nguyen-etal-2018-treatment, + abstract = {With Health 2.0, patients and caregivers increasingly seek information regarding possible drug side effects during their medical treatments in online health communities. These are helpful platforms for non-professional medical opinions, yet pose risk of being unreliable in quality and insufficient in quantity to cover the wide range of potential drug reactions. Existing approaches which analyze such user-generated content in online forums heavily rely on feature engineering of both documents and users, and often overlook the relationships between posts within a common discussion thread. Inspired by recent advancements, we propose a neural architecture that models the textual content of user-generated documents and user experiences in online communities to predict side effects during treatment. Experimental results show that our proposed architecture outperforms baseline models.}, + address = {Brussels, Belgium}, + author = {Nguyen, Van Hoang and +Sugiyama, Kazunari and +Kan, Min-Yen and +Halder, Kishaloy}, + booktitle = {Proceedings of the Ninth International Workshop on Health Text Mining and Information Analysis}, + doi = {10.18653/v1/W18-5602}, + editor = {Lavelli, Alberto and +Minard, Anne-Lyse and +Rinaldi, Fabio}, + month = {October}, + pages = {12--21}, + publisher = {Association for Computational Linguistics}, + title = {Treatment Side Effect Prediction from Online User-generated Content}, + url = {https://aclanthology.org/W18-5602}, + year = {2018} +} diff --git a/content/publication/nguyen-etal-2018-treatment/index.md b/content/publication/nguyen-etal-2018-treatment/index.md new file mode 100644 index 0000000..a9a0399 --- /dev/null +++ b/content/publication/nguyen-etal-2018-treatment/index.md @@ -0,0 +1,29 @@ +--- +title: Treatment Side Effect Prediction from Online User-generated Content +authors: +- Van Hoang 
Nguyen +- Kazunari Sugiyama +- Min-Yen Kan +- Kishaloy Halder +date: '2018-10-01' +publishDate: '2024-07-11T07:40:56.340987Z' +publication_types: +- paper-conference +publication: '*Proceedings of the Ninth International Workshop on Health Text Mining + and Information Analysis*' +doi: 10.18653/v1/W18-5602 +abstract: With Health 2.0, patients and caregivers increasingly seek information regarding + possible drug side effects during their medical treatments in online health communities. + These are helpful platforms for non-professional medical opinions, yet pose risk + of being unreliable in quality and insufficient in quantity to cover the wide range + of potential drug reactions. Existing approaches which analyze such user-generated + content in online forums heavily rely on feature engineering of both documents and + users, and often overlook the relationships between posts within a common discussion + thread. Inspired by recent advancements, we propose a neural architecture that models + the textual content of user-generated documents and user experiences in online communities + to predict side effects during treatment. Experimental results show that our proposed + architecture outperforms baseline models. +links: +- name: URL + url: https://aclanthology.org/W18-5602 +--- diff --git a/content/publication/pan-etal-2020-semantic/cite.bib b/content/publication/pan-etal-2020-semantic/cite.bib new file mode 100644 index 0000000..11f9426 --- /dev/null +++ b/content/publication/pan-etal-2020-semantic/cite.bib @@ -0,0 +1,21 @@ +@inproceedings{pan-etal-2020-semantic, + abstract = {This paper proposes the problem of Deep Question Generation (DQG), which aims to generate complex questions that require reasoning over multiple pieces of information about the input passage. 
In order to capture the global structure of the document and facilitate reasoning, we propose a novel framework that first constructs a semantic-level graph for the input document and then encodes the semantic graph by introducing an attention-based GGNN (Att-GGNN). Afterward, we fuse the document-level and graph-level representations to perform joint training of content selection and question decoding. On the HotpotQA deep-question centric dataset, our model greatly improves performance over questions requiring reasoning over multiple facts, leading to state-of-the-art performance. The code is publicly available at https://github.com/WING-NUS/SG-Deep-Question-Generation.}, + address = {Online}, + author = {Pan, Liangming and +Xie, Yuxi and +Feng, Yansong and +Chua, Tat-Seng and +Kan, Min-Yen}, + booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics}, + doi = {10.18653/v1/2020.acl-main.135}, + editor = {Jurafsky, Dan and +Chai, Joyce and +Schluter, Natalie and +Tetreault, Joel}, + month = {July}, + pages = {1463--1475}, + publisher = {Association for Computational Linguistics}, + title = {Semantic Graphs for Generating Deep Questions}, + url = {https://aclanthology.org/2020.acl-main.135}, + year = {2020} +} diff --git a/content/publication/pan-etal-2020-semantic/index.md b/content/publication/pan-etal-2020-semantic/index.md new file mode 100644 index 0000000..f3debb2 --- /dev/null +++ b/content/publication/pan-etal-2020-semantic/index.md @@ -0,0 +1,29 @@ +--- +title: Semantic Graphs for Generating Deep Questions +authors: +- Liangming Pan +- Yuxi Xie +- Yansong Feng +- Tat-Seng Chua +- Min-Yen Kan +date: '2020-07-01' +publishDate: '2024-07-11T07:40:56.306034Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 58th Annual Meeting of the Association for Computational + Linguistics*' +doi: 10.18653/v1/2020.acl-main.135 +abstract: This paper proposes the problem of Deep Question Generation
(DQG), which + aims to generate complex questions that require reasoning over multiple pieces of + information about the input passage. In order to capture the global structure of + the document and facilitate reasoning, we propose a novel framework that first constructs + a semantic-level graph for the input document and then encodes the semantic graph + by introducing an attention-based GGNN (Att-GGNN). Afterward, we fuse the document-level + and graph-level representations to perform joint training of content selection and + question decoding. On the HotpotQA deep-question centric dataset, our model greatly + improves performance over questions requiring reasoning over multiple facts, leading + to state-of-the-art performance. The code is publicly available at https://github.com/WING-NUS/SG-Deep-Question-Generation. +links: +- name: URL + url: https://aclanthology.org/2020.acl-main.135 +--- diff --git a/content/publication/pan-etal-2021-unsupervised/cite.bib b/content/publication/pan-etal-2021-unsupervised/cite.bib new file mode 100644 index 0000000..09b2f6d --- /dev/null +++ b/content/publication/pan-etal-2021-unsupervised/cite.bib @@ -0,0 +1,26 @@ +@inproceedings{pan-etal-2021-unsupervised, + abstract = {Obtaining training data for multi-hop question answering (QA) is time-consuming and resource-intensive. We explore the possibility to train a well-performed multi-hop QA model without referencing any human-labeled multi-hop question-answer pairs, i.e., unsupervised multi-hop QA. We propose MQA-QG, an unsupervised framework that can generate human-like multi-hop training data from both homogeneous and heterogeneous data sources. MQA-QG generates questions by first selecting/generating relevant information from each data source and then integrating the multiple information to form a multi-hop question. 
Using only generated training data, we can train a competent multi-hop QA which achieves 61% and 83% of the supervised learning performance for the HybridQA and the HotpotQA dataset, respectively. We also show that pretraining the QA system with the generated data would greatly reduce the demand for human-annotated training data. Our codes are publicly available at https://github.com/teacherpeterpan/Unsupervised-Multi-hop-QA.}, + address = {Online}, + author = {Pan, Liangming and +Chen, Wenhu and +Xiong, Wenhan and +Kan, Min-Yen and +Wang, William Yang}, + booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies}, + doi = {10.18653/v1/2021.naacl-main.469}, + editor = {Toutanova, Kristina and +Rumshisky, Anna and +Zettlemoyer, Luke and +Hakkani-Tur, Dilek and +Beltagy, Iz and +Bethard, Steven and +Cotterell, Ryan and +Chakraborty, Tanmoy and +Zhou, Yichao}, + month = {June}, + pages = {5866--5880}, + publisher = {Association for Computational Linguistics}, + title = {Unsupervised Multi-hop Question Answering by Question Generation}, + url = {https://aclanthology.org/2021.naacl-main.469}, + year = {2021} +} diff --git a/content/publication/pan-etal-2021-unsupervised/index.md b/content/publication/pan-etal-2021-unsupervised/index.md new file mode 100644 index 0000000..886865e --- /dev/null +++ b/content/publication/pan-etal-2021-unsupervised/index.md @@ -0,0 +1,31 @@ +--- +title: Unsupervised Multi-hop Question Answering by Question Generation +authors: +- Liangming Pan +- Wenhu Chen +- Wenhan Xiong +- Min-Yen Kan +- William Yang Wang +date: '2021-06-01' +publishDate: '2024-07-11T07:40:56.233397Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 2021 Conference of the North American Chapter of + the Association for Computational Linguistics: Human Language Technologies*' +doi: 10.18653/v1/2021.naacl-main.469 +abstract: Obtaining training data
for multi-hop question answering (QA) is time-consuming + and resource-intensive. We explore the possibility to train a well-performed multi-hop + QA model without referencing any human-labeled multi-hop question-answer pairs, + i.e., unsupervised multi-hop QA. We propose MQA-QG, an unsupervised framework that + can generate human-like multi-hop training data from both homogeneous and heterogeneous + data sources. MQA-QG generates questions by first selecting/generating relevant + information from each data source and then integrating the multiple information + to form a multi-hop question. Using only generated training data, we can train a + competent multi-hop QA which achieves 61% and 83% of the supervised learning performance + for the HybridQA and the HotpotQA dataset, respectively. We also show that pretraining + the QA system with the generated data would greatly reduce the demand for human-annotated + training data. Our codes are publicly available at https://github.com/teacherpeterpan/Unsupervised-Multi-hop-QA. +links: +- name: URL + url: https://aclanthology.org/2021.naacl-main.469 +--- diff --git a/content/publication/pan-etal-2021-zero/cite.bib b/content/publication/pan-etal-2021-zero/cite.bib new file mode 100644 index 0000000..e6eace6 --- /dev/null +++ b/content/publication/pan-etal-2021-zero/cite.bib @@ -0,0 +1,21 @@ +@inproceedings{pan-etal-2021-zero, + abstract = {Neural models for automated fact verification have achieved promising results thanks to the availability of large, human-annotated datasets. However, for each new domain that requires fact verification, creating a dataset by manually writing claims and linking them to their supporting evidence is expensive. We develop QACG, a framework for training a robust fact verification model by using automatically generated claims that can be supported, refuted, or unverifiable from evidence from Wikipedia. 
QACG generates question-answer pairs from the evidence and then converts them into different types of claims. Experiments on the FEVER dataset show that our QACG framework significantly reduces the demand for human-annotated training data. In a zero-shot scenario, QACG improves a RoBERTa model's F1 from 50% to 77%, equivalent in performance to 2K+ manually-curated examples. Our QACG code is publicly available.}, + address = {Online}, + author = {Pan, Liangming and +Chen, Wenhu and +Xiong, Wenhan and +Kan, Min-Yen and +Wang, William Yang}, + booktitle = {Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)}, + doi = {10.18653/v1/2021.acl-short.61}, + editor = {Zong, Chengqing and +Xia, Fei and +Li, Wenjie and +Navigli, Roberto}, + month = {August}, + pages = {476--483}, + publisher = {Association for Computational Linguistics}, + title = {Zero-shot Fact Verification by Claim Generation}, + url = {https://aclanthology.org/2021.acl-short.61}, + year = {2021} +} diff --git a/content/publication/pan-etal-2021-zero/index.md b/content/publication/pan-etal-2021-zero/index.md new file mode 100644 index 0000000..96843b8 --- /dev/null +++ b/content/publication/pan-etal-2021-zero/index.md @@ -0,0 +1,32 @@ +--- +title: Zero-shot Fact Verification by Claim Generation +authors: +- Liangming Pan +- Wenhu Chen +- Wenhan Xiong +- Min-Yen Kan +- William Yang Wang +date: '2021-08-01' +publishDate: '2024-07-11T07:40:56.240829Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 59th Annual Meeting of the Association for Computational + Linguistics and the 11th International Joint Conference on Natural Language Processing + (Volume 2: Short Papers)*' +doi: 10.18653/v1/2021.acl-short.61 +abstract: Neural models for automated fact verification have achieved promising results + thanks to the availability of large, human-annotated 
datasets. However, for each + new domain that requires fact verification, creating a dataset by manually writing + claims and linking them to their supporting evidence is expensive. We develop QACG, + a framework for training a robust fact verification model by using automatically + generated claims that can be supported, refuted, or unverifiable from evidence from + Wikipedia. QACG generates question-answer pairs from the evidence and then converts + them into different types of claims. Experiments on the FEVER dataset show that + our QACG framework significantly reduces the demand for human-annotated training + data. In a zero-shot scenario, QACG improves a RoBERTa model′s F1 from 50% to 77%, + equivalent in performance to 2K+ manually-curated examples. Our QACG code is publicly + available. +links: +- name: URL + url: https://aclanthology.org/2021.acl-short.61 +--- diff --git a/content/publication/prasad-kan-2017-wing/cite.bib b/content/publication/prasad-kan-2017-wing/cite.bib new file mode 100644 index 0000000..0788a1d --- /dev/null +++ b/content/publication/prasad-kan-2017-wing/cite.bib @@ -0,0 +1,20 @@ +@inproceedings{prasad-kan-2017-wing, + abstract = {We describe an end-to-end pipeline processing approach for SemEval 2017′s Task 10 to extract keyphrases and their relations from scientific publications. We jointly identify and classify keyphrases by modeling the subtasks as sequential labeling. Our system utilizes standard, surface-level features along with the adjacent word features, and performs conditional decoding on whole text to extract keyphrases. We focus only on the identification and typing of keyphrases (Subtasks A and B, together referred as extraction), but provide an end-to-end system inclusive of keyphrase relation identification (Subtask C) for completeness. 
Our top performing configuration achieves an $F_1$ of 0.27 for the end-to-end keyphrase extraction and relation identification scenario on the final test data, and compares on par to other top ranked systems for keyphrase extraction. Our system outperforms other techniques that do not employ global decoding and hence do not account for dependencies between keyphrases. We believe this is crucial for keyphrase classification in the given context of scientific document mining.}, + address = {Vancouver, Canada}, + author = {Prasad, Animesh and +Kan, Min-Yen}, + booktitle = {Proceedings of the 11th International Workshop on Semantic Evaluation (SemEval-2017)}, + doi = {10.18653/v1/S17-2170}, + editor = {Bethard, Steven and +Carpuat, Marine and +Apidianaki, Marianna and +Mohammad, Saif M. and +Cer, Daniel and +Jurgens, David}, + month = {August}, + pages = {973--977}, + publisher = {Association for Computational Linguistics}, + title = {WING-NUS at SemEval-2017 Task 10: Keyphrase Extraction and Classification as Joint Sequence Labeling}, + url = {https://aclanthology.org/S17-2170}, + year = {2017} +} diff --git a/content/publication/prasad-kan-2017-wing/index.md b/content/publication/prasad-kan-2017-wing/index.md new file mode 100644 index 0000000..a24cf60 --- /dev/null +++ b/content/publication/prasad-kan-2017-wing/index.md @@ -0,0 +1,31 @@ +--- +title: 'WING-NUS at SemEval-2017 Task 10: Keyphrase Extraction and Classification + as Joint Sequence Labeling' +authors: +- Animesh Prasad +- Min-Yen Kan +date: '2017-08-01' +publishDate: '2024-07-11T07:40:56.382115Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 11th International Workshop on Semantic Evaluation + (SemEval-2017)*' +doi: 10.18653/v1/S17-2170 +abstract: We describe an end-to-end pipeline processing approach for SemEval 2017′s + Task 10 to extract keyphrases and their relations from scientific publications. 
+ We jointly identify and classify keyphrases by modeling the subtasks as sequential + labeling. Our system utilizes standard, surface-level features along with the adjacent + word features, and performs conditional decoding on whole text to extract keyphrases. + We focus only on the identification and typing of keyphrases (Subtasks A and B, + together referred as extraction), but provide an end-to-end system inclusive of + keyphrase relation identification (Subtask C) for completeness. Our top performing + configuration achieves an $F_1$ of 0.27 for the end-to-end keyphrase extraction + and relation identification scenario on the final test data, and compares on par + to other top ranked systems for keyphrase extraction. Our system outperforms other + techniques that do not employ global decoding and hence do not account for dependencies + between keyphrases. We believe this is crucial for keyphrase classification in the + given context of scientific document mining. +links: +- name: URL + url: https://aclanthology.org/S17-2170 +--- diff --git a/content/publication/prasad-kan-2019-glocal/cite.bib b/content/publication/prasad-kan-2019-glocal/cite.bib new file mode 100644 index 0000000..c25c82b --- /dev/null +++ b/content/publication/prasad-kan-2019-glocal/cite.bib @@ -0,0 +1,17 @@ +@inproceedings{prasad-kan-2019-glocal, + abstract = {Graph Convolutional Networks (GCNs) are a class of spectral clustering techniques that leverage localized convolution filters to perform supervised classification directly on graphical structures. While such methods model nodes′ local pairwise importance, they lack the capability to model global importance relative to other nodes of the graph. This causes such models to miss critical information in tasks where global ranking is a key component for the task, such as in keyphrase extraction. 
We address this shortcoming by allowing the proper incorporation of global information into the GCN family of models through the use of scaled node weights. In the context of keyphrase extraction, incorporating global random walk scores obtained from TextRank boosts performance significantly. With our proposed method, we achieve state-of-the-art results, bettering a strong baseline by an absolute 2% increase in F1 score.}, + address = {Minneapolis, Minnesota}, + author = {Prasad, Animesh and +Kan, Min-Yen}, + booktitle = {Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)}, + doi = {10.18653/v1/N19-1182}, + editor = {Burstein, Jill and +Doran, Christy and +Solorio, Thamar}, + month = {June}, + pages = {1837--1846}, + publisher = {Association for Computational Linguistics}, + title = {Glocal: Incorporating Global Information in Local Convolution for Keyphrase Extraction}, + url = {https://aclanthology.org/N19-1182}, + year = {2019} +} diff --git a/content/publication/prasad-kan-2019-glocal/index.md b/content/publication/prasad-kan-2019-glocal/index.md new file mode 100644 index 0000000..3276fc2 --- /dev/null +++ b/content/publication/prasad-kan-2019-glocal/index.md @@ -0,0 +1,29 @@ +--- +title: 'Glocal: Incorporating Global Information in Local Convolution for Keyphrase + Extraction' +authors: +- Animesh Prasad +- Min-Yen Kan +date: '2019-06-01' +publishDate: '2024-07-11T07:40:56.320220Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 2019 Conference of the North American Chapter of + the Association for Computational Linguistics: Human Language Technologies, Volume + 1 (Long and Short Papers)*' +doi: 10.18653/v1/N19-1182 +abstract: Graph Convolutional Networks (GCNs) are a class of spectral clustering techniques + that leverage localized convolution filters to perform supervised classification + directly on 
graphical structures. While such methods model nodes′ local pairwise + importance, they lack the capability to model global importance relative to other + nodes of the graph. This causes such models to miss critical information in tasks + where global ranking is a key component for the task, such as in keyphrase extraction. + We address this shortcoming by allowing the proper incorporation of global information + into the GCN family of models through the use of scaled node weights. In the context + of keyphrase extraction, incorporating global random walk scores obtained from TextRank + boosts performance significantly. With our proposed method, we achieve state-of-the-art + results, bettering a strong baseline by an absolute 2% increase in F1 score. +links: +- name: URL + url: https://aclanthology.org/N19-1182 +--- diff --git a/content/publication/qiu-etal-2004-public/cite.bib b/content/publication/qiu-etal-2004-public/cite.bib new file mode 100644 index 0000000..90b8533 --- /dev/null +++ b/content/publication/qiu-etal-2004-public/cite.bib @@ -0,0 +1,17 @@ +@inproceedings{qiu-etal-2004-public, + address = {Lisbon, Portugal}, + author = {Qiu, Long and +Kan, Min-Yen and +Chua, Tat-Seng}, + booktitle = {Proceedings of the Fourth International Conference on Language Resources and Evaluation (LREC′04)}, + editor = {Lino, Maria Teresa and +Xavier, Maria Francisca and +Ferreira, Fátima and +Costa, Rute and +Silva, Raquel}, + month = {May}, + publisher = {European Language Resources Association (ELRA)}, + title = {A Public Reference Implementation of the RAP Anaphora Resolution Algorithm}, + url = {http://www.lrec-conf.org/proceedings/lrec2004/pdf/778.pdf}, + year = {2004} +} diff --git a/content/publication/qiu-etal-2004-public/index.md b/content/publication/qiu-etal-2004-public/index.md new file mode 100644 index 0000000..c449c81 --- /dev/null +++ b/content/publication/qiu-etal-2004-public/index.md @@ -0,0 +1,14 @@ +--- +title: A Public Reference Implementation of the 
RAP Anaphora Resolution Algorithm +authors: +- Long Qiu +- Min-Yen Kan +- Tat-Seng Chua +date: '2004-05-01' +publishDate: '2024-07-11T07:40:56.604279Z' +publication_types: +- paper-conference +publication: '*Proceedings of the Fourth International Conference on Language Resources + and Evaluation (LREC′04)*' +url_pdf: http://www.lrec-conf.org/proceedings/lrec2004/pdf/778.pdf +--- diff --git a/content/publication/qiu-etal-2006-paraphrase/cite.bib b/content/publication/qiu-etal-2006-paraphrase/cite.bib new file mode 100644 index 0000000..f196f5c --- /dev/null +++ b/content/publication/qiu-etal-2006-paraphrase/cite.bib @@ -0,0 +1,15 @@ +@inproceedings{qiu-etal-2006-paraphrase, + address = {Sydney, Australia}, + author = {Qiu, Long and +Kan, Min-Yen and +Chua, Tat-Seng}, + booktitle = {Proceedings of the 2006 Conference on Empirical Methods in Natural Language Processing}, + editor = {Jurafsky, Dan and +Gaussier, Eric}, + month = {July}, + pages = {18--26}, + publisher = {Association for Computational Linguistics}, + title = {Paraphrase Recognition via Dissimilarity Significance Classification}, + url = {https://aclanthology.org/W06-1603}, + year = {2006} +} diff --git a/content/publication/qiu-etal-2006-paraphrase/index.md b/content/publication/qiu-etal-2006-paraphrase/index.md new file mode 100644 index 0000000..60f4240 --- /dev/null +++ b/content/publication/qiu-etal-2006-paraphrase/index.md @@ -0,0 +1,16 @@ +--- +title: Paraphrase Recognition via Dissimilarity Significance Classification +authors: +- Long Qiu +- Min-Yen Kan +- Tat-Seng Chua +date: '2006-07-01' +publishDate: '2024-07-11T07:40:56.598259Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 2006 Conference on Empirical Methods in Natural + Language Processing*' +links: +- name: URL + url: https://aclanthology.org/W06-1603 +--- diff --git a/content/publication/qiu-etal-2008-modeling/cite.bib b/content/publication/qiu-etal-2008-modeling/cite.bib new file mode 100644 index 
0000000..3c3f25b --- /dev/null +++ b/content/publication/qiu-etal-2008-modeling/cite.bib @@ -0,0 +1,9 @@ +@inproceedings{qiu-etal-2008-modeling, + author = {Qiu, Long and +Kan, Min-Yen and +Chua, Tat-Seng}, + booktitle = {Proceedings of the Third International Joint Conference on Natural Language Processing: Volume-I}, + title = {Modeling Context in Scenario Template Creation}, + url = {https://aclanthology.org/I08-1021}, + year = {2008} +} diff --git a/content/publication/qiu-etal-2008-modeling/index.md b/content/publication/qiu-etal-2008-modeling/index.md new file mode 100644 index 0000000..5fca088 --- /dev/null +++ b/content/publication/qiu-etal-2008-modeling/index.md @@ -0,0 +1,16 @@ +--- +title: Modeling Context in Scenario Template Creation +authors: +- Long Qiu +- Min-Yen Kan +- Tat-Seng Chua +date: '2008-01-01' +publishDate: '2024-07-11T07:40:56.567890Z' +publication_types: +- paper-conference +publication: '*Proceedings of the Third International Joint Conference on Natural + Language Processing: Volume-I*' +links: +- name: URL + url: https://aclanthology.org/I08-1021 +--- diff --git a/content/publication/ramesh-kashyap-etal-2021-analyzing/cite.bib b/content/publication/ramesh-kashyap-etal-2021-analyzing/cite.bib new file mode 100644 index 0000000..6d06f58 --- /dev/null +++ b/content/publication/ramesh-kashyap-etal-2021-analyzing/cite.bib @@ -0,0 +1,25 @@ +@inproceedings{ramesh-kashyap-etal-2021-analyzing, + abstract = {The robustness of pretrained language models(PLMs) is generally measured using performance drops on two or more domains. However, we do not yet understand the inherent robustness achieved by contributions from different layers of a PLM. We systematically analyze the robustness of these representations layer by layer from two perspectives. First, we measure the robustness of representations by using domain divergence between two domains. 
We find that i) Domain variance increases from the lower to the upper layers for vanilla PLMs; ii) Models continuously pretrained on domain-specific data (DAPT)(Gururangan et al., 2020) exhibit more variance than their pretrained PLM counterparts; and that iii) Distilled models (e.g., DistilBERT) also show greater domain variance. Second, we investigate the robustness of representations by analyzing the encoded syntactic and semantic information using diagnostic probes. We find that similar layers have similar amounts of linguistic information for data from an unseen domain.}, + address = {Kyiv, Ukraine}, + author = {Ramesh Kashyap, Abhinav and +Mehnaz, Laiba and +Malik, Bhavitvya and +Waheed, Abdul and +Hazarika, Devamanyu and +Kan, Min-Yen and +Shah, Rajiv Ratn}, + booktitle = {Proceedings of the Second Workshop on Domain Adaptation for NLP}, + editor = {Ben-David, Eyal and +Cohen, Shay and +McDonald, Ryan and +Plank, Barbara and +Reichart, Roi and +Rotman, Guy and +Ziser, Yftah}, + month = {April}, + pages = {222--244}, + publisher = {Association for Computational Linguistics}, + title = {Analyzing the Domain Robustness of Pretrained Language Models, Layer by Layer}, + url = {https://aclanthology.org/2021.adaptnlp-1.23}, + year = {2021} +} diff --git a/content/publication/ramesh-kashyap-etal-2021-analyzing/index.md b/content/publication/ramesh-kashyap-etal-2021-analyzing/index.md new file mode 100644 index 0000000..27ad5fb --- /dev/null +++ b/content/publication/ramesh-kashyap-etal-2021-analyzing/index.md @@ -0,0 +1,32 @@ +--- +title: Analyzing the Domain Robustness of Pretrained Language Models, Layer by Layer +authors: +- Abhinav Ramesh Kashyap +- Laiba Mehnaz +- Bhavitvya Malik +- Abdul Waheed +- Devamanyu Hazarika +- Min-Yen Kan +- Rajiv Ratn Shah +date: '2021-04-01' +publishDate: '2024-07-11T07:40:56.223020Z' +publication_types: +- paper-conference +publication: '*Proceedings of the Second Workshop on Domain Adaptation for NLP*' +abstract: The robustness of 
pretrained language models(PLMs) is generally measured + using performance drops on two or more domains. However, we do not yet understand + the inherent robustness achieved by contributions from different layers of a PLM. + We systematically analyze the robustness of these representations layer by layer + from two perspectives. First, we measure the robustness of representations by using + domain divergence between two domains. We find that i) Domain variance increases + from the lower to the upper layers for vanilla PLMs; ii) Models continuously pretrained + on domain-specific data (DAPT)(Gururangan et al., 2020) exhibit more variance than + their pretrained PLM counterparts; and that iii) Distilled models (e.g., DistilBERT) + also show greater domain variance. Second, we investigate the robustness of representations + by analyzing the encoded syntactic and semantic information using diagnostic probes. + We find that similar layers have similar amounts of linguistic information for data + from an unseen domain. +links: +- name: URL + url: https://aclanthology.org/2021.adaptnlp-1.23 +--- diff --git a/content/publication/ramesh-kashyap-kan-2020-sciwing/cite.bib b/content/publication/ramesh-kashyap-kan-2020-sciwing/cite.bib new file mode 100644 index 0000000..32e003d --- /dev/null +++ b/content/publication/ramesh-kashyap-kan-2020-sciwing/cite.bib @@ -0,0 +1,25 @@ +@inproceedings{ramesh-kashyap-kan-2020-sciwing, + abstract = {We introduce SciWING, an open-source software toolkit which provides access to state-of-the-art pre-trained models for scientific document processing (SDP) tasks, such as citation string parsing, logical structure recovery and citation intent classification. Compared to other toolkits, SciWING follows a full neural pipeline and provides a Python interface for SDP. When needed, SciWING provides fine-grained control for rapid experimentation with different models by swapping and stacking different modules. 
Transfer learning from general and scientific documents specific pre-trained transformers (i.e., BERT, SciBERT, etc.) can be performed. SciWING incorporates ready-to-use web and terminal-based applications and demonstrations to aid adoption and development. The toolkit is available from \url{http://sciwing.io} and the demos are available at \url{http://rebrand.ly/sciwing-demo}.}, + address = {Online}, + author = {Ramesh Kashyap, Abhinav and +Kan, Min-Yen}, + booktitle = {Proceedings of the First Workshop on Scholarly Document Processing}, + doi = {10.18653/v1/2020.sdp-1.13}, + editor = {Chandrasekaran, Muthu Kumar and +de Waard, Anita and +Feigenblat, Guy and +Freitag, Dayne and +Ghosal, Tirthankar and +Hovy, Eduard and +Knoth, Petr and +Konopnicki, David and +Mayr, Philipp and +Patton, Robert M. and +Shmueli-Scheuer, Michal}, + month = {November}, + pages = {113--120}, + publisher = {Association for Computational Linguistics}, + title = {SciWING-- A Software Toolkit for Scientific Document Processing}, + url = {https://aclanthology.org/2020.sdp-1.13}, + year = {2020} +} diff --git a/content/publication/ramesh-kashyap-kan-2020-sciwing/index.md b/content/publication/ramesh-kashyap-kan-2020-sciwing/index.md new file mode 100644 index 0000000..11f059d --- /dev/null +++ b/content/publication/ramesh-kashyap-kan-2020-sciwing/index.md @@ -0,0 +1,26 @@ +--- +title: SciWING-- A Software Toolkit for Scientific Document Processing +authors: +- Abhinav Ramesh Kashyap +- Min-Yen Kan +date: '2020-11-01' +publishDate: '2024-07-11T07:40:56.255395Z' +publication_types: +- paper-conference +publication: '*Proceedings of the First Workshop on Scholarly Document Processing*' +doi: 10.18653/v1/2020.sdp-1.13 +abstract: We introduce SciWING, an open-source software toolkit which provides access + to state-of-the-art pre-trained models for scientific document processing (SDP) + tasks, such as citation string parsing, logical structure recovery and citation + intent classification. 
Compared to other toolkits, SciWING follows a full neural + pipeline and provides a Python interface for SDP. When needed, SciWING provides + fine-grained control for rapid experimentation with different models by swapping + and stacking different modules. Transfer learning from general and scientific documents + specific pre-trained transformers (i.e., BERT, SciBERT, etc.) can be performed. + SciWING incorporates ready-to-use web and terminal-based applications and demonstrations + to aid adoption and development. The toolkit is available from http://sciwing.io + and the demos are available at http://rebrand.ly/sciwing-demo. +links: +- name: URL + url: https://aclanthology.org/2020.sdp-1.13 +--- diff --git a/content/publication/setiawan-etal-2007-ordering/cite.bib b/content/publication/setiawan-etal-2007-ordering/cite.bib new file mode 100644 index 0000000..a8a0401 --- /dev/null +++ b/content/publication/setiawan-etal-2007-ordering/cite.bib @@ -0,0 +1,15 @@ +@inproceedings{setiawan-etal-2007-ordering, + address = {Prague, Czech Republic}, + author = {Setiawan, Hendra and +Kan, Min-Yen and +Li, Haizhou}, + booktitle = {Proceedings of the 45th Annual Meeting of the Association of Computational Linguistics}, + editor = {Zaenen, Annie and +van den Bosch, Antal}, + month = {June}, + pages = {712--719}, + publisher = {Association for Computational Linguistics}, + title = {Ordering Phrases with Function Words}, + url = {https://aclanthology.org/P07-1090}, + year = {2007} +} diff --git a/content/publication/setiawan-etal-2007-ordering/index.md b/content/publication/setiawan-etal-2007-ordering/index.md new file mode 100644 index 0000000..7942305 --- /dev/null +++ b/content/publication/setiawan-etal-2007-ordering/index.md @@ -0,0 +1,16 @@ +--- +title: Ordering Phrases with Function Words +authors: +- Hendra Setiawan +- Min-Yen Kan +- Haizhou Li +date: '2007-06-01' +publishDate: '2024-07-11T07:40:56.586029Z' +publication_types: +- paper-conference +publication: 
'*Proceedings of the 45th Annual Meeting of the Association of Computational + Linguistics*' +links: +- name: URL + url: https://aclanthology.org/P07-1090 +--- diff --git a/content/publication/setiawan-etal-2009-topological/cite.bib b/content/publication/setiawan-etal-2009-topological/cite.bib new file mode 100644 index 0000000..ea29b9d --- /dev/null +++ b/content/publication/setiawan-etal-2009-topological/cite.bib @@ -0,0 +1,18 @@ +@inproceedings{setiawan-etal-2009-topological, + address = {Suntec, Singapore}, + author = {Setiawan, Hendra and +Kan, Min-Yen and +Li, Haizhou and +Resnik, Philip}, + booktitle = {Proceedings of the Joint Conference of the 47th Annual Meeting of the ACL and the 4th International Joint Conference on Natural Language Processing of the AFNLP}, + editor = {Su, Keh-Yih and +Su, Jian and +Wiebe, Janyce and +Li, Haizhou}, + month = {August}, + pages = {324--332}, + publisher = {Association for Computational Linguistics}, + title = {Topological Ordering of Function Words in Hierarchical Phrase-based Translation}, + url = {https://aclanthology.org/P09-1037}, + year = {2009} +} diff --git a/content/publication/setiawan-etal-2009-topological/index.md b/content/publication/setiawan-etal-2009-topological/index.md new file mode 100644 index 0000000..916bd5f --- /dev/null +++ b/content/publication/setiawan-etal-2009-topological/index.md @@ -0,0 +1,18 @@ +--- +title: Topological Ordering of Function Words in Hierarchical Phrase-based Translation +authors: +- Hendra Setiawan +- Min-Yen Kan +- Haizhou Li +- Philip Resnik +date: '2009-08-01' +publishDate: '2024-07-11T07:40:56.542475Z' +publication_types: +- paper-conference +publication: '*Proceedings of the Joint Conference of the 47th Annual Meeting of the + ACL and the 4th International Joint Conference on Natural Language Processing of + the AFNLP*' +links: +- name: URL + url: https://aclanthology.org/P09-1037 +--- diff --git a/content/publication/si-etal-2019-sentiment/cite.bib 
b/content/publication/si-etal-2019-sentiment/cite.bib new file mode 100644 index 0000000..140f102 --- /dev/null +++ b/content/publication/si-etal-2019-sentiment/cite.bib @@ -0,0 +1,26 @@ +@inproceedings{si-etal-2019-sentiment, + abstract = {Sentiment ambiguous lexicons refer to words where their polarity depends strongly on context. As such, when the context is absent, their translations or their embedded sentence ends up (incorrectly) being dependent on the training data. While neural machine translation (NMT) has achieved great progress in recent years, most systems aim to produce one single correct translation for a given source sentence. We investigate the translation variation in two sentiment scenarios. We perform experiments to study the preservation of sentiment during translation with three different methods that we propose. We conducted tests with both sentiment and non-sentiment bearing contexts to examine the effectiveness of our methods. We show that NMT can generate both positive- and negative-valent translations of a source sentence, based on a given input sentiment label. 
Empirical evaluations show that our valence-sensitive embedding (VSE) method significantly outperforms a sequence-to-sequence (seq2seq) baseline, both in terms of BLEU score and ambiguous word translation accuracy in test, given non-sentiment bearing contexts.}, + address = {Hong Kong, China}, + author = {Si, Chenglei and +Wu, Kui and +Aw, Ai Ti and +Kan, Min-Yen}, + booktitle = {Proceedings of the 6th Workshop on Asian Translation}, + doi = {10.18653/v1/D19-5227}, + editor = {Nakazawa, Toshiaki and +Ding, Chenchen and +Dabre, Raj and +Kunchukuttan, Anoop and +Doi, Nobushige and +Oda, Yusuke and +Bojar, Ondřej and +Parida, Shantipriya and +Goto, Isao and +Mino, Hidaya}, + month = {November}, + pages = {200--206}, + publisher = {Association for Computational Linguistics}, + title = {Sentiment Aware Neural Machine Translation}, + url = {https://aclanthology.org/D19-5227}, + year = {2019} +} diff --git a/content/publication/si-etal-2019-sentiment/index.md b/content/publication/si-etal-2019-sentiment/index.md new file mode 100644 index 0000000..7928181 --- /dev/null +++ b/content/publication/si-etal-2019-sentiment/index.md @@ -0,0 +1,31 @@ +--- +title: Sentiment Aware Neural Machine Translation +authors: +- Chenglei Si +- Kui Wu +- Ai Ti Aw +- Min-Yen Kan +date: '2019-11-01' +publishDate: '2024-07-11T07:40:56.334066Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 6th Workshop on Asian Translation*' +doi: 10.18653/v1/D19-5227 +abstract: Sentiment ambiguous lexicons refer to words where their polarity depends + strongly on context. As such, when the context is absent, their translations or + their embedded sentence ends up (incorrectly) being dependent on the training data. + While neural machine translation (NMT) has achieved great progress in recent years, + most systems aim to produce one single correct translation for a given source sentence. + We investigate the translation variation in two sentiment scenarios. 
We perform + experiments to study the preservation of sentiment during translation with three + different methods that we propose. We conducted tests with both sentiment and non-sentiment + bearing contexts to examine the effectiveness of our methods. We show that NMT can + generate both positive- and negative-valent translations of a source sentence, based + on a given input sentiment label. Empirical evaluations show that our valence-sensitive + embedding (VSE) method significantly outperforms a sequence-to-sequence (seq2seq) + baseline, both in terms of BLEU score and ambiguous word translation accuracy in + test, given non-sentiment bearing contexts. +links: +- name: URL + url: https://aclanthology.org/D19-5227 +--- diff --git a/content/publication/tan-etal-2006-extending/cite.bib b/content/publication/tan-etal-2006-extending/cite.bib new file mode 100644 index 0000000..53df326 --- /dev/null +++ b/content/publication/tan-etal-2006-extending/cite.bib @@ -0,0 +1,9 @@ +@inproceedings{tan-etal-2006-extending, + author = {Tan, Yee Fan and +Kan, Min-Yen and +Cui, Hang}, + booktitle = {Proceedings of the Workshop on Multi-word-expressions in a multilingual context}, + title = {Extending corpus-based identification of light verb constructions using a supervised learning framework}, + url = {https://aclanthology.org/W06-2407}, + year = {2006} +} diff --git a/content/publication/tan-etal-2006-extending/index.md b/content/publication/tan-etal-2006-extending/index.md new file mode 100644 index 0000000..50feaf7 --- /dev/null +++ b/content/publication/tan-etal-2006-extending/index.md @@ -0,0 +1,17 @@ +--- +title: Extending corpus-based identification of light verb constructions using a supervised + learning framework +authors: +- Yee Fan Tan +- Min-Yen Kan +- Hang Cui +date: '2006-01-01' +publishDate: '2024-07-11T07:40:56.592096Z' +publication_types: +- paper-conference +publication: '*Proceedings of the Workshop on Multi-word-expressions in a multilingual + context*' 
+links: +- name: URL + url: https://aclanthology.org/W06-2407 +--- diff --git a/content/publication/tan-etal-2020-mind/cite.bib b/content/publication/tan-etal-2020-mind/cite.bib new file mode 100644 index 0000000..7e7ad19 --- /dev/null +++ b/content/publication/tan-etal-2020-mind/cite.bib @@ -0,0 +1,20 @@ +@inproceedings{tan-etal-2020-mind, + abstract = {Inflectional variation is a common feature of World Englishes such as Colloquial Singapore English and African American Vernacular English. Although comprehension by human readers is usually unimpaired by non-standard inflections, current NLP systems are not yet robust. We propose Base-Inflection Encoding (BITE), a method to tokenize English text by reducing inflected words to their base forms before reinjecting the grammatical information as special symbols. Fine-tuning pretrained NLP models for downstream tasks using our encoding defends against inflectional adversaries while maintaining performance on clean data. Models using BITE generalize better to dialects with non-standard inflections without explicit training and translation models converge faster when trained with BITE. Finally, we show that our encoding improves the vocabulary efficiency of popular data-driven subword tokenizers. Since there has been no prior work on quantitatively evaluating vocabulary efficiency, we propose metrics to do so.}, + address = {Online}, + author = {Tan, Samson and +Joty, Shafiq and +Varshney, Lav and +Kan, Min-Yen}, + booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)}, + doi = {10.18653/v1/2020.emnlp-main.455}, + editor = {Webber, Bonnie and +Cohn, Trevor and +He, Yulan and +Liu, Yang}, + month = {November}, + pages = {5647--5663}, + publisher = {Association for Computational Linguistics}, + title = {Mind Your Inflections! 
Improving NLP for Non-Standard Englishes with Base-Inflection Encoding}, + url = {https://aclanthology.org/2020.emnlp-main.455}, + year = {2020} +} diff --git a/content/publication/tan-etal-2020-mind/index.md b/content/publication/tan-etal-2020-mind/index.md new file mode 100644 index 0000000..be5a5a1 --- /dev/null +++ b/content/publication/tan-etal-2020-mind/index.md @@ -0,0 +1,32 @@ +--- +title: Mind Your Inflections! Improving NLP for Non-Standard Englishes with Base-Inflection + Encoding +authors: +- Samson Tan +- Shafiq Joty +- Lav Varshney +- Min-Yen Kan +date: '2020-11-01' +publishDate: '2024-07-11T07:40:56.262379Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 2020 Conference on Empirical Methods in Natural + Language Processing (EMNLP)*' +doi: 10.18653/v1/2020.emnlp-main.455 +abstract: Inflectional variation is a common feature of World Englishes such as Colloquial + Singapore English and African American Vernacular English. Although comprehension + by human readers is usually unimpaired by non-standard inflections, current NLP + systems are not yet robust. We propose Base-Inflection Encoding (BITE), a method + to tokenize English text by reducing inflected words to their base forms before + reinjecting the grammatical information as special symbols. Fine-tuning pretrained + NLP models for downstream tasks using our encoding defends against inflectional + adversaries while maintaining performance on clean data. Models using BITE generalize + better to dialects with non-standard inflections without explicit training and translation + models converge faster when trained with BITE. Finally, we show that our encoding + improves the vocabulary efficiency of popular data-driven subword tokenizers. Since + there has been no prior work on quantitatively evaluating vocabulary efficiency, + we propose metrics to do so. 
+links: +- name: URL + url: https://aclanthology.org/2020.emnlp-main.455 +--- diff --git a/content/publication/tan-etal-2020-morphin/cite.bib b/content/publication/tan-etal-2020-morphin/cite.bib new file mode 100644 index 0000000..f69310c --- /dev/null +++ b/content/publication/tan-etal-2020-morphin/cite.bib @@ -0,0 +1,20 @@ +@inproceedings{tan-etal-2020-morphin, + abstract = {Training on only perfect Standard English corpora predisposes pre-trained neural networks to discriminate against minorities from non-standard linguistic backgrounds (e.g., African American Vernacular English, Colloquial Singapore English, etc.). We perturb the inflectional morphology of words to craft plausible and semantically similar adversarial examples that expose these biases in popular NLP models, e.g., BERT and Transformer, and show that adversarially fine-tuning them for a single epoch significantly improves robustness without sacrificing performance on clean data.}, + address = {Online}, + author = {Tan, Samson and +Joty, Shafiq and +Kan, Min-Yen and +Socher, Richard}, + booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics}, + doi = {10.18653/v1/2020.acl-main.263}, + editor = {Jurafsky, Dan and +Chai, Joyce and +Schluter, Natalie and +Tetreault, Joel}, + month = {July}, + pages = {2920--2935}, + publisher = {Association for Computational Linguistics}, + title = {It's Morphin' Time! Combating Linguistic Discrimination with Inflectional Perturbations}, + url = {https://aclanthology.org/2020.acl-main.263}, + year = {2020} +} diff --git a/content/publication/tan-etal-2020-morphin/index.md b/content/publication/tan-etal-2020-morphin/index.md new file mode 100644 index 0000000..dc8f1bd --- /dev/null +++ b/content/publication/tan-etal-2020-morphin/index.md @@ -0,0 +1,26 @@ +--- +title: It's Morphin' Time! 
Combating Linguistic Discrimination with Inflectional Perturbations +authors: +- Samson Tan +- Shafiq Joty +- Min-Yen Kan +- Richard Socher +date: '2020-07-01' +publishDate: '2024-07-11T07:40:56.313557Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 58th Annual Meeting of the Association for Computational + Linguistics*' +doi: 10.18653/v1/2020.acl-main.263 +abstract: Training on only perfect Standard English corpora predisposes pre-trained + neural networks to discriminate against minorities from non-standard linguistic + backgrounds (e.g., African American Vernacular English, Colloquial Singapore English, + etc.). We perturb the inflectional morphology of words to craft plausible and semantically + similar adversarial examples that expose these biases in popular NLP models, e.g., + BERT and Transformer, and show that adversarially fine-tuning them for a single + epoch significantly improves robustness without sacrificing performance on clean + data. +links: +- name: URL + url: https://aclanthology.org/2020.acl-main.263 +--- diff --git a/content/publication/tan-etal-2021-reliability/cite.bib b/content/publication/tan-etal-2021-reliability/cite.bib new file mode 100644 index 0000000..40a512d --- /dev/null +++ b/content/publication/tan-etal-2021-reliability/cite.bib @@ -0,0 +1,22 @@ +@inproceedings{tan-etal-2021-reliability, + abstract = {Questions of fairness, robustness, and transparency are paramount to address before deploying NLP systems. Central to these concerns is the question of reliability: Can NLP systems reliably treat different demographics fairly and function correctly in diverse and noisy environments? To address this, we argue for the need for reliability testing and contextualize it among existing work on improving accountability. We show how adversarial attacks can be reframed for this goal, via a framework for developing reliability tests. 
We argue that reliability testing --- with an emphasis on interdisciplinary collaboration --- will enable rigorous and targeted testing, and aid in the enactment and enforcement of industry standards.}, + address = {Online}, + author = {Tan, Samson and +Joty, Shafiq and +Baxter, Kathy and +Taeihagh, Araz and +Bennett, Gregory A. and +Kan, Min-Yen}, + booktitle = {Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)}, + doi = {10.18653/v1/2021.acl-long.321}, + editor = {Zong, Chengqing and +Xia, Fei and +Li, Wenjie and +Navigli, Roberto}, + month = {August}, + pages = {4153--4169}, + publisher = {Association for Computational Linguistics}, + title = {Reliability Testing for Natural Language Processing Systems}, + url = {https://aclanthology.org/2021.acl-long.321}, + year = {2021} +} diff --git a/content/publication/tan-etal-2021-reliability/index.md b/content/publication/tan-etal-2021-reliability/index.md new file mode 100644 index 0000000..d305009 --- /dev/null +++ b/content/publication/tan-etal-2021-reliability/index.md @@ -0,0 +1,30 @@ +--- +title: Reliability Testing for Natural Language Processing Systems +authors: +- Samson Tan +- Shafiq Joty +- Kathy Baxter +- Araz Taeihagh +- Gregory A. Bennett +- Min-Yen Kan +date: '2021-08-01' +publishDate: '2024-07-11T07:40:56.247998Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 59th Annual Meeting of the Association for Computational + Linguistics and the 11th International Joint Conference on Natural Language Processing + (Volume 1: Long Papers)*' +doi: 10.18653/v1/2021.acl-long.321 +abstract: 'Questions of fairness, robustness, and transparency are paramount to address + before deploying NLP systems. 
Central to these concerns is the question of reliability: + Can NLP systems reliably treat different demographics fairly and function correctly + in diverse and noisy environments? To address this, we argue for the need for reliability + testing and contextualize it among existing work on improving accountability. We + show how adversarial attacks can be reframed for this goal, via a framework for + developing reliability tests. We argue that reliability testing --- with an emphasis + on interdisciplinary collaboration --- will enable rigorous and targeted testing, + and aid in the enactment and enforcement of industry standards.' +links: +- name: URL + url: https://aclanthology.org/2021.acl-long.321 +--- diff --git a/content/publication/wang-etal-2012-tweeting/cite.bib b/content/publication/wang-etal-2012-tweeting/cite.bib new file mode 100644 index 0000000..e2fb329 --- /dev/null +++ b/content/publication/wang-etal-2012-tweeting/cite.bib @@ -0,0 +1,16 @@ +@inproceedings{wang-etal-2012-tweeting, + address = {Montréal, Canada}, + author = {Wang, Aobo and +Chen, Tao and +Kan, Min-Yen}, + booktitle = {Proceedings of the Second Workshop on Language in Social Media}, + editor = {Sood, Sara Owsley and +Nagarajan, Meenakshi and +Gamon, Michael}, + month = {June}, + pages = {46--55}, + publisher = {Association for Computational Linguistics}, + title = {Re-tweeting from a linguistic perspective}, + url = {https://aclanthology.org/W12-2106}, + year = {2012} +} diff --git a/content/publication/wang-etal-2012-tweeting/index.md b/content/publication/wang-etal-2012-tweeting/index.md new file mode 100644 index 0000000..6f38c5a --- /dev/null +++ b/content/publication/wang-etal-2012-tweeting/index.md @@ -0,0 +1,15 @@ +--- +title: Re-tweeting from a linguistic perspective +authors: +- Aobo Wang +- Tao Chen +- Min-Yen Kan +date: '2012-06-01' +publishDate: '2024-07-11T07:40:56.450801Z' +publication_types: +- paper-conference +publication: '*Proceedings of the Second Workshop on 
Language in Social Media*' +links: +- name: URL + url: https://aclanthology.org/W12-2106 +--- diff --git a/content/publication/wang-etal-2013-chinese/cite.bib b/content/publication/wang-etal-2013-chinese/cite.bib new file mode 100644 index 0000000..ceebda2 --- /dev/null +++ b/content/publication/wang-etal-2013-chinese/cite.bib @@ -0,0 +1,17 @@ +@inproceedings{wang-etal-2013-chinese, + address = {Nagoya, Japan}, + author = {Wang, Aobo and +Kan, Min-Yen and +Andrade, Daniel and +Onishi, Takashi and +Ishikawa, Kai}, + booktitle = {Proceedings of the Sixth International Joint Conference on Natural Language Processing}, + editor = {Mitkov, Ruslan and +Park, Jong C.}, + month = {October}, + pages = {127--135}, + publisher = {Asian Federation of Natural Language Processing}, + title = {Chinese Informal Word Normalization: an Experimental Study}, + url = {https://aclanthology.org/I13-1015}, + year = {2013} +} diff --git a/content/publication/wang-etal-2013-chinese/index.md b/content/publication/wang-etal-2013-chinese/index.md new file mode 100644 index 0000000..54d25e5 --- /dev/null +++ b/content/publication/wang-etal-2013-chinese/index.md @@ -0,0 +1,18 @@ +--- +title: 'Chinese Informal Word Normalization: an Experimental Study' +authors: +- Aobo Wang +- Min-Yen Kan +- Daniel Andrade +- Takashi Onishi +- Kai Ishikawa +date: '2013-10-01' +publishDate: '2024-07-11T07:40:56.426038Z' +publication_types: +- paper-conference +publication: '*Proceedings of the Sixth International Joint Conference on Natural + Language Processing*' +links: +- name: URL + url: https://aclanthology.org/I13-1015 +--- diff --git a/content/publication/wang-kan-2013-mining/cite.bib b/content/publication/wang-kan-2013-mining/cite.bib new file mode 100644 index 0000000..ff8f7ef --- /dev/null +++ b/content/publication/wang-kan-2013-mining/cite.bib @@ -0,0 +1,15 @@ +@inproceedings{wang-kan-2013-mining, + address = {Sofia, Bulgaria}, + author = {Wang, Aobo and +Kan, Min-Yen}, + booktitle = {Proceedings of 
the 51st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)}, + editor = {Schuetze, Hinrich and +Fung, Pascale and +Poesio, Massimo}, + month = {August}, + pages = {731--741}, + publisher = {Association for Computational Linguistics}, + title = {Mining Informal Language from Chinese Microtext: Joint Word Recognition and Segmentation}, + url = {https://aclanthology.org/P13-1072}, + year = {2013} +} diff --git a/content/publication/wang-kan-2013-mining/index.md b/content/publication/wang-kan-2013-mining/index.md new file mode 100644 index 0000000..06c4f81 --- /dev/null +++ b/content/publication/wang-kan-2013-mining/index.md @@ -0,0 +1,16 @@ +--- +title: 'Mining Informal Language from Chinese Microtext: Joint Word Recognition and + Segmentation' +authors: +- Aobo Wang +- Min-Yen Kan +date: '2013-08-01' +publishDate: '2024-07-11T07:40:56.420009Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 51st Annual Meeting of the Association for Computational + Linguistics (Volume 1: Long Papers)*' +links: +- name: URL + url: https://aclanthology.org/P13-1072 +--- diff --git a/content/publication/ws-2009-2009-text/cite.bib b/content/publication/ws-2009-2009-text/cite.bib new file mode 100644 index 0000000..7ffaec2 --- /dev/null +++ b/content/publication/ws-2009-2009-text/cite.bib @@ -0,0 +1,10 @@ +@proceedings{ws-2009-2009-text, + address = {Suntec City, Singapore}, + editor = {Kan, Min-Yen and +Teufel, Simone}, + month = {August}, + publisher = {Association for Computational Linguistics}, + title = {Proceedings of the 2009 Workshop on Text and Citation Analysis for Scholarly Digital Libraries (NLPIR4DL)}, + url = {https://aclanthology.org/W09-3600}, + year = {2009} +} diff --git a/content/publication/ws-2009-2009-text/index.md b/content/publication/ws-2009-2009-text/index.md new file mode 100644 index 0000000..e0b1ef0 --- /dev/null +++ b/content/publication/ws-2009-2009-text/index.md @@ -0,0 +1,15 @@ +--- +title: 
Proceedings of the 2009 Workshop on Text and Citation Analysis for Scholarly + Digital Libraries (NLPIR4DL) +authors: +- Min-Yen Kan +- Simone Teufel +date: '2009-08-01' +publishDate: '2024-07-11T07:40:56.518007Z' +publication_types: +- book +publication: '*Association for Computational Linguistics*' +links: +- name: URL + url: https://aclanthology.org/W09-3600 +--- diff --git a/content/publication/ws-2016-joint/cite.bib b/content/publication/ws-2016-joint/cite.bib new file mode 100644 index 0000000..d4267e0 --- /dev/null +++ b/content/publication/ws-2016-joint/cite.bib @@ -0,0 +1,14 @@ +@proceedings{ws-2016-joint, + editor = {Cabanac, Guillaume and +Chandrasekaran, Muthu Kumar and +Frommholz, Ingo and +Jaidka, Kokil and +Kan, Min-Yen and +Mayr, Philipp and +Wolfram, Dietmar}, + month = {June}, + pages = {1--5}, + title = {Proceedings of the Joint Workshop on Bibliometric-enhanced Information Retrieval and Natural Language Processing for Digital Libraries (BIRNDL)}, + url = {https://aclanthology.org/W16-1500}, + year = {2016} +} diff --git a/content/publication/ws-2016-joint/index.md b/content/publication/ws-2016-joint/index.md new file mode 100644 index 0000000..0e3846c --- /dev/null +++ b/content/publication/ws-2016-joint/index.md @@ -0,0 +1,19 @@ +--- +title: Proceedings of the Joint Workshop on Bibliometric-enhanced Information Retrieval + and Natural Language Processing for Digital Libraries (BIRNDL) +authors: +- Guillaume Cabanac +- Muthu Kumar Chandrasekaran +- Ingo Frommholz +- Kokil Jaidka +- Min-Yen Kan +- Philipp Mayr +- Dietmar Wolfram +date: '2016-06-01' +publishDate: '2024-07-11T07:40:56.395203Z' +publication_types: +- book +links: +- name: URL + url: https://aclanthology.org/W16-1500 +--- diff --git a/content/publication/xie-etal-2020-exploring/cite.bib b/content/publication/xie-etal-2020-exploring/cite.bib new file mode 100644 index 0000000..0099ed3 --- /dev/null +++ b/content/publication/xie-etal-2020-exploring/cite.bib @@ -0,0 +1,20 @@ 
+@inproceedings{xie-etal-2020-exploring, + abstract = {Recent question generation (QG) approaches often utilize the sequence-to-sequence framework (Seq2Seq) to optimize the log likelihood of ground-truth questions using teacher forcing. However, this training objective is inconsistent with actual question quality, which is often reflected by certain global properties such as whether the question can be answered by the document. As such, we directly optimize for QG-specific objectives via reinforcement learning to improve question quality. We design three different rewards that target to improve the fluency, relevance, and answerability of generated questions. We conduct both automatic and human evaluations in addition to thorough analysis to explore the effect of each QG-specific reward. We find that optimizing on question-specific rewards generally leads to better performance in automatic evaluation metrics. However, only the rewards that correlate well with human judgement (e.g., relevance) lead to real improvement in question quality. Optimizing for the others, especially answerability, introduces incorrect bias to the model, resulting in poorer question quality. 
The code is publicly available at https://github.com/YuxiXie/RL-for-Question-Generation.}, + address = {Barcelona, Spain (Online)}, + author = {Xie, Yuxi and +Pan, Liangming and +Wang, Dongzhe and +Kan, Min-Yen and +Feng, Yansong}, + booktitle = {Proceedings of the 28th International Conference on Computational Linguistics}, + doi = {10.18653/v1/2020.coling-main.228}, + editor = {Scott, Donia and +Bel, Nuria and +Zong, Chengqing}, + month = {December}, + pages = {2534--2546}, + publisher = {International Committee on Computational Linguistics}, + title = {Exploring Question-Specific Rewards for Generating Deep Questions}, + url = {https://aclanthology.org/2020.coling-main.228}, + year = {2020} +} diff --git a/content/publication/xie-etal-2020-exploring/index.md b/content/publication/xie-etal-2020-exploring/index.md new file mode 100644 index 0000000..d272ffc --- /dev/null +++ b/content/publication/xie-etal-2020-exploring/index.md @@ -0,0 +1,33 @@ +--- +title: Exploring Question-Specific Rewards for Generating Deep Questions +authors: +- Yuxi Xie +- Liangming Pan +- Dongzhe Wang +- Min-Yen Kan +- Yansong Feng +date: '2020-12-01' +publishDate: '2024-07-11T07:40:56.276341Z' +publication_types: +- paper-conference +publication: '*Proceedings of the 28th International Conference on Computational Linguistics*' +doi: 10.18653/v1/2020.coling-main.228 +abstract: Recent question generation (QG) approaches often utilize the sequence-to-sequence + framework (Seq2Seq) to optimize the log likelihood of ground-truth questions using + teacher forcing. However, this training objective is inconsistent with actual question + quality, which is often reflected by certain global properties such as whether the + question can be answered by the document. As such, we directly optimize for QG-specific + objectives via reinforcement learning to improve question quality. 
We design three + different rewards that target to improve the fluency, relevance, and answerability + of generated questions. We conduct both automatic and human evaluations in addition + to thorough analysis to explore the effect of each QG-specific reward. We find that + optimizing on question-specific rewards generally leads to better performance in + automatic evaluation metrics. However, only the rewards that correlate well with + human judgement (e.g., relevance) lead to real improvement in question quality. + Optimizing for the others, especially answerability, introduces incorrect bias to + the model, resulting in poorer question quality. The code is publicly available + at https://github.com/YuxiXie/RL-for-Question-Generation. +links: +- name: URL + url: https://aclanthology.org/2020.coling-main.228 +---