diff --git a/content/_index.md b/content/_index.md
index 6ef4b49..2c7429c 100644
--- a/content/_index.md
+++ b/content/_index.md
@@ -8,13 +8,11 @@ sections:
- block: hero
content:
title: |
- Wowchemy
- Research Group
+ Web IR / NLP Group (WING) @ NUS
image:
filename: welcome.jpg
text: |
-
- The **Web, Information Retrieval / Natural Language Processing Group (WING)** explores the research area of applied language processing and information retrieval to the Web and related technologies. Areas of current interest are question answering, scholarly digital libraries, verb similarity, focused crawling, citation parsing and spidering, web page classification and division, text segmentation, and full text analysis. WING is headed by A/P Min-Yen KAN. We are based in the Computational Linguistics Laboratory of the School of Computing at the National University of Singapore. We often work with the Natural Language Processing Group and the (Lab for Media Search)[http://lms.comp.nus.edu.sg/]. We are part of the Media Technologies research group umbrella.
+ The Web, Information Retrieval / Natural Language Processing Group (WING) applies natural language processing and information retrieval to the Web and related technologies. Current areas of interest include question answering, scholarly digital libraries, verb similarity, focused crawling, citation parsing and spidering, web page classification and division, text segmentation, and full-text analysis. WING is headed by Min (A/P Min-Yen KAN). We are based in the Computational Linguistics Laboratory of the School of Computing at the National University of Singapore. We often work with the Natural Language Processing Group and the Lab for Media Search, and are part of the Media Technologies research group umbrella.
- block: collection
content:
@@ -73,7 +71,7 @@ sections:
title:
subtitle:
text: |
- {{% cta cta_link="./people/" cta_text="Meet the team →" %}}
+ {{% cta cta_link="./people/" %}}
design:
columns: '1'
---
diff --git a/content/authors/alumnus/_index.md b/content/authors/alumnus/_index.md
index 7fa329a..1d3a1e9 100644
--- a/content/authors/alumnus/_index.md
+++ b/content/authors/alumnus/_index.md
@@ -3,8 +3,8 @@
title: Somebody
# Full Name (for SEO)
-first_name: Min-Yen
-last_name: Kan
+first_name: Somebody
+last_name: Somewhere
# Is this the primary user of the site?
superuser: false
@@ -14,9 +14,9 @@ superuser: false
# For an email link, use "fas" icon pack, "envelope" icon, and a link in the
# form "mailto:your-email@example.com" or "#contact" for contact widget.
social:
- - icon: house
- icon_pack: fas
- link: https://www.comp.nus.edu.sg/~kanmy/
+ # - icon: house
+ # icon_pack: fas
+ # link: https://www.comp.nus.edu.sg/~kanmy/
# Highlight the author in author lists? (true/false)
highlight_name: false
@@ -27,5 +27,4 @@ user_groups:
- Alumni
---
-
An alumnus
\ No newline at end of file
diff --git a/content/authors/jason/_index.md b/content/authors/jason/_index.md
new file mode 100644
index 0000000..78b3832
--- /dev/null
+++ b/content/authors/jason/_index.md
@@ -0,0 +1,62 @@
+---
+# Display name
+title: Jason Qiu
+
+# Full Name (for SEO)
+first_name: Jason
+last_name: Qiu
+
+# Is this the primary user of the site?
+superuser: true
+
+# Role/position
+role: Undergraduate Student
+
+# Organizations/Affiliations
+organizations:
+ - name: National University of Singapore, School of Computing
+ url: 'http://www.comp.nus.edu.sg'
+
+# Short bio (displayed in user profile at end of posts)
+bio: FYP student
+
+interests:
+ - Artificial Intelligence
+ - Information Retrieval
+
+
+
+
+# Social/Academic Networking
+# For available icons, see: https://docs.hugoblox.com/getting-started/page-builder/#icons
+# For an email link, use "fas" icon pack, "envelope" icon, and a link in the
+# form "mailto:your-email@example.com" or "#contact" for contact widget.
+social:
+ - icon: house
+ icon_pack: fas
+ link: https://www.linkedin.com/in/jasonqiu212/
+ - icon: envelope
+ icon_pack: fas
+ link: 'mailto:jason.qiu@u.nus.edu'
+
+# Link to a PDF of your resume/CV from the About widget.
+# To enable, copy your resume/CV to `static/files/cv.pdf` and uncomment the lines below.
+# - icon: cv
+# icon_pack: ai
+# link: files/cv.pdf
+
+# Enter email to display Gravatar (if Gravatar enabled in Config)
+email: 'jason.qiu@u.nus.edu'
+
+# Highlight the author in author lists? (true/false)
+highlight_name: false
+
+# Organizational groups that you belong to (for People widget)
+# Set this to `[]` or comment out if you are not using People widget.
+user_groups:
+ - Undergraduate Students
+# - Researchers
+---
+
+Jason Qiu is an FYP student who joined our group in 2024.
+
diff --git a/content/authors/jason/avatar.jpg b/content/authors/jason/avatar.jpg
new file mode 100644
index 0000000..bff0dcd
Binary files /dev/null and b/content/authors/jason/avatar.jpg differ
diff --git a/content/authors/min/_index.md b/content/authors/min/_index.md
index 083dbef..5e403d0 100644
--- a/content/authors/min/_index.md
+++ b/content/authors/min/_index.md
@@ -18,7 +18,7 @@ organizations:
url: 'http://www.comp.nus.edu.sg'
# Short bio (displayed in user profile at end of posts)
-bio: My research interests include distributed robotics, mobile computing and programmable matter.
+bio: WING lead; interests include Digital Libraries, Information Retrieval and Natural Language Processing.
interests:
- Artificial Intelligence
@@ -76,11 +76,11 @@ highlight_name: false
# Organizational groups that you belong to (for People widget)
# Set this to `[]` or comment out if you are not using People widget.
user_groups:
- - Principal Investigators
- - Researchers
+ - Principal Investigator / Staff
+# - Researchers
---
-Min-Yen Kan (BS;MS;PhD Columbia Univ.; SACM, SIEEE) is an Associate Professor and Vice Dean of Undergraduate Studies at the National University of Singapore. Min is an active member of the Association of Computational Linguistics (ACL), currently serving as a co-chair for the ACL Ethics Committee, and previously as the ACL Anthology Director (2008–2018). He is an associate editor for Information Retrieval and the survey editor for the Journal of AI Research (JAIR).
+Min-Yen Kan (BS;MS;PhD Columbia Univ.; SACM, SIEEE) is an Associate Professor and Vice Dean of Undergraduate Studies at the National University of Singapore. Min is an active member of the Association for Computational Linguistics (ACL), currently serving as a co-chair for the ACL Ethics Committee (ACL AEC), and previously as the ACL Anthology Director (2008–2018). He is an associate editor for Information Retrieval and the survey editor for the Journal of AI Research (JAIR).
His research interests include digital libraries, natural language processing and information retrieval. He was recognized as a distinguished speaker by the ACM for natural language processing and digital libraries research. Specific projects include work in the areas of scientific discourse analysis, fact verification, full-text literature mining, lexical semantics and large language models. He leads the Web Information Retrieval / Natural Language Processing Group (WING.NUS) http://wing.comp.nus.edu.sg/
diff --git a/content/people/index.md b/content/people/index.md
index 9cdf184..a37488c 100644
--- a/content/people/index.md
+++ b/content/people/index.md
@@ -7,12 +7,11 @@ type: landing
sections:
- block: people
content:
- title: Meet the Team
+ # title: Meet the Team
# Choose which groups/teams of users to display.
# Edit `user_groups` in each user's profile to add them to one or more of these groups.
user_groups:
- - Principal Investigators
- - Staff
+ - Principal Investigator / Staff
- Graduate Students
- Undergraduate Students
- Visitors / Interns
@@ -20,7 +19,7 @@ sections:
sort_by: Params.last_name
sort_ascending: true
design:
- show_interests: false
+ show_interests: true
show_role: true
show_social: true
---
\ No newline at end of file
diff --git a/content/publication/8743365/cite.bib b/content/publication/8743365/cite.bib
new file mode 100644
index 0000000..b164507
--- /dev/null
+++ b/content/publication/8743365/cite.bib
@@ -0,0 +1,11 @@
+@article{8743365,
+ author = {An, Ya-Hui and Pan, Liangming and Kan, Min-Yen and Dong, Qiang and Fu, Yan},
+ doi = {10.1109/ACCESS.2019.2924250},
+ journal = {IEEE Access},
+ keywords = {Context;Task analysis;Tagging;Message systems;Discussion forums;Context modeling;Semantics;Artificial intelligence;deep learning;hyperlinking;learning resources;MOOC discussion forums;name entity recognition},
+ number = {},
+ pages = {87887-87900},
+ title = {Resource Mention Extraction for MOOC Discussion Forums},
+ volume = {7},
+ year = {2019}
+}
diff --git a/content/publication/8743365/index.md b/content/publication/8743365/index.md
new file mode 100644
index 0000000..92f14cc
--- /dev/null
+++ b/content/publication/8743365/index.md
@@ -0,0 +1,19 @@
+---
+title: Resource Mention Extraction for MOOC Discussion Forums
+authors:
+- Ya-Hui An
+- Liangming Pan
+- min
+- Qiang Dong
+- Yan Fu
+date: '2019-01-01'
+publishDate: '2024-07-12T07:37:44.756935Z'
+publication_types:
+- article-journal
+publication: '*IEEE Access*'
+doi: 10.1109/ACCESS.2019.2924250
+tags:
+- Context;Task analysis;Tagging;Message systems;Discussion forums;Context modeling;Semantics;Artificial
+ intelligence;deep learning;hyperlinking;learning resources;MOOC discussion forums;name
+ entity recognition
+---
diff --git a/content/publication/acl-2017-association-linguistics/cite.bib b/content/publication/acl-2017-association-linguistics/cite.bib
new file mode 100644
index 0000000..c47d1af
--- /dev/null
+++ b/content/publication/acl-2017-association-linguistics/cite.bib
@@ -0,0 +1,11 @@
+@proceedings{acl-2017-association-linguistics,
+ address = {Vancouver, Canada},
+ doi = {10.18653/v1/P17-2},
+ editor = {Barzilay, Regina and
+Kan, Min-Yen},
+ month = {July},
+ publisher = {Association for Computational Linguistics},
+ title = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
+ url = {https://aclanthology.org/P17-2000},
+ year = {2017}
+}
diff --git a/content/publication/acl-2017-association-linguistics/index.md b/content/publication/acl-2017-association-linguistics/index.md
new file mode 100644
index 0000000..a1e1bd1
--- /dev/null
+++ b/content/publication/acl-2017-association-linguistics/index.md
@@ -0,0 +1,16 @@
+---
+title: 'Proceedings of the 55th Annual Meeting of the Association for Computational
+ Linguistics (Volume 2: Short Papers)'
+authors:
+- Regina Barzilay
+- min
+date: '2017-07-01'
+publishDate: '2024-07-11T07:40:56.389063Z'
+publication_types:
+- book
+publication: '*Association for Computational Linguistics*'
+doi: 10.18653/v1/P17-2
+links:
+- name: URL
+ url: https://aclanthology.org/P17-2000
+---
diff --git a/content/publication/aksu-etal-2021-velocidapter/cite.bib b/content/publication/aksu-etal-2021-velocidapter/cite.bib
new file mode 100644
index 0000000..0603cb7
--- /dev/null
+++ b/content/publication/aksu-etal-2021-velocidapter/cite.bib
@@ -0,0 +1,26 @@
+@inproceedings{aksu-etal-2021-velocidapter,
+ abstract = {We introduce a synthetic dialogue generation framework, Velocidapter, which addresses the corpus availability problem for dialogue comprehension. Velocidapter augments datasets by simulating synthetic conversations for a task-oriented dialogue domain, requiring a small amount of bootstrapping work for each new domain. We evaluate the efficacy of our framework on a task-oriented dialogue comprehension dataset, MRCWOZ, which we curate by annotating questions for slots in the restaurant, taxi, and hotel domains of the MultiWOZ 2.2 dataset (Zang et al., 2020). We run experiments within a low-resource setting, where we pretrain a model on SQuAD, fine-tuning it on either a small original data or on the synthetic data generated by our framework. Velocidapter shows significant improvements using both the transformer-based BERTBase and BiDAF as base models. We further show that the framework is easy to use by novice users and conclude that Velocidapter can greatly help training over task-oriented dialogues, especially for low-resourced emerging domains.},
+ address = {Singapore and Online},
+ author = {Aksu, Ibrahim Taha and
+Liu, Zhengyuan and
+Kan, Min-Yen and
+Chen, Nancy},
+ booktitle = {Proceedings of the 22nd Annual Meeting of the Special Interest Group on Discourse and Dialogue},
+ doi = {10.18653/v1/2021.sigdial-1.14},
+ editor = {Li, Haizhou and
+Levow, Gina-Anne and
+Yu, Zhou and
+Gupta, Chitralekha and
+Sisman, Berrak and
+Cai, Siqi and
+Vandyke, David and
+Dethlefs, Nina and
+Wu, Yan and
+Li, Junyi Jessy},
+ month = {July},
+ pages = {133--143},
+ publisher = {Association for Computational Linguistics},
+ title = {Velocidapter: Task-oriented Dialogue Comprehension Modeling Pairing Synthetic Text Generation with Domain Adaptation},
+ url = {https://aclanthology.org/2021.sigdial-1.14},
+ year = {2021}
+}
diff --git a/content/publication/aksu-etal-2021-velocidapter/index.md b/content/publication/aksu-etal-2021-velocidapter/index.md
new file mode 100644
index 0000000..2b07848
--- /dev/null
+++ b/content/publication/aksu-etal-2021-velocidapter/index.md
@@ -0,0 +1,32 @@
+---
+title: 'Velocidapter: Task-oriented Dialogue Comprehension Modeling Pairing Synthetic
+ Text Generation with Domain Adaptation'
+authors:
+- Ibrahim Taha Aksu
+- Zhengyuan Liu
+- min
+- Nancy Chen
+date: '2021-07-01'
+publishDate: '2024-07-05T17:09:42.645613Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 22nd Annual Meeting of the Special Interest Group
+ on Discourse and Dialogue*'
+doi: 10.18653/v1/2021.sigdial-1.14
+abstract: We introduce a synthetic dialogue generation framework, Velocidapter, which
+ addresses the corpus availability problem for dialogue comprehension. Velocidapter
+ augments datasets by simulating synthetic conversations for a task-oriented dialogue
+ domain, requiring a small amount of bootstrapping work for each new domain. We evaluate
+ the efficacy of our framework on a task-oriented dialogue comprehension dataset,
+ MRCWOZ, which we curate by annotating questions for slots in the restaurant, taxi,
+ and hotel domains of the MultiWOZ 2.2 dataset (Zang et al., 2020). We run experiments
+ within a low-resource setting, where we pretrain a model on SQuAD, fine-tuning it
+ on either a small original data or on the synthetic data generated by our framework.
+ Velocidapter shows significant improvements using both the transformer-based BERTBase
+ and BiDAF as base models. We further show that the framework is easy to use by novice
+ users and conclude that Velocidapter can greatly help training over task-oriented
+ dialogues, especially for low-resourced emerging domains.
+links:
+- name: URL
+ url: https://aclanthology.org/2021.sigdial-1.14
+---
diff --git a/content/publication/aksu-etal-2022-n/cite.bib b/content/publication/aksu-etal-2022-n/cite.bib
new file mode 100644
index 0000000..f789173
--- /dev/null
+++ b/content/publication/aksu-etal-2022-n/cite.bib
@@ -0,0 +1,19 @@
+@inproceedings{aksu-etal-2022-n,
+ abstract = {Augmentation of task-oriented dialogues has followed standard methods used for plain-text such as back-translation, word-level manipulation, and paraphrasing despite its richly annotated structure. In this work, we introduce an augmentation framework that utilizes belief state annotations to match turns from various dialogues and form new synthetic dialogues in a bottom-up manner. Unlike other augmentation strategies, it operates with as few as five examples. Our augmentation strategy yields significant improvements when both adapting a DST model to a new domain, and when adapting a language model to the DST task, on evaluations with TRADE and TOD-BERT models. Further analysis shows that our model performs better on seen values during training, and it is also more robust to unseen values. We conclude that exploiting belief state annotations enhances dialogue augmentation and results in improved models in n-shot training scenarios.},
+ address = {Dublin, Ireland},
+ author = {Aksu, Ibrahim and
+Liu, Zhengyuan and
+Kan, Min-Yen and
+Chen, Nancy},
+ booktitle = {Findings of the Association for Computational Linguistics: ACL 2022},
+ doi = {10.18653/v1/2022.findings-acl.131},
+ editor = {Muresan, Smaranda and
+Nakov, Preslav and
+Villavicencio, Aline},
+ month = {May},
+ pages = {1659--1671},
+ publisher = {Association for Computational Linguistics},
+ title = {N-Shot Learning for Augmenting Task-Oriented Dialogue State Tracking},
+ url = {https://aclanthology.org/2022.findings-acl.131},
+ year = {2022}
+}
diff --git a/content/publication/aksu-etal-2022-n/index.md b/content/publication/aksu-etal-2022-n/index.md
new file mode 100644
index 0000000..230eec6
--- /dev/null
+++ b/content/publication/aksu-etal-2022-n/index.md
@@ -0,0 +1,29 @@
+---
+title: N-Shot Learning for Augmenting Task-Oriented Dialogue State Tracking
+authors:
+- Ibrahim Aksu
+- Zhengyuan Liu
+- min
+- Nancy Chen
+date: '2022-05-01'
+publishDate: '2024-07-05T17:09:42.588862Z'
+publication_types:
+- paper-conference
+publication: '*Findings of the Association for Computational Linguistics: ACL 2022*'
+doi: 10.18653/v1/2022.findings-acl.131
+abstract: Augmentation of task-oriented dialogues has followed standard methods used
+ for plain-text such as back-translation, word-level manipulation, and paraphrasing
+ despite its richly annotated structure. In this work, we introduce an augmentation
+ framework that utilizes belief state annotations to match turns from various dialogues
+ and form new synthetic dialogues in a bottom-up manner. Unlike other augmentation
+ strategies, it operates with as few as five examples. Our augmentation strategy
+ yields significant improvements when both adapting a DST model to a new domain,
+ and when adapting a language model to the DST task, on evaluations with TRADE and
+ TOD-BERT models. Further analysis shows that our model performs better on seen values
+ during training, and it is also more robust to unseen values. We conclude that exploiting
+ belief state annotations enhances dialogue augmentation and results in improved
+ models in n-shot training scenarios.
+links:
+- name: URL
+ url: https://aclanthology.org/2022.findings-acl.131
+---
diff --git a/content/publication/aksu-etal-2023-prompter/cite.bib b/content/publication/aksu-etal-2023-prompter/cite.bib
new file mode 100644
index 0000000..f47bdd0
--- /dev/null
+++ b/content/publication/aksu-etal-2023-prompter/cite.bib
@@ -0,0 +1,18 @@
+@inproceedings{aksu-etal-2023-prompter,
+ abstract = {A challenge in the Dialogue State Tracking (DST) field is adapting models to new domains without using any supervised data --- zero-shot domain adaptation. Parameter-Efficient Transfer Learning (PETL) has the potential to address this problem due to its robustness. However, it has yet to be applied to the zero-shot scenarios, as it is not clear how to apply it unsupervisedly. Our method, Prompter, uses descriptions of target domain slots to generate dynamic prefixes that are concatenated to the key and values at each layer′s self-attention mechanism. This allows for the use of prefix-tuning in zero-shot. Prompter outperforms previous methods on both the MultiWOZ and SGD benchmarks. In generating prefixes, our analyses find that Prompter not only utilizes the semantics of slot descriptions but also how often the slots appear together in conversation. Moreover, Prompter′s gains are due to its improved ability to distinguish ″none″-valued dialogue slots, compared against baselines.},
+ address = {Toronto, Canada},
+ author = {Aksu, Ibrahim Taha and
+Kan, Min-Yen and
+Chen, Nancy},
+ booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
+ doi = {10.18653/v1/2023.acl-long.252},
+ editor = {Rogers, Anna and
+Boyd-Graber, Jordan and
+Okazaki, Naoaki},
+ month = {July},
+ pages = {4588--4603},
+ publisher = {Association for Computational Linguistics},
+ title = {Prompter: Zero-shot Adaptive Prefixes for Dialogue State Tracking Domain Adaptation},
+ url = {https://aclanthology.org/2023.acl-long.252},
+ year = {2023}
+}
diff --git a/content/publication/aksu-etal-2023-prompter/index.md b/content/publication/aksu-etal-2023-prompter/index.md
new file mode 100644
index 0000000..f1ba6cd
--- /dev/null
+++ b/content/publication/aksu-etal-2023-prompter/index.md
@@ -0,0 +1,29 @@
+---
+title: 'Prompter: Zero-shot Adaptive Prefixes for Dialogue State Tracking Domain Adaptation'
+authors:
+- Ibrahim Taha Aksu
+- min
+- Nancy Chen
+date: '2023-07-01'
+publishDate: '2024-07-06T02:22:24.632344Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 61st Annual Meeting of the Association for Computational
+ Linguistics (Volume 1: Long Papers)*'
+doi: 10.18653/v1/2023.acl-long.252
+abstract: A challenge in the Dialogue State Tracking (DST) field is adapting models
+ to new domains without using any supervised data --- zero-shot domain adaptation.
+ Parameter-Efficient Transfer Learning (PETL) has the potential to address this problem
+ due to its robustness. However, it has yet to be applied to the zero-shot scenarios,
+ as it is not clear how to apply it unsupervisedly. Our method, Prompter, uses descriptions
+ of target domain slots to generate dynamic prefixes that are concatenated to the
+ key and values at each layer′s self-attention mechanism. This allows for the use
+ of prefix-tuning in zero-shot. Prompter outperforms previous methods on both the
+ MultiWOZ and SGD benchmarks. In generating prefixes, our analyses find that Prompter
+ not only utilizes the semantics of slot descriptions but also how often the slots
+ appear together in conversation. Moreover, Prompter′s gains are due to its improved
+ ability to distinguish ″none″-valued dialogue slots, compared against baselines.
+links:
+- name: URL
+ url: https://aclanthology.org/2023.acl-long.252
+---
diff --git a/content/publication/benotti-etal-2023-understanding/cite.bib b/content/publication/benotti-etal-2023-understanding/cite.bib
new file mode 100644
index 0000000..1770d61
--- /dev/null
+++ b/content/publication/benotti-etal-2023-understanding/cite.bib
@@ -0,0 +1,18 @@
+@inproceedings{benotti-etal-2023-understanding,
+ abstract = {With NLP research now quickly being transferred into real-world applications, it is important to be aware of and think through the consequences of our scientific investigation. Such ethical considerations are important in both authoring and reviewing. This tutorial will equip participants with basic guidelines for thinking deeply about ethical issues and review common considerations that recur in NLP research. The methodology is interactive and participatory, including case studies and working in groups. Importantly, the participants will be co-building the tutorial outcomes and will be working to create further tutorial materials to share as public outcomes.},
+ address = {Dubrovnik, Croatia},
+ author = {Benotti, Luciana and
+Fort, Karën and
+Kan, Min-Yen and
+Tsvetkov, Yulia},
+ booktitle = {Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics: Tutorial Abstracts},
+ doi = {10.18653/v1/2023.eacl-tutorials.4},
+ editor = {Zanzotto, Fabio Massimo and
+Pradhan, Sameer},
+ month = {May},
+ pages = {19--24},
+ publisher = {Association for Computational Linguistics},
+ title = {Understanding Ethics in NLP Authoring and Reviewing},
+ url = {https://aclanthology.org/2023.eacl-tutorials.4},
+ year = {2023}
+}
diff --git a/content/publication/benotti-etal-2023-understanding/index.md b/content/publication/benotti-etal-2023-understanding/index.md
new file mode 100644
index 0000000..66cd2b2
--- /dev/null
+++ b/content/publication/benotti-etal-2023-understanding/index.md
@@ -0,0 +1,26 @@
+---
+title: Understanding Ethics in NLP Authoring and Reviewing
+authors:
+- Luciana Benotti
+- Karën Fort
+- min
+- Yulia Tsvetkov
+date: '2023-05-01'
+publishDate: '2024-07-06T02:22:24.603387Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 17th Conference of the European Chapter of the Association
+ for Computational Linguistics: Tutorial Abstracts*'
+doi: 10.18653/v1/2023.eacl-tutorials.4
+abstract: With NLP research now quickly being transferred into real-world applications,
+ it is important to be aware of and think through the consequences of our scientific
+ investigation. Such ethical considerations are important in both authoring and reviewing.
+ This tutorial will equip participants with basic guidelines for thinking deeply
+ about ethical issues and review common considerations that recur in NLP research.
+ The methodology is interactive and participatory, including case studies and working
+ in groups. Importantly, the participants will be co-building the tutorial outcomes
+ and will be working to create further tutorial materials to share as public outcomes.
+links:
+- name: URL
+ url: https://aclanthology.org/2023.eacl-tutorials.4
+---
diff --git a/content/publication/bhola-etal-2020-retrieving/cite.bib b/content/publication/bhola-etal-2020-retrieving/cite.bib
new file mode 100644
index 0000000..d75806b
--- /dev/null
+++ b/content/publication/bhola-etal-2020-retrieving/cite.bib
@@ -0,0 +1,19 @@
+@inproceedings{bhola-etal-2020-retrieving,
+ abstract = {We introduce a deep learning model to learn the set of enumerated job skills associated with a job description. In our analysis of a large-scale government job portal mycareersfuture.sg, we observe that as much as 65% of job descriptions miss describing a significant number of relevant skills. Our model addresses this task from the perspective of an extreme multi-label classification (XMLC) problem, where descriptions are the evidence for the binary relevance of thousands of individual skills. Building upon the current state-of-the-art language modeling approaches such as BERT, we show our XMLC method improves on an existing baseline solution by over 9% and 7% absolute improvements in terms of recall and normalized discounted cumulative gain. We further show that our approach effectively addresses the missing skills problem, and helps in recovering relevant skills that were missed out in the job postings by taking into account the structured semantic representation of skills and their co-occurrences through a Correlation Aware Bootstrapping process. We further show that our approach, to ensure the BERT-XMLC model accounts for structured semantic representation of skills and their co-occurrences through a Correlation Aware Bootstrapping process, effectively addresses the missing skills problem, and helps in recovering relevant skills that were missed out in the job postings. To facilitate future research and replication of our work, we have made the dataset and the implementation of our model publicly available.},
+ address = {Barcelona, Spain (Online)},
+ author = {Bhola, Akshay and
+Halder, Kishaloy and
+Prasad, Animesh and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the 28th International Conference on Computational Linguistics},
+ doi = {10.18653/v1/2020.coling-main.513},
+ editor = {Scott, Donia and
+Bel, Nuria and
+Zong, Chengqing},
+ month = {December},
+ pages = {5832--5842},
+ publisher = {International Committee on Computational Linguistics},
+ title = {Retrieving Skills from Job Descriptions: A Language Model Based Extreme Multi-label Classification Framework},
+ url = {https://aclanthology.org/2020.coling-main.513},
+ year = {2020}
+}
diff --git a/content/publication/bhola-etal-2020-retrieving/index.md b/content/publication/bhola-etal-2020-retrieving/index.md
new file mode 100644
index 0000000..010da95
--- /dev/null
+++ b/content/publication/bhola-etal-2020-retrieving/index.md
@@ -0,0 +1,37 @@
+---
+title: 'Retrieving Skills from Job Descriptions: A Language Model Based Extreme Multi-label
+ Classification Framework'
+authors:
+- Akshay Bhola
+- Kishaloy Halder
+- Animesh Prasad
+- min
+date: '2020-12-01'
+publishDate: '2024-07-11T07:40:56.291153Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 28th International Conference on Computational Linguistics*'
+doi: 10.18653/v1/2020.coling-main.513
+abstract: We introduce a deep learning model to learn the set of enumerated job skills
+ associated with a job description. In our analysis of a large-scale government job
+ portal mycareersfuture.sg, we observe that as much as 65% of job descriptions miss
+ describing a significant number of relevant skills. Our model addresses this task
+ from the perspective of an extreme multi-label classification (XMLC) problem, where
+ descriptions are the evidence for the binary relevance of thousands of individual
+ skills. Building upon the current state-of-the-art language modeling approaches
+ such as BERT, we show our XMLC method improves on an existing baseline solution
+ by over 9% and 7% absolute improvements in terms of recall and normalized discounted
+ cumulative gain. We further show that our approach effectively addresses the missing
+ skills problem, and helps in recovering relevant skills that were missed out in
+ the job postings by taking into account the structured semantic representation of
+ skills and their co-occurrences through a Correlation Aware Bootstrapping process.
+ We further show that our approach, to ensure the BERT-XMLC model accounts for structured
+ semantic representation of skills and their co-occurrences through a Correlation
+ Aware Bootstrapping process, effectively addresses the missing skills problem, and
+ helps in recovering relevant skills that were missed out in the job postings. To
+ facilitate future research and replication of our work, we have made the dataset
+ and the implementation of our model publicly available.
+links:
+- name: URL
+ url: https://aclanthology.org/2020.coling-main.513
+---
diff --git a/content/publication/bird-etal-2008-acl/cite.bib b/content/publication/bird-etal-2008-acl/cite.bib
new file mode 100644
index 0000000..77787d9
--- /dev/null
+++ b/content/publication/bird-etal-2008-acl/cite.bib
@@ -0,0 +1,27 @@
+@inproceedings{bird-etal-2008-acl,
+ abstract = {The ACL Anthology is a digital archive of conference and journal papers in natural language processing and computational linguistics. Its primary purpose is to serve as a reference repository of research results, but we believe that it can also be an object of study and a platform for research in its own right. We describe an enriched and standardized reference corpus derived from the ACL Anthology that can be used for research in scholarly document processing. This corpus, which we call the ACL Anthology Reference Corpus (ACL ARC), brings together the recent activities of a number of research groups around the world. Our goal is to make the corpus widely available, and to encourage other researchers to use it as a standard testbed for experiments in both bibliographic and bibliometric research.},
+ address = {Marrakech, Morocco},
+ author = {Bird, Steven and
+Dale, Robert and
+Dorr, Bonnie and
+Gibson, Bryan and
+Joseph, Mark and
+Kan, Min-Yen and
+Lee, Dongwon and
+Powley, Brett and
+Radev, Dragomir and
+Tan, Yee Fan},
+ booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC'08)},
+ editor = {Calzolari, Nicoletta and
+Choukri, Khalid and
+Maegaard, Bente and
+Mariani, Joseph and
+Odijk, Jan and
+Piperidis, Stelios and
+Tapias, Daniel},
+ month = {May},
+ publisher = {European Language Resources Association (ELRA)},
+ title = {The ACL Anthology Reference Corpus: A Reference Dataset for Bibliographic Research in Computational Linguistics},
+ url = {http://www.lrec-conf.org/proceedings/lrec2008/pdf/445_paper.pdf},
+ year = {2008}
+}
diff --git a/content/publication/bird-etal-2008-acl/index.md b/content/publication/bird-etal-2008-acl/index.md
new file mode 100644
index 0000000..84f8935
--- /dev/null
+++ b/content/publication/bird-etal-2008-acl/index.md
@@ -0,0 +1,32 @@
+---
+title: 'The ACL Anthology Reference Corpus: A Reference Dataset for Bibliographic
+ Research in Computational Linguistics'
+authors:
+- Steven Bird
+- Robert Dale
+- Bonnie Dorr
+- Bryan Gibson
+- Mark Joseph
+- min
+- Dongwon Lee
+- Brett Powley
+- Dragomir Radev
+- Yee Fan Tan
+date: '2008-05-01'
+publishDate: '2024-07-11T07:40:56.554775Z'
+publication_types:
+- paper-conference
+publication: "*Proceedings of the Sixth International Conference on Language Resources
+ and Evaluation (LREC'08)*"
+abstract: The ACL Anthology is a digital archive of conference and journal papers
+ in natural language processing and computational linguistics. Its primary purpose
+ is to serve as a reference repository of research results, but we believe that it
+ can also be an object of study and a platform for research in its own right. We
+ describe an enriched and standardized reference corpus derived from the ACL Anthology
+ that can be used for research in scholarly document processing. This corpus, which
+ we call the ACL Anthology Reference Corpus (ACL ARC), brings together the recent
+ activities of a number of research groups around the world. Our goal is to make
+ the corpus widely available, and to encourage other researchers to use it as a standard
+ testbed for experiments in both bibliographic and bibliometric research.
+url_pdf: http://www.lrec-conf.org/proceedings/lrec2008/pdf/445_paper.pdf
+---
diff --git a/content/publication/bysani-kan-2012-integrating/cite.bib b/content/publication/bysani-kan-2012-integrating/cite.bib
new file mode 100644
index 0000000..b20d1a8
--- /dev/null
+++ b/content/publication/bysani-kan-2012-integrating/cite.bib
@@ -0,0 +1,13 @@
+@inproceedings{bysani-kan-2012-integrating,
+ address = {Jeju Island, Korea},
+ author = {Bysani, Praveen and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the ACL-2012 Special Workshop on Rediscovering 50 Years of Discoveries},
+ editor = {Banchs, Rafael E.},
+ month = {July},
+ pages = {83--87},
+ publisher = {Association for Computational Linguistics},
+ title = {Integrating User-Generated Content in the ACL Anthology},
+ url = {https://aclanthology.org/W12-3209},
+ year = {2012}
+}
diff --git a/content/publication/bysani-kan-2012-integrating/index.md b/content/publication/bysani-kan-2012-integrating/index.md
new file mode 100644
index 0000000..6e2864b
--- /dev/null
+++ b/content/publication/bysani-kan-2012-integrating/index.md
@@ -0,0 +1,15 @@
+---
+title: Integrating User-Generated Content in the ACL Anthology
+authors:
+- Praveen Bysani
+- min
+date: '2012-07-01'
+publishDate: '2024-07-11T07:40:56.444716Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the ACL-2012 Special Workshop on Rediscovering 50 Years
+ of Discoveries*'
+links:
+- name: URL
+ url: https://aclanthology.org/W12-3209
+---
diff --git a/content/publication/cao-etal-2020-expertise/cite.bib b/content/publication/cao-etal-2020-expertise/cite.bib
new file mode 100644
index 0000000..5f265cc
--- /dev/null
+++ b/content/publication/cao-etal-2020-expertise/cite.bib
@@ -0,0 +1,22 @@
+@inproceedings{cao-etal-2020-expertise,
+ abstract = {The curse of knowledge can impede communication between experts and laymen. We propose a new task of expertise style transfer and contribute a manually annotated dataset with the goal of alleviating such cognitive biases. Solving this task not only simplifies the professional language, but also improves the accuracy and expertise level of laymen descriptions using simple words. This is a challenging task, unaddressed in previous work, as it requires the models to have expert intelligence in order to modify text with a deep understanding of domain knowledge and structures. We establish the benchmark performance of five state-of-the-art models for style transfer and text simplification. The results demonstrate a significant gap between machine and human performance. We also discuss the challenges of automatic evaluation, to provide insights into future research directions. The dataset is publicly available at https://srhthu.github.io/expertise-style-transfer/.},
+ address = {Online},
+ author = {Cao, Yixin and
+Shui, Ruihao and
+Pan, Liangming and
+Kan, Min-Yen and
+Liu, Zhiyuan and
+Chua, Tat-Seng},
+ booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
+ doi = {10.18653/v1/2020.acl-main.100},
+ editor = {Jurafsky, Dan and
+Chai, Joyce and
+Schluter, Natalie and
+Tetreault, Joel},
+ month = {July},
+ pages = {1061--1071},
+ publisher = {Association for Computational Linguistics},
+ title = {Expertise Style Transfer: A New Task Towards Better Communication between Experts and Laymen},
+ url = {https://aclanthology.org/2020.acl-main.100},
+ year = {2020}
+}
diff --git a/content/publication/cao-etal-2020-expertise/index.md b/content/publication/cao-etal-2020-expertise/index.md
new file mode 100644
index 0000000..964b048
--- /dev/null
+++ b/content/publication/cao-etal-2020-expertise/index.md
@@ -0,0 +1,33 @@
+---
+title: 'Expertise Style Transfer: A New Task Towards Better Communication between
+ Experts and Laymen'
+authors:
+- Yixin Cao
+- Ruihao Shui
+- Liangming Pan
+- min
+- Zhiyuan Liu
+- Tat-Seng Chua
+date: '2020-07-01'
+publishDate: '2024-07-11T07:40:56.298355Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 58th Annual Meeting of the Association for Computational
+ Linguistics*'
+doi: 10.18653/v1/2020.acl-main.100
+abstract: The curse of knowledge can impede communication between experts and laymen.
+ We propose a new task of expertise style transfer and contribute a manually annotated
+ dataset with the goal of alleviating such cognitive biases. Solving this task not
+ only simplifies the professional language, but also improves the accuracy and expertise
+ level of laymen descriptions using simple words. This is a challenging task, unaddressed
+ in previous work, as it requires the models to have expert intelligence in order
+ to modify text with a deep understanding of domain knowledge and structures. We
+ establish the benchmark performance of five state-of-the-art models for style transfer
+ and text simplification. The results demonstrate a significant gap between machine
+ and human performance. We also discuss the challenges of automatic evaluation, to
+ provide insights into future research directions. The dataset is publicly available
+ at https://srhthu.github.io/expertise-style-transfer/.
+links:
+- name: URL
+ url: https://aclanthology.org/2020.acl-main.100
+---
diff --git a/content/publication/chandrasekaran-kan-2018-countering/cite.bib b/content/publication/chandrasekaran-kan-2018-countering/cite.bib
new file mode 100644
index 0000000..e564431
--- /dev/null
+++ b/content/publication/chandrasekaran-kan-2018-countering/cite.bib
@@ -0,0 +1,18 @@
+@inproceedings{chandrasekaran-kan-2018-countering,
+ abstract = {We systematically confirm that instructors are strongly influenced by the user interface presentation of Massive Online Open Course (MOOC) discussion forums. In a large scale dataset, we conclusively show that instructor interventions exhibit strong position bias, as measured by the position where the thread appeared on the user interface at the time of intervention. We measure and remove this bias, enabling unbiased statistical modelling and evaluation. We show that our de-biased classifier improves predicting interventions over the state-of-the-art on courses with sufficient number of interventions by 8.2% in F1 and 24.4% in recall on average.},
+ address = {Melbourne, Australia},
+ author = {Chandrasekaran, Muthu Kumar and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the 5th Workshop on Natural Language Processing Techniques for Educational Applications},
+ doi = {10.18653/v1/W18-3720},
+ editor = {Tseng, Yuen-Hsien and
+Chen, Hsin-Hsi and
+Ng, Vincent and
+Komachi, Mamoru},
+ month = {July},
+ pages = {135--142},
+ publisher = {Association for Computational Linguistics},
+ title = {Countering Position Bias in Instructor Interventions in MOOC Discussion Forums},
+ url = {https://aclanthology.org/W18-3720},
+ year = {2018}
+}
diff --git a/content/publication/chandrasekaran-kan-2018-countering/index.md b/content/publication/chandrasekaran-kan-2018-countering/index.md
new file mode 100644
index 0000000..d9529f4
--- /dev/null
+++ b/content/publication/chandrasekaran-kan-2018-countering/index.md
@@ -0,0 +1,24 @@
+---
+title: Countering Position Bias in Instructor Interventions in MOOC Discussion Forums
+authors:
+- Muthu Kumar Chandrasekaran
+- min
+date: '2018-07-01'
+publishDate: '2024-07-11T07:40:56.347855Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 5th Workshop on Natural Language Processing Techniques
+ for Educational Applications*'
+doi: 10.18653/v1/W18-3720
+abstract: We systematically confirm that instructors are strongly influenced by the
+ user interface presentation of Massive Online Open Course (MOOC) discussion forums.
+ In a large scale dataset, we conclusively show that instructor interventions exhibit
+ strong position bias, as measured by the position where the thread appeared on the
+ user interface at the time of intervention. We measure and remove this bias, enabling
+ unbiased statistical modelling and evaluation. We show that our de-biased classifier
+ improves predicting interventions over the state-of-the-art on courses with sufficient
+ number of interventions by 8.2% in F1 and 24.4% in recall on average.
+links:
+- name: URL
+ url: https://aclanthology.org/W18-3720
+---
diff --git a/content/publication/chen-etal-2015-interactive/cite.bib b/content/publication/chen-etal-2015-interactive/cite.bib
new file mode 100644
index 0000000..3eb7c92
--- /dev/null
+++ b/content/publication/chen-etal-2015-interactive/cite.bib
@@ -0,0 +1,20 @@
+@inproceedings{chen-etal-2015-interactive,
+ address = {Beijing, China},
+ author = {Chen, Tao and
+Zheng, Naijia and
+Zhao, Yue and
+Chandrasekaran, Muthu Kumar and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the 2nd Workshop on Natural Language Processing Techniques for Educational Applications},
+ doi = {10.18653/v1/W15-4406},
+ editor = {Chen, Hsin-Hsi and
+Tseng, Yuen-Hsien and
+Matsumoto, Yuji and
+Wong, Lung Hsiang},
+ month = {July},
+ pages = {34--42},
+ publisher = {Association for Computational Linguistics},
+ title = {Interactive Second Language Learning from News Websites},
+ url = {https://aclanthology.org/W15-4406},
+ year = {2015}
+}
diff --git a/content/publication/chen-etal-2015-interactive/index.md b/content/publication/chen-etal-2015-interactive/index.md
new file mode 100644
index 0000000..4f11bff
--- /dev/null
+++ b/content/publication/chen-etal-2015-interactive/index.md
@@ -0,0 +1,19 @@
+---
+title: Interactive Second Language Learning from News Websites
+authors:
+- Tao Chen
+- Naijia Zheng
+- Yue Zhao
+- Muthu Kumar Chandrasekaran
+- min
+date: '2015-07-01'
+publishDate: '2024-07-11T07:40:56.401303Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 2nd Workshop on Natural Language Processing Techniques
+ for Educational Applications*'
+doi: 10.18653/v1/W15-4406
+links:
+- name: URL
+ url: https://aclanthology.org/W15-4406
+---
diff --git a/content/publication/chow-etal-2023-travlr/cite.bib b/content/publication/chow-etal-2023-travlr/cite.bib
new file mode 100644
index 0000000..6121057
--- /dev/null
+++ b/content/publication/chow-etal-2023-travlr/cite.bib
@@ -0,0 +1,17 @@
+@inproceedings{chow-etal-2023-travlr,
+ abstract = {Numerous visio-linguistic (V+L) representation learning methods have been developed, yet existing datasets do not adequately evaluate the extent to which they represent visual and linguistic concepts in a unified space. We propose several novel evaluation settings for V+L models, including cross-modal transfer. Furthermore, existing V+L benchmarks often report global accuracy scores on the entire dataset, making it difficult to pinpoint the specific reasoning tasks that models fail and succeed at. We present TraVLR, a synthetic dataset comprising four V+L reasoning tasks. TraVLR's synthetic nature allows us to constrain its training and testing distributions along task-relevant dimensions, enabling the evaluation of out-of-distribution generalisation. Each example in TraVLR redundantly encodes the scene in two modalities, allowing either to be dropped or added during training or testing without losing relevant information. We compare the performance of four state-of-the-art V+L models, finding that while they perform well on test examples from the same modality, they all fail at cross-modal transfer and have limited success accommodating the addition or deletion of one modality. We release TraVLR as an open challenge for the research community.},
+ address = {Dubrovnik, Croatia},
+ author = {Chow, Keng Ji and
+Tan, Samson and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics},
+ doi = {10.18653/v1/2023.eacl-main.242},
+ editor = {Vlachos, Andreas and
+Augenstein, Isabelle},
+ month = {May},
+ pages = {3322--3347},
+ publisher = {Association for Computational Linguistics},
+ title = {TraVLR: Now You See It, Now You Don't! A Bimodal Dataset for Evaluating Visio-Linguistic Reasoning},
+ url = {https://aclanthology.org/2023.eacl-main.242},
+ year = {2023}
+}
diff --git a/content/publication/chow-etal-2023-travlr/index.md b/content/publication/chow-etal-2023-travlr/index.md
new file mode 100644
index 0000000..97ce7c8
--- /dev/null
+++ b/content/publication/chow-etal-2023-travlr/index.md
@@ -0,0 +1,34 @@
+---
+title: 'TraVLR: Now You See It, Now You Don''t! A Bimodal Dataset for Evaluating Visio-Linguistic
+ Reasoning'
+authors:
+- Keng Ji Chow
+- Samson Tan
+- min
+date: '2023-05-01'
+publishDate: '2024-07-06T02:22:24.618335Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 17th Conference of the European Chapter of the Association
+ for Computational Linguistics*'
+doi: 10.18653/v1/2023.eacl-main.242
+abstract: Numerous visio-linguistic (V+L) representation learning methods have been
+ developed, yet existing datasets do not adequately evaluate the extent to which
+ they represent visual and linguistic concepts in a unified space. We propose several
+ novel evaluation settings for V+L models, including cross-modal transfer. Furthermore,
+ existing V+L benchmarks often report global accuracy scores on the entire dataset,
+ making it difficult to pinpoint the specific reasoning tasks that models fail and
+ succeed at. We present TraVLR, a synthetic dataset comprising four V+L reasoning
+ tasks. TraVLR's synthetic nature allows us to constrain its training and testing
+ distributions along task-relevant dimensions, enabling the evaluation of out-of-distribution
+ generalisation. Each example in TraVLR redundantly encodes the scene in two modalities,
+ allowing either to be dropped or added during training or testing without losing
+ relevant information. We compare the performance of four state-of-the-art V+L models,
+ finding that while they perform well on test examples from the same modality, they
+ all fail at cross-modal transfer and have limited success accommodating the addition
+ or deletion of one modality. We release TraVLR as an open challenge for the research
+ community.
+links:
+- name: URL
+ url: https://aclanthology.org/2023.eacl-main.242
+---
diff --git a/content/publication/councill-etal-2008-parscit/cite.bib b/content/publication/councill-etal-2008-parscit/cite.bib
new file mode 100644
index 0000000..88d06a2
--- /dev/null
+++ b/content/publication/councill-etal-2008-parscit/cite.bib
@@ -0,0 +1,20 @@
+@inproceedings{councill-etal-2008-parscit,
+ abstract = {We describe ParsCit, a freely available, open-source implementation of a reference string parsing package. At the core of ParsCit is a trained conditional random field (CRF) model used to label the token sequences in the reference string. A heuristic model wraps this core with added functionality to identify reference strings from a plain text file, and to retrieve the citation contexts. The package comes with utilities to run it as a web service or as a standalone utility. We compare ParsCit on three distinct reference string datasets and show that it compares well with other previously published work.},
+ address = {Marrakech, Morocco},
+ author = {Councill, Isaac and
+Giles, C. Lee and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC'08)},
+ editor = {Calzolari, Nicoletta and
+Choukri, Khalid and
+Maegaard, Bente and
+Mariani, Joseph and
+Odijk, Jan and
+Piperidis, Stelios and
+Tapias, Daniel},
+ month = {May},
+ publisher = {European Language Resources Association (ELRA)},
+ title = {ParsCit: an Open-source CRF Reference String Parsing Package},
+ url = {http://www.lrec-conf.org/proceedings/lrec2008/pdf/166_paper.pdf},
+ year = {2008}
+}
diff --git a/content/publication/councill-etal-2008-parscit/index.md b/content/publication/councill-etal-2008-parscit/index.md
new file mode 100644
index 0000000..a58d1f1
--- /dev/null
+++ b/content/publication/councill-etal-2008-parscit/index.md
@@ -0,0 +1,22 @@
+---
+title: 'ParsCit: an Open-source CRF Reference String Parsing Package'
+authors:
+- Isaac Councill
+- C. Lee Giles
+- min
+date: '2008-05-01'
+publishDate: '2024-07-11T07:40:56.561520Z'
+publication_types:
+- paper-conference
+publication: "*Proceedings of the Sixth International Conference on Language Resources
+ and Evaluation (LREC'08)*"
+abstract: We describe ParsCit, a freely available, open-source implementation of a
+ reference string parsing package. At the core of ParsCit is a trained conditional
+ random field (CRF) model used to label the token sequences in the reference string.
+ A heuristic model wraps this core with added functionality to identify reference
+ strings from a plain text file, and to retrieve the citation contexts. The package
+ comes with utilities to run it as a web service or as a standalone utility. We compare
+ ParsCit on three distinct reference string datasets and show that it compares well
+ with other previously published work.
+url_pdf: http://www.lrec-conf.org/proceedings/lrec2008/pdf/166_paper.pdf
+---
diff --git a/content/publication/diao-etal-2023-doolittle/cite.bib b/content/publication/diao-etal-2023-doolittle/cite.bib
new file mode 100644
index 0000000..6b7a6b4
--- /dev/null
+++ b/content/publication/diao-etal-2023-doolittle/cite.bib
@@ -0,0 +1,23 @@
+@inproceedings{diao-etal-2023-doolittle,
+ abstract = {Improving the quality of academic writing is a meaningful but challenging task. Conventional methods of language refinement focus on narrow, specific linguistic features within isolated sentences, such as grammatical errors and improper word use. We propose a more general task, Academic Writing Formalization (AWF), to improve the overall quality of formal academic writing at the paragraph level. We formulate this language refinement task as a formal text style transfer task which transfers informal-academic text to formal-academic and contribute a large-scale non-parallel dataset, Doolittle, for this purpose. Concurrently, we apply a method named metric-oriented reinforcement learning (MORL) to two large language models (LLM) where we incorporate different levels of automatic feedback into the training process. Our experiments reveal that existing text transfer models and grammatical error correction models address certain aspects of AWF but still have a significant performance gap compared to human performance. Meanwhile, language models fine-tuned with our MORL method exhibit considerably improved performance, rivaling the latest chatbot ChatGPT, but still have a non-negligible gap compared to the ground truth formal-academic texts in Doolittle.},
+ address = {Singapore},
+ author = {Diao, Shizhe and
+Lei, Yongyu and
+Pan, Liangming and
+Fang, Tianqing and
+Zhou, Wangchunshu and
+Keh, Sedrick and
+Kan, Min-Yen and
+Zhang, Tong},
+ booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
+ doi = {10.18653/v1/2023.emnlp-main.809},
+ editor = {Bouamor, Houda and
+Pino, Juan and
+Bali, Kalika},
+ month = {December},
+ pages = {13093--13111},
+ publisher = {Association for Computational Linguistics},
+ title = {Doolittle: Benchmarks and Corpora for Academic Writing Formalization},
+ url = {https://aclanthology.org/2023.emnlp-main.809},
+ year = {2023}
+}
diff --git a/content/publication/diao-etal-2023-doolittle/index.md b/content/publication/diao-etal-2023-doolittle/index.md
new file mode 100644
index 0000000..a81512a
--- /dev/null
+++ b/content/publication/diao-etal-2023-doolittle/index.md
@@ -0,0 +1,38 @@
+---
+title: 'Doolittle: Benchmarks and Corpora for Academic Writing Formalization'
+authors:
+- Shizhe Diao
+- Yongyu Lei
+- Liangming Pan
+- Tianqing Fang
+- Wangchunshu Zhou
+- Sedrick Keh
+- min
+- Tong Zhang
+date: '2023-12-01'
+publishDate: '2024-07-06T02:22:24.582277Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 2023 Conference on Empirical Methods in Natural
+ Language Processing*'
+doi: 10.18653/v1/2023.emnlp-main.809
+abstract: Improving the quality of academic writing is a meaningful but challenging
+ task. Conventional methods of language refinement focus on narrow, specific linguistic
+ features within isolated sentences, such as grammatical errors and improper word
+ use. We propose a more general task, Academic Writing Formalization (AWF), to improve
+ the overall quality of formal academic writing at the paragraph level. We formulate
+ this language refinement task as a formal text style transfer task which transfers
+ informal-academic text to formal-academic and contribute a large-scale non-parallel
+ dataset, Doolittle, for this purpose. Concurrently, we apply a method named metric-oriented
+ reinforcement learning (MORL) to two large language models (LLM) where we incorporate
+ different levels of automatic feedback into the training process. Our experiments
+ reveal that existing text transfer models and grammatical error correction models
+ address certain aspects of AWF but still have a significant performance gap compared
+ to human performance. Meanwhile, language models fine-tuned with our MORL method
+ exhibit considerably improved performance, rivaling the latest chatbot ChatGPT,
+ but still have a non-negligible gap compared to the ground truth formal-academic
+ texts in Doolittle.
+links:
+- name: URL
+ url: https://aclanthology.org/2023.emnlp-main.809
+---
diff --git a/content/publication/ding-etal-2023-cocoscisum/cite.bib b/content/publication/ding-etal-2023-cocoscisum/cite.bib
new file mode 100644
index 0000000..180f8f3
--- /dev/null
+++ b/content/publication/ding-etal-2023-cocoscisum/cite.bib
@@ -0,0 +1,18 @@
+@inproceedings{ding-etal-2023-cocoscisum,
+ abstract = {We present a novel toolkit for controlled summarization of scientific documents, designed for the specific needs of the scientific community. Our system generates summaries based on user preferences, adjusting key attributes specifically of length and keyword inclusion. A distinguishing feature is its ability to manage multiple attributes concurrently, demonstrating Compositional Controllability for Scientific Summarization (CocoSciSum). Benchmarked against the strong Flan-T5 baseline, CocoSciSum exhibits superior performance on both the quality of summaries generated and the control over single and multiple attributes. Moreover, CocoSciSum is a user-centric toolkit, supporting user preferences expressed in natural language instructions, and accommodating diverse input document formats. CocoSciSum is available on GitHub (https://github.com/WING-NUS/SciAssist/tree/CocoSciSum) with an introduction video (https://youtu.be/YC1YDeEjAbQ).},
+ address = {Singapore},
+ author = {Ding, Yixi and
+Qin, Yanxia and
+Liu, Qian and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: System Demonstrations},
+ doi = {10.18653/v1/2023.emnlp-demo.47},
+ editor = {Feng, Yansong and
+Lefever, Els},
+ month = {December},
+ pages = {518--526},
+ publisher = {Association for Computational Linguistics},
+ title = {CocoSciSum: A Scientific Summarization Toolkit with Compositional Controllability},
+ url = {https://aclanthology.org/2023.emnlp-demo.47},
+ year = {2023}
+}
diff --git a/content/publication/ding-etal-2023-cocoscisum/index.md b/content/publication/ding-etal-2023-cocoscisum/index.md
new file mode 100644
index 0000000..a4cdd25
--- /dev/null
+++ b/content/publication/ding-etal-2023-cocoscisum/index.md
@@ -0,0 +1,30 @@
+---
+title: 'CocoSciSum: A Scientific Summarization Toolkit with Compositional Controllability'
+authors:
+- Yixi Ding
+- Yanxia Qin
+- Qian Liu
+- min
+date: '2023-12-01'
+publishDate: '2024-07-06T02:22:24.596464Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 2023 Conference on Empirical Methods in Natural
+ Language Processing: System Demonstrations*'
+doi: 10.18653/v1/2023.emnlp-demo.47
+abstract: We present a novel toolkit for controlled summarization of scientific documents,
+ designed for the specific needs of the scientific community. Our system generates
+ summaries based on user preferences, adjusting key attributes specifically of length
+ and keyword inclusion. A distinguishing feature is its ability to manage multiple
+ attributes concurrently, demonstrating Compositional Controllability for Scientific
+ Summarization (CocoSciSum). Benchmarked against the strong Flan-T5 baseline, CocoSciSum
+ exhibits superior performance on both the quality of summaries generated and the
+ control over single and multiple attributes. Moreover, CocoSciSum is a user-centric
+ toolkit, supporting user preferences expressed in natural language instructions,
+ and accommodating diverse input document formats. CocoSciSum is available on GitHub
+ (https://github.com/WING-NUS/SciAssist/tree/CocoSciSum) with an introduction video
+ (https://youtu.be/YC1YDeEjAbQ).
+links:
+- name: URL
+ url: https://aclanthology.org/2023.emnlp-demo.47
+---
diff --git a/content/publication/dou-etal-2022-towards/cite.bib b/content/publication/dou-etal-2022-towards/cite.bib
new file mode 100644
index 0000000..b6f3706
--- /dev/null
+++ b/content/publication/dou-etal-2022-towards/cite.bib
@@ -0,0 +1,24 @@
+@inproceedings{dou-etal-2022-towards,
+ abstract = {In this paper, we study the problem of knowledge-intensive text-to-SQL, in which domain knowledge is necessary to parse expert questions into SQL queries over domain-specific tables. We formalize this scenario by building a new benchmark KnowSQL consisting of domain-specific questions covering various domains. We then address this problem by representing formulaic knowledge rather than by annotating additional data examples. More concretely, we construct a formulaic knowledge bank as a domain knowledge base and propose a framework (ReGrouP) to leverage this formulaic knowledge during parsing. Experiments using ReGrouP demonstrate a significant 28.2% improvement overall on KnowSQL.},
+ address = {Abu Dhabi, United Arab Emirates},
+ author = {Dou, Longxu and
+Gao, Yan and
+Liu, Xuqi and
+Pan, Mingyang and
+Wang, Dingzirui and
+Che, Wanxiang and
+Zhan, Dechen and
+Kan, Min-Yen and
+Lou, Jian-Guang},
+ booktitle = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing},
+ doi = {10.18653/v1/2022.emnlp-main.350},
+ editor = {Goldberg, Yoav and
+Kozareva, Zornitsa and
+Zhang, Yue},
+ month = {December},
+ pages = {5240--5253},
+ publisher = {Association for Computational Linguistics},
+ title = {Towards Knowledge-Intensive Text-to-SQL Semantic Parsing with Formulaic Knowledge},
+ url = {https://aclanthology.org/2022.emnlp-main.350},
+ year = {2022}
+}
diff --git a/content/publication/dou-etal-2022-towards/index.md b/content/publication/dou-etal-2022-towards/index.md
new file mode 100644
index 0000000..7a92e1d
--- /dev/null
+++ b/content/publication/dou-etal-2022-towards/index.md
@@ -0,0 +1,32 @@
+---
+title: Towards Knowledge-Intensive Text-to-SQL Semantic Parsing with Formulaic Knowledge
+authors:
+- Longxu Dou
+- Yan Gao
+- Xuqi Liu
+- Mingyang Pan
+- Dingzirui Wang
+- Wanxiang Che
+- Dechen Zhan
+- min
+- Jian-Guang Lou
+date: '2022-12-01'
+publishDate: '2024-07-05T17:09:42.603419Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 2022 Conference on Empirical Methods in Natural
+ Language Processing*'
+doi: 10.18653/v1/2022.emnlp-main.350
+abstract: In this paper, we study the problem of knowledge-intensive text-to-SQL,
+ in which domain knowledge is necessary to parse expert questions into SQL queries
+ over domain-specific tables. We formalize this scenario by building a new benchmark
+ KnowSQL consisting of domain-specific questions covering various domains. We then
+ address this problem by representing formulaic knowledge rather than by annotating
+ additional data examples. More concretely, we construct a formulaic knowledge bank
+ as a domain knowledge base and propose a framework (ReGrouP) to leverage this formulaic
+ knowledge during parsing. Experiments using ReGrouP demonstrate a significant 28.2%
+ improvement overall on KnowSQL.
+links:
+- name: URL
+ url: https://aclanthology.org/2022.emnlp-main.350
+---
diff --git a/content/publication/elmacioglu-etal-2007-psnus/cite.bib b/content/publication/elmacioglu-etal-2007-psnus/cite.bib
new file mode 100644
index 0000000..e8215c0
--- /dev/null
+++ b/content/publication/elmacioglu-etal-2007-psnus/cite.bib
@@ -0,0 +1,18 @@
+@inproceedings{elmacioglu-etal-2007-psnus,
+ address = {Prague, Czech Republic},
+ author = {Elmacioglu, Ergin and
+Tan, Yee Fan and
+Yan, Su and
+Kan, Min-Yen and
+Lee, Dongwon},
+ booktitle = {Proceedings of the Fourth International Workshop on Semantic Evaluations (SemEval-2007)},
+ editor = {Agirre, Eneko and
+Màrquez, Lluís and
+Wicentowski, Richard},
+ month = {June},
+ pages = {268--271},
+ publisher = {Association for Computational Linguistics},
+ title = {PSNUS: Web People Name Disambiguation by Simple Clustering with Rich Features},
+ url = {https://aclanthology.org/S07-1058},
+ year = {2007}
+}
diff --git a/content/publication/elmacioglu-etal-2007-psnus/index.md b/content/publication/elmacioglu-etal-2007-psnus/index.md
new file mode 100644
index 0000000..f967639
--- /dev/null
+++ b/content/publication/elmacioglu-etal-2007-psnus/index.md
@@ -0,0 +1,18 @@
+---
+title: 'PSNUS: Web People Name Disambiguation by Simple Clustering with Rich Features'
+authors:
+- Ergin Elmacioglu
+- Yee Fan Tan
+- Su Yan
+- min
+- Dongwon Lee
+date: '2007-06-01'
+publishDate: '2024-07-11T07:40:56.579914Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the Fourth International Workshop on Semantic Evaluations
+ (SemEval-2007)*'
+links:
+- name: URL
+ url: https://aclanthology.org/S07-1058
+---
diff --git a/content/publication/gildea-etal-2018-acl/cite.bib b/content/publication/gildea-etal-2018-acl/cite.bib
new file mode 100644
index 0000000..09ae4cf
--- /dev/null
+++ b/content/publication/gildea-etal-2018-acl/cite.bib
@@ -0,0 +1,21 @@
+@inproceedings{gildea-etal-2018-acl,
+ abstract = {The Association of Computational Linguistic's Anthology is the open source archive, and the main source for computational linguistics and natural language processing's scientific literature. The ACL Anthology is currently maintained exclusively by community volunteers and has to be available and up-to-date at all times. We first discuss the current, open source approach used to achieve this, and then discuss how the planned use of Docker images will improve the Anthology's long-term stability. This change will make it easier for researchers to utilize Anthology data for experimentation. We believe the ACL community can directly benefit from the extension-friendly architecture of the Anthology. We end by issuing an open challenge of reviewer matching we encourage the community to rally towards.},
+ address = {Melbourne, Australia},
+ author = {Gildea, Daniel and
+Kan, Min-Yen and
+Madnani, Nitin and
+Teichmann, Christoph and
+Villalba, Martín},
+ booktitle = {Proceedings of Workshop for NLP Open Source Software (NLP-OSS)},
+ doi = {10.18653/v1/W18-2504},
+ editor = {Park, Eunjeong L. and
+Hagiwara, Masato and
+Milajevs, Dmitrijs and
+Tan, Liling},
+ month = {July},
+ pages = {23--28},
+ publisher = {Association for Computational Linguistics},
+ title = {The ACL Anthology: Current State and Future Directions},
+ url = {https://aclanthology.org/W18-2504},
+ year = {2018}
+}
diff --git a/content/publication/gildea-etal-2018-acl/index.md b/content/publication/gildea-etal-2018-acl/index.md
new file mode 100644
index 0000000..8184574
--- /dev/null
+++ b/content/publication/gildea-etal-2018-acl/index.md
@@ -0,0 +1,28 @@
+---
+title: 'The ACL Anthology: Current State and Future Directions'
+authors:
+- Daniel Gildea
+- min
+- Nitin Madnani
+- Christoph Teichmann
+- Martín Villalba
+date: '2018-07-01'
+publishDate: '2024-07-11T07:40:56.354411Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of Workshop for NLP Open Source Software (NLP-OSS)*'
+doi: 10.18653/v1/W18-2504
+abstract: The Association of Computational Linguistic's Anthology is the open source
+ archive, and the main source for computational linguistics and natural language
+ processing's scientific literature. The ACL Anthology is currently maintained exclusively
+ by community volunteers and has to be available and up-to-date at all times. We
+ first discuss the current, open source approach used to achieve this, and then discuss
+ how the planned use of Docker images will improve the Anthology's long-term stability.
+ This change will make it easier for researchers to utilize Anthology data for experimentation.
+ We believe the ACL community can directly benefit from the extension-friendly architecture
+ of the Anthology. We end by issuing an open challenge of reviewer matching we encourage
+ the community to rally towards.
+links:
+- name: URL
+ url: https://aclanthology.org/W18-2504
+---
diff --git a/content/publication/halder-etal-2017-modeling/cite.bib b/content/publication/halder-etal-2017-modeling/cite.bib
new file mode 100644
index 0000000..f6bdf6d
--- /dev/null
+++ b/content/publication/halder-etal-2017-modeling/cite.bib
@@ -0,0 +1,18 @@
+@inproceedings{halder-etal-2017-modeling,
+ abstract = {Patients turn to Online Health Communities not only for information on specific conditions but also for emotional support. Previous research has indicated that the progression of emotional status can be studied through the linguistic patterns of an individual's posts. We analyze a real-world dataset from the Mental Health section of HealthBoards.com. Estimated from the word usages in their posts, we find that the emotional progress across patients vary widely. We study the problem of predicting a patient's emotional status in the future from her past posts and we propose a Recurrent Neural Network (RNN) based architecture to address it. We find that the future emotional status can be predicted with reasonable accuracy given her historical posts and participation features. Our evaluation results demonstrate the efficacy of our proposed architecture, by outperforming state-of-the-art approaches with over 0.13 reduction in Mean Absolute Error.},
+ address = {Copenhagen, Denmark},
+ author = {Halder, Kishaloy and
+Poddar, Lahari and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the 8th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis},
+ doi = {10.18653/v1/W17-5217},
+ editor = {Balahur, Alexandra and
+Mohammad, Saif M. and
+van der Goot, Erik},
+ month = {September},
+ pages = {127--135},
+ publisher = {Association for Computational Linguistics},
+ title = {Modeling Temporal Progression of Emotional Status in Mental Health Forum: A Recurrent Neural Net Approach},
+ url = {https://aclanthology.org/W17-5217},
+ year = {2017}
+}
diff --git a/content/publication/halder-etal-2017-modeling/index.md b/content/publication/halder-etal-2017-modeling/index.md
new file mode 100644
index 0000000..7bced06
--- /dev/null
+++ b/content/publication/halder-etal-2017-modeling/index.md
@@ -0,0 +1,30 @@
+---
+title: 'Modeling Temporal Progression of Emotional Status in Mental Health Forum:
+ A Recurrent Neural Net Approach'
+authors:
+- Kishaloy Halder
+- Lahari Poddar
+- min
+date: '2017-09-01'
+publishDate: '2024-07-11T07:40:56.375219Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 8th Workshop on Computational Approaches to Subjectivity,
+ Sentiment and Social Media Analysis*'
+doi: 10.18653/v1/W17-5217
+abstract: Patients turn to Online Health Communities not only for information on specific
+ conditions but also for emotional support. Previous research has indicated that
+ the progression of emotional status can be studied through the linguistic patterns
+ of an individual's posts. We analyze a real-world dataset from the Mental Health
+ section of HealthBoards.com. Estimated from the word usages in their posts, we find
+ that the emotional progress across patients vary widely. We study the problem of
+ predicting a patient's emotional status in the future from her past posts and we
+ propose a Recurrent Neural Network (RNN) based architecture to address it. We find
+ that the future emotional status can be predicted with reasonable accuracy given
+ her historical posts and participation features. Our evaluation results demonstrate
+ the efficacy of our proposed architecture, by outperforming state-of-the-art approaches
+ with over 0.13 reduction in Mean Absolute Error.
+links:
+- name: URL
+ url: https://aclanthology.org/W17-5217
+---
diff --git a/content/publication/halder-etal-2019-predicting/cite.bib b/content/publication/halder-etal-2019-predicting/cite.bib
new file mode 100644
index 0000000..ec8cf42
--- /dev/null
+++ b/content/publication/halder-etal-2019-predicting/cite.bib
@@ -0,0 +1,18 @@
+@inproceedings{halder-etal-2019-predicting,
+ abstract = {Users participate in online discussion forums to learn from others and share their knowledge with the community. They often start a thread with a question or by sharing their new findings on a certain topic. We find that, unlike Community Question Answering, where questions are mostly factoid based, the threads in a forum are often open-ended (e.g., asking for recommendations from others) without a single correct answer. In this paper, we address the task of identifying helpful posts in a forum thread to help users comprehend long running discussion threads, which often contain repetitive or irrelevant posts. We propose a recurrent neural network based architecture to model (i) the relevance of a post regarding the original post starting the thread and (ii) the novelty it brings to the discussion, compared to the previous posts in the thread. Experimental results on different types of online forum datasets show that our model significantly outperforms the state-of-the-art neural network models for text classification.},
+ address = {Minneapolis, Minnesota},
+ author = {Halder, Kishaloy and
+Kan, Min-Yen and
+Sugiyama, Kazunari},
+ booktitle = {Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)},
+ doi = {10.18653/v1/N19-1318},
+ editor = {Burstein, Jill and
+Doran, Christy and
+Solorio, Thamar},
+ month = {June},
+ pages = {3148--3157},
+ publisher = {Association for Computational Linguistics},
+ title = {Predicting Helpful Posts in Open-Ended Discussion Forums: A Neural Architecture},
+ url = {https://aclanthology.org/N19-1318},
+ year = {2019}
+}
diff --git a/content/publication/halder-etal-2019-predicting/index.md b/content/publication/halder-etal-2019-predicting/index.md
new file mode 100644
index 0000000..fcdcdc1
--- /dev/null
+++ b/content/publication/halder-etal-2019-predicting/index.md
@@ -0,0 +1,31 @@
+---
+title: 'Predicting Helpful Posts in Open-Ended Discussion Forums: A Neural Architecture'
+authors:
+- Kishaloy Halder
+- min
+- Kazunari Sugiyama
+date: '2019-06-01'
+publishDate: '2024-07-11T07:40:56.327069Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 2019 Conference of the North American Chapter of
+ the Association for Computational Linguistics: Human Language Technologies, Volume
+ 1 (Long and Short Papers)*'
+doi: 10.18653/v1/N19-1318
+abstract: Users participate in online discussion forums to learn from others and share
+ their knowledge with the community. They often start a thread with a question or
+ by sharing their new findings on a certain topic. We find that, unlike Community
+ Question Answering, where questions are mostly factoid based, the threads in a forum
+ are often open-ended (e.g., asking for recommendations from others) without a single
+ correct answer. In this paper, we address the task of identifying helpful posts
+ in a forum thread to help users comprehend long running discussion threads, which
+ often contain repetitive or irrelevant posts. We propose a recurrent neural network
+ based architecture to model (i) the relevance of a post regarding the original post
+ starting the thread and (ii) the novelty it brings to the discussion, compared to
+ the previous posts in the thread. Experimental results on different types of online
+ forum datasets show that our model significantly outperforms the state-of-the-art
+ neural network models for text classification.
+links:
+- name: URL
+ url: https://aclanthology.org/N19-1318
+---
diff --git a/content/publication/han-etal-2022-mm/cite.bib b/content/publication/han-etal-2022-mm/cite.bib
new file mode 100644
index 0000000..3bebc61
--- /dev/null
+++ b/content/publication/han-etal-2022-mm/cite.bib
@@ -0,0 +1,19 @@
+@inproceedings{han-etal-2022-mm,
+ abstract = {Existing multimodal tasks mostly target at the complete input modality setting, i.e., each modality is either complete or completely missing in both training and test sets. However, the randomly missing situations have still been underexplored. In this paper, we present a novel approach named MM-Align to address the missing-modality inference problem. Concretely, we propose 1) an alignment dynamics learning module based on the theory of optimal transport (OT) for missing data imputation; 2) a denoising training algorithm to enhance the quality of imputation as well as the accuracy of model predictions. Compared with previous generative methods which devote to restoring the missing inputs, MM-Align learns to capture and imitate the alignment dynamics between modality sequences. Results of comprehensive experiments on two multimodal tasks empirically demonstrate that our method can perform more accurate and faster inference and alleviate the overfitting issue under different missing conditions.},
+ address = {Abu Dhabi, United Arab Emirates},
+ author = {Han, Wei and
+Chen, Hui and
+Kan, Min-Yen and
+Poria, Soujanya},
+ booktitle = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing},
+ doi = {10.18653/v1/2022.emnlp-main.717},
+ editor = {Goldberg, Yoav and
+Kozareva, Zornitsa and
+Zhang, Yue},
+ month = {December},
+ pages = {10498--10511},
+ publisher = {Association for Computational Linguistics},
+ title = {MM-Align: Learning Optimal Transport-based Alignment Dynamics for Fast and Accurate Inference on Missing Modality Sequences},
+ url = {https://aclanthology.org/2022.emnlp-main.717},
+ year = {2022}
+}
diff --git a/content/publication/han-etal-2022-mm/index.md b/content/publication/han-etal-2022-mm/index.md
new file mode 100644
index 0000000..9cb0ca8
--- /dev/null
+++ b/content/publication/han-etal-2022-mm/index.md
@@ -0,0 +1,32 @@
+---
+title: 'MM-Align: Learning Optimal Transport-based Alignment Dynamics for Fast and
+ Accurate Inference on Missing Modality Sequences'
+authors:
+- Wei Han
+- Hui Chen
+- min
+- Soujanya Poria
+date: '2022-12-01'
+publishDate: '2024-07-05T17:09:42.610472Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 2022 Conference on Empirical Methods in Natural
+ Language Processing*'
+doi: 10.18653/v1/2022.emnlp-main.717
+abstract: Existing multimodal tasks mostly target at the complete input modality setting,
+ i.e., each modality is either complete or completely missing in both training and
+ test sets. However, the randomly missing situations have still been underexplored.
+ In this paper, we present a novel approach named MM-Align to address the missing-modality
+ inference problem. Concretely, we propose 1) an alignment dynamics learning module
+ based on the theory of optimal transport (OT) for missing data imputation; 2) a
+ denoising training algorithm to enhance the quality of imputation as well as the
+ accuracy of model predictions. Compared with previous generative methods which devote
+ to restoring the missing inputs, MM-Align learns to capture and imitate the alignment
+ dynamics between modality sequences. Results of comprehensive experiments on two
+ multimodal tasks empirically demonstrate that our method can perform more accurate
+ and faster inference and alleviate the overfitting issue under different missing
+ conditions.
+links:
+- name: URL
+ url: https://aclanthology.org/2022.emnlp-main.717
+---
diff --git a/content/publication/han-etal-2024-self/cite.bib b/content/publication/han-etal-2024-self/cite.bib
new file mode 100644
index 0000000..c2645d0
--- /dev/null
+++ b/content/publication/han-etal-2024-self/cite.bib
@@ -0,0 +1,18 @@
+@inproceedings{han-etal-2024-self,
+ abstract = {Image--text models (ITMs) is the prevalent architecture to solve video question--answering tasks, which requires only a few input frames to save huge computational cost compared to video--language models. However, we find existent ITM video question--answering solutions either 1) adopt simplistic and unintentional sampling strategies, which may miss key frames to offer the answer clues; or 2) sample a large number of frames into divided groups, which the computational sources can not accommodate. In this work, we aim at an efficient sampling method towards the few-frame situations. We first summarize a family of prior sampling methods based on question--frame correlation into a unified one, dubbed *Most Implied Frames* (MIF). Through some primary results and analysis, we form a hypothesis that question-aware sampling is not necessary, from which we further propose the other method *Most Dominant Frames* (MDF). Experimental results on four public datasets and three advanced ITMs demonstrate that our proposed strategies can boost the performance for image--text pretrained models, and have a wide application scenario in terms of model architectures and dataset types. Our code is available at https://github.com/declare-lab/Sealing.},
+ address = {Mexico City, Mexico},
+ author = {Han, Wei and
+Chen, Hui and
+Kan, Min-Yen and
+Poria, Soujanya},
+ booktitle = {Findings of the Association for Computational Linguistics: NAACL 2024},
+ editor = {Duh, Kevin and
+Gomez, Helena and
+Bethard, Steven},
+ month = {June},
+ pages = {2522--2534},
+ publisher = {Association for Computational Linguistics},
+ title = {Self-Adaptive Sampling for Accurate Video Question Answering on Image Text Models},
+ url = {https://aclanthology.org/2024.findings-naacl.162},
+ year = {2024}
+}
diff --git a/content/publication/han-etal-2024-self/index.md b/content/publication/han-etal-2024-self/index.md
new file mode 100644
index 0000000..0688a16
--- /dev/null
+++ b/content/publication/han-etal-2024-self/index.md
@@ -0,0 +1,32 @@
+---
+title: Self-Adaptive Sampling for Accurate Video Question Answering on Image Text
+ Models
+authors:
+- Wei Han
+- Hui Chen
+- min
+- Soujanya Poria
+date: '2024-06-01'
+publishDate: '2024-07-05T17:09:42.578623Z'
+publication_types:
+- paper-conference
+publication: '*Findings of the Association for Computational Linguistics: NAACL 2024*'
+abstract: Image--text models (ITMs) is the prevalent architecture to solve video question--answering
+ tasks, which requires only a few input frames to save huge computational cost compared
+ to video--language models. However, we find existent ITM video question--answering
+ solutions either 1) adopt simplistic and unintentional sampling strategies, which
+ may miss key frames to offer the answer clues; or 2) sample a large number of frames
+ into divided groups, which the computational sources can not accommodate. In this
+ work, we aim at an efficient sampling method towards the few-frame situations. We
+ first summarize a family of prior sampling methods based on question--frame correlation
+ into a unified one, dubbed *Most Implied Frames* (MIF). Through some primary results
+ and analysis, we form a hypothesis that question-aware sampling is not necessary,
+ from which we further propose the other method *Most Dominant Frames* (MDF). Experimental
+ results on four public datasets and three advanced ITMs demonstrate that our proposed
+ strategies can boost the performance for image--text pretrained models, and have
+ a wide application scenario in terms of model architectures and dataset types. Our
+ code is available at https://github.com/declare-lab/Sealing.
+links:
+- name: URL
+ url: https://aclanthology.org/2024.findings-naacl.162
+---
diff --git a/content/publication/hoang-etal-2009-examination/cite.bib b/content/publication/hoang-etal-2009-examination/cite.bib
new file mode 100644
index 0000000..c39ec7c
--- /dev/null
+++ b/content/publication/hoang-etal-2009-examination/cite.bib
@@ -0,0 +1,17 @@
+@inproceedings{hoang-etal-2009-examination,
+ address = {Singapore},
+ author = {Hoang, Hung Huu and
+Kim, Su Nam and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the Workshop on Multiword Expressions: Identification, Interpretation, Disambiguation and Applications (MWE 2009)},
+ editor = {Anastasiou, Dimitra and
+Hashimoto, Chikara and
+Nakov, Preslav and
+Kim, Su Nam},
+ month = {August},
+ pages = {31--39},
+ publisher = {Association for Computational Linguistics},
+ title = {A re-examination of lexical association measures},
+ url = {https://aclanthology.org/W09-2905},
+ year = {2009}
+}
diff --git a/content/publication/hoang-etal-2009-examination/index.md b/content/publication/hoang-etal-2009-examination/index.md
new file mode 100644
index 0000000..ff1255a
--- /dev/null
+++ b/content/publication/hoang-etal-2009-examination/index.md
@@ -0,0 +1,16 @@
+---
+title: A re-examination of lexical association measures
+authors:
+- Hung Huu Hoang
+- Su Nam Kim
+- min
+date: '2009-08-01'
+publishDate: '2024-07-11T07:40:56.530100Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the Workshop on Multiword Expressions: Identification,
+ Interpretation, Disambiguation and Applications (MWE 2009)*'
+links:
+- name: URL
+ url: https://aclanthology.org/W09-2905
+---
diff --git a/content/publication/hoang-kan-2010-towards/cite.bib b/content/publication/hoang-kan-2010-towards/cite.bib
new file mode 100644
index 0000000..870bd89
--- /dev/null
+++ b/content/publication/hoang-kan-2010-towards/cite.bib
@@ -0,0 +1,14 @@
+@inproceedings{hoang-kan-2010-towards,
+ address = {Beijing, China},
+ author = {Hoang, Cong Duy Vu and
+Kan, Min-Yen},
+ booktitle = {Coling 2010: Posters},
+ editor = {Huang, Chu-Ren and
+Jurafsky, Dan},
+ month = {August},
+ pages = {427--435},
+ publisher = {Coling 2010 Organizing Committee},
+ title = {Towards Automated Related Work Summarization},
+ url = {https://aclanthology.org/C10-2049},
+ year = {2010}
+}
diff --git a/content/publication/hoang-kan-2010-towards/index.md b/content/publication/hoang-kan-2010-towards/index.md
new file mode 100644
index 0000000..19aaf6b
--- /dev/null
+++ b/content/publication/hoang-kan-2010-towards/index.md
@@ -0,0 +1,14 @@
+---
+title: Towards Automated Related Work Summarization
+authors:
+- Cong Duy Vu Hoang
+- min
+date: '2010-08-01'
+publishDate: '2024-07-11T07:40:56.499873Z'
+publication_types:
+- paper-conference
+publication: '*Coling 2010: Posters*'
+links:
+- name: URL
+ url: https://aclanthology.org/C10-2049
+---
diff --git a/content/publication/huang-etal-2022-lightweight/cite.bib b/content/publication/huang-etal-2022-lightweight/cite.bib
new file mode 100644
index 0000000..d803db8
--- /dev/null
+++ b/content/publication/huang-etal-2022-lightweight/cite.bib
@@ -0,0 +1,27 @@
+@inproceedings{huang-etal-2022-lightweight,
+ abstract = {Logical structure recovery in scientific articles associates text with a semantic section of the article. Although previous work has disregarded the surrounding context of a line, we model this important information by employing line-level attention on top of a transformer-based scientific document processing pipeline. With the addition of loss function engineering and data augmentation techniques with semi-supervised learning, our method improves classification performance by 10% compared to a recent state-of-the-art model. Our parsimonious, text-only method achieves a performance comparable to that of other works that use rich document features such as font and spatial position, using less data without sacrificing performance, resulting in a lightweight training pipeline.},
+ address = {Gyeongju, Republic of Korea},
+ author = {Huang, Po-Wei and
+Ramesh Kashyap, Abhinav and
+Qin, Yanxia and
+Yang, Yajing and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the Third Workshop on Scholarly Document Processing},
+ editor = {Cohan, Arman and
+Feigenblat, Guy and
+Freitag, Dayne and
+Ghosal, Tirthankar and
+Herrmannova, Drahomira and
+Knoth, Petr and
+Lo, Kyle and
+Mayr, Philipp and
+Shmueli-Scheuer, Michal and
+de Waard, Anita and
+Wang, Lucy Lu},
+ month = {October},
+ pages = {37--48},
+ publisher = {Association for Computational Linguistics},
+ title = {Lightweight Contextual Logical Structure Recovery},
+ url = {https://aclanthology.org/2022.sdp-1.5},
+ year = {2022}
+}
diff --git a/content/publication/huang-etal-2022-lightweight/index.md b/content/publication/huang-etal-2022-lightweight/index.md
new file mode 100644
index 0000000..a456c54
--- /dev/null
+++ b/content/publication/huang-etal-2022-lightweight/index.md
@@ -0,0 +1,27 @@
+---
+title: Lightweight Contextual Logical Structure Recovery
+authors:
+- Po-Wei Huang
+- Abhinav Ramesh Kashyap
+- Yanxia Qin
+- Yajing Yang
+- min
+date: '2022-10-01'
+publishDate: '2024-07-05T10:15:26.841390Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the Third Workshop on Scholarly Document Processing*'
+abstract: Logical structure recovery in scientific articles associates text with a
+ semantic section of the article. Although previous work has disregarded the surrounding
+ context of a line, we model this important information by employing line-level attention
+ on top of a transformer-based scientific document processing pipeline. With the
+ addition of loss function engineering and data augmentation techniques with semi-supervised
+ learning, our method improves classification performance by 10% compared to a recent
+ state-of-the-art model. Our parsimonious, text-only method achieves a performance
+ comparable to that of other works that use rich document features such as font and
+ spatial position, using less data without sacrificing performance, resulting in
+ a lightweight training pipeline.
+links:
+- name: URL
+ url: https://aclanthology.org/2022.sdp-1.5
+---
diff --git a/content/publication/jain-etal-2022-comparative/cite.bib b/content/publication/jain-etal-2022-comparative/cite.bib
new file mode 100644
index 0000000..d26e042
--- /dev/null
+++ b/content/publication/jain-etal-2022-comparative/cite.bib
@@ -0,0 +1,21 @@
+@inproceedings{jain-etal-2022-comparative,
+ abstract = {We model products' reviews to generate comparative responses consisting of positive and negative experiences regarding the product. Specifically, we generate a single-sentence, comparative response from a given positive and a negative opinion. We contribute the first dataset for this task of Comparative Snippet Generation from contrasting opinions regarding a product, and an analysis of performance of a pre-trained BERT model to generate such snippets.},
+ address = {Dublin, Ireland},
+ author = {Jain, Saurabh and
+Miao, Yisong and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the Fifth Workshop on e-Commerce and NLP (ECNLP 5)},
+ doi = {10.18653/v1/2022.ecnlp-1.7},
+ editor = {Malmasi, Shervin and
+Rokhlenko, Oleg and
+Ueffing, Nicola and
+Guy, Ido and
+Agichtein, Eugene and
+Kallumadi, Surya},
+ month = {May},
+ pages = {49--57},
+ publisher = {Association for Computational Linguistics},
+ title = {Comparative Snippet Generation},
+ url = {https://aclanthology.org/2022.ecnlp-1.7},
+ year = {2022}
+}
diff --git a/content/publication/jain-etal-2022-comparative/index.md b/content/publication/jain-etal-2022-comparative/index.md
new file mode 100644
index 0000000..cb6c493
--- /dev/null
+++ b/content/publication/jain-etal-2022-comparative/index.md
@@ -0,0 +1,22 @@
+---
+title: Comparative Snippet Generation
+authors:
+- Saurabh Jain
+- Yisong Miao
+- min
+date: '2022-05-01'
+publishDate: '2024-07-05T17:09:42.617512Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the Fifth Workshop on e-Commerce and NLP (ECNLP 5)*'
+doi: 10.18653/v1/2022.ecnlp-1.7
+abstract: We model products' reviews to generate comparative responses consisting
+ of positive and negative experiences regarding the product. Specifically, we generate
+ a single-sentence, comparative response from a given positive and a negative opinion.
+ We contribute the first dataset for this task of Comparative Snippet Generation
+ from contrasting opinions regarding a product, and an analysis of performance of
+ a pre-trained BERT model to generate such snippets.
+links:
+- name: URL
+ url: https://aclanthology.org/2022.ecnlp-1.7
+---
diff --git a/content/publication/jiang-etal-2018-identifying/cite.bib b/content/publication/jiang-etal-2018-identifying/cite.bib
new file mode 100644
index 0000000..9526671
--- /dev/null
+++ b/content/publication/jiang-etal-2018-identifying/cite.bib
@@ -0,0 +1,18 @@
+@inproceedings{jiang-etal-2018-identifying,
+ abstract = {Identifying emergent research trends is a key issue for both primary researchers as well as secondary research managers. Such processes can uncover the historical development of an area, and yield insight on developing topics. We propose an embedded trend detection framework for this task which incorporates our bijunctive hypothesis that important phrases are written by important authors within a field and vice versa. By ranking both author and phrase information in a multigraph, our method jointly determines key phrases and authoritative authors. We represent this intermediate output as phrasal embeddings, and feed this to a recurrent neural network (RNN) to compute trend scores that identify research trends. Over two large datasets of scientific articles, we demonstrate that our approach successfully detects past trends from the field, outperforming baselines based solely on text centrality or citation.},
+ address = {Santa Fe, New Mexico, USA},
+ author = {Jiang, Shenhao and
+Prasad, Animesh and
+Kan, Min-Yen and
+Sugiyama, Kazunari},
+ booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
+ editor = {Bender, Emily M. and
+Derczynski, Leon and
+Isabelle, Pierre},
+ month = {August},
+ pages = {259--269},
+ publisher = {Association for Computational Linguistics},
+ title = {Identifying Emergent Research Trends by Key Authors and Phrases},
+ url = {https://aclanthology.org/C18-1022},
+ year = {2018}
+}
diff --git a/content/publication/jiang-etal-2018-identifying/index.md b/content/publication/jiang-etal-2018-identifying/index.md
new file mode 100644
index 0000000..e041afe
--- /dev/null
+++ b/content/publication/jiang-etal-2018-identifying/index.md
@@ -0,0 +1,28 @@
+---
+title: Identifying Emergent Research Trends by Key Authors and Phrases
+authors:
+- Shenhao Jiang
+- Animesh Prasad
+- min
+- Kazunari Sugiyama
+date: '2018-08-01'
+publishDate: '2024-07-11T07:40:56.368249Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 27th International Conference on Computational Linguistics*'
+abstract: Identifying emergent research trends is a key issue for both primary researchers
+ as well as secondary research managers. Such processes can uncover the historical
+ development of an area, and yield insight on developing topics. We propose an embedded
+ trend detection framework for this task which incorporates our bijunctive hypothesis
+ that important phrases are written by important authors within a field and vice
+ versa. By ranking both author and phrase information in a multigraph, our method
+ jointly determines key phrases and authoritative authors. We represent this intermediate
+ output as phrasal embeddings, and feed this to a recurrent neural network (RNN)
+ to compute trend scores that identify research trends. Over two large datasets of
+ scientific articles, we demonstrate that our approach successfully detects past
+ trends from the field, outperforming baselines based solely on text centrality or
+ citation.
+links:
+- name: URL
+ url: https://aclanthology.org/C18-1022
+---
diff --git a/content/publication/jin-etal-2013-mining/cite.bib b/content/publication/jin-etal-2013-mining/cite.bib
new file mode 100644
index 0000000..3817a6a
--- /dev/null
+++ b/content/publication/jin-etal-2013-mining/cite.bib
@@ -0,0 +1,19 @@
+@inproceedings{jin-etal-2013-mining,
+ address = {Seattle, Washington, USA},
+ author = {Jin, Yiping and
+Kan, Min-Yen and
+Ng, Jun-Ping and
+He, Xiangnan},
+ booktitle = {Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing},
+ editor = {Yarowsky, David and
+Baldwin, Timothy and
+Korhonen, Anna and
+Livescu, Karen and
+Bethard, Steven},
+ month = {October},
+ pages = {780--790},
+ publisher = {Association for Computational Linguistics},
+ title = {Mining Scientific Terms and their Definitions: A Study of the ACL Anthology},
+ url = {https://aclanthology.org/D13-1073},
+ year = {2013}
+}
diff --git a/content/publication/jin-etal-2013-mining/index.md b/content/publication/jin-etal-2013-mining/index.md
new file mode 100644
index 0000000..189a05b
--- /dev/null
+++ b/content/publication/jin-etal-2013-mining/index.md
@@ -0,0 +1,17 @@
+---
+title: 'Mining Scientific Terms and their Definitions: A Study of the ACL Anthology'
+authors:
+- Yiping Jin
+- min
+- Jun-Ping Ng
+- Xiangnan He
+date: '2013-10-01'
+publishDate: '2024-07-11T07:40:56.438607Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 2013 Conference on Empirical Methods in Natural
+ Language Processing*'
+links:
+- name: URL
+ url: https://aclanthology.org/D13-1073
+---
diff --git a/content/publication/kan-2015-keywords/cite.bib b/content/publication/kan-2015-keywords/cite.bib
new file mode 100644
index 0000000..e4c010d
--- /dev/null
+++ b/content/publication/kan-2015-keywords/cite.bib
@@ -0,0 +1,16 @@
+@inproceedings{kan-2015-keywords,
+ address = {Beijing, China},
+ author = {Kan, Min-Yen},
+ booktitle = {Proceedings of the ACL 2015 Workshop on Novel Computational Approaches to Keyphrase Extraction},
+ doi = {10.18653/v1/W15-3601},
+ editor = {Gollapalli, Sujatha Das and
+Caragea, Cornelia and
+Li, Xiaoli and
+Giles, C. Lee},
+ month = {July},
+ pages = {1},
+ publisher = {Association for Computational Linguistics},
+ title = {Keywords, phrases, clauses and sentences: topicality, indicativeness and informativeness at scales},
+ url = {https://aclanthology.org/W15-3601},
+ year = {2015}
+}
diff --git a/content/publication/kan-2015-keywords/index.md b/content/publication/kan-2015-keywords/index.md
new file mode 100644
index 0000000..bbe1599
--- /dev/null
+++ b/content/publication/kan-2015-keywords/index.md
@@ -0,0 +1,16 @@
+---
+title: 'Keywords, phrases, clauses and sentences: topicality, indicativeness and informativeness
+ at scales'
+authors:
+- min
+date: '2015-07-01'
+publishDate: '2024-07-11T07:40:56.407539Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the ACL 2015 Workshop on Novel Computational Approaches
+ to Keyphrase Extraction*'
+doi: 10.18653/v1/W15-3601
+links:
+- name: URL
+ url: https://aclanthology.org/W15-3601
+---
diff --git a/content/publication/kan-etal-1998-linear/cite.bib b/content/publication/kan-etal-1998-linear/cite.bib
new file mode 100644
index 0000000..f0d6790
--- /dev/null
+++ b/content/publication/kan-etal-1998-linear/cite.bib
@@ -0,0 +1,9 @@
+@inproceedings{kan-etal-1998-linear,
+ author = {Kan, Min-Yen and
+Klavans, Judith L. and
+McKeown, Kathleen R.},
+ booktitle = {Sixth Workshop on Very Large Corpora},
+ title = {Linear Segmentation and Segment Significance},
+ url = {https://aclanthology.org/W98-1123},
+ year = {1998}
+}
diff --git a/content/publication/kan-etal-1998-linear/index.md b/content/publication/kan-etal-1998-linear/index.md
new file mode 100644
index 0000000..f05ae17
--- /dev/null
+++ b/content/publication/kan-etal-1998-linear/index.md
@@ -0,0 +1,15 @@
+---
+title: Linear Segmentation and Segment Significance
+authors:
+- min
+- Judith L. Klavans
+- Kathleen R. McKeown
+date: '1998-01-01'
+publishDate: '2024-07-11T07:40:56.632433Z'
+publication_types:
+- paper-conference
+publication: '*Sixth Workshop on Very Large Corpora*'
+links:
+- name: URL
+ url: https://aclanthology.org/W98-1123
+---
diff --git a/content/publication/kan-etal-2001-applying/cite.bib b/content/publication/kan-etal-2001-applying/cite.bib
new file mode 100644
index 0000000..f5b920a
--- /dev/null
+++ b/content/publication/kan-etal-2001-applying/cite.bib
@@ -0,0 +1,14 @@
+@inproceedings{kan-etal-2001-applying,
+ address = {Toulouse, France},
+ author = {Kan, Min-Yen and
+McKeown, Kathleen R. and
+Klavans, Judith L.},
+ booktitle = {Proceedings of the ACL 2001 Eighth European Workshop on Natural Language Generation (EWNLG)},
+ editor = {Horacek, Helmut and
+Nicolov, Nicolas and
+Wanner, Leo},
+ publisher = {Association for Computational Linguistics},
+ title = {Applying Natural Language Generation to Indicative Summarization},
+ url = {https://aclanthology.org/W01-0813},
+ year = {2001}
+}
diff --git a/content/publication/kan-etal-2001-applying/index.md b/content/publication/kan-etal-2001-applying/index.md
new file mode 100644
index 0000000..4d4d4e9
--- /dev/null
+++ b/content/publication/kan-etal-2001-applying/index.md
@@ -0,0 +1,16 @@
+---
+title: Applying Natural Language Generation to Indicative Summarization
+authors:
+- min
+- Kathleen R. McKeown
+- Judith L. Klavans
+date: '2001-01-01'
+publishDate: '2024-07-11T07:40:56.622249Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the ACL 2001 Eighth European Workshop on Natural Language
+ Generation (EWNLG)*'
+links:
+- name: URL
+ url: https://aclanthology.org/W01-0813
+---
diff --git a/content/publication/kan-etal-2002-using/cite.bib b/content/publication/kan-etal-2002-using/cite.bib
new file mode 100644
index 0000000..0b972b7
--- /dev/null
+++ b/content/publication/kan-etal-2002-using/cite.bib
@@ -0,0 +1,14 @@
+@inproceedings{kan-etal-2002-using,
+ address = {Las Palmas, Canary Islands - Spain},
+ author = {Kan, Min-Yen and
+Klavans, Judith L. and
+McKeown, Kathleen R.},
+ booktitle = {Proceedings of the Third International Conference on Language Resources and Evaluation (LREC’02)},
+ editor = {González Rodríguez, Manuel and
+Suarez Araujo, Carmen Paz},
+ month = {May},
+ publisher = {European Language Resources Association (ELRA)},
+ title = {Using the Annotated Bibliography as a Resource for Indicative Summarization},
+ url = {http://www.lrec-conf.org/proceedings/lrec2002/pdf/7.pdf},
+ year = {2002}
+}
diff --git a/content/publication/kan-etal-2002-using/index.md b/content/publication/kan-etal-2002-using/index.md
new file mode 100644
index 0000000..876b82d
--- /dev/null
+++ b/content/publication/kan-etal-2002-using/index.md
@@ -0,0 +1,14 @@
+---
+title: Using the Annotated Bibliography as a Resource for Indicative Summarization
+authors:
+- min
+- Judith L. Klavans
+- Kathleen R. McKeown
+date: '2002-05-01'
+publishDate: '2024-07-11T07:40:56.616248Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the Third International Conference on Language Resources
+ and Evaluation (LREC’02)*'
+url_pdf: http://www.lrec-conf.org/proceedings/lrec2002/pdf/7.pdf
+---
diff --git a/content/publication/kan-mckeown-2002-corpus/cite.bib b/content/publication/kan-mckeown-2002-corpus/cite.bib
new file mode 100644
index 0000000..7724024
--- /dev/null
+++ b/content/publication/kan-mckeown-2002-corpus/cite.bib
@@ -0,0 +1,13 @@
+@inproceedings{kan-mckeown-2002-corpus,
+ address = {Harriman, New York, USA},
+ author = {Kan, Min-Yen and
+McKeown, Kathleen R.},
+ booktitle = {Proceedings of the International Natural Language Generation Conference},
+ editor = {McKeown, Kathleen},
+ month = {July},
+ pages = {1--8},
+ publisher = {Association for Computational Linguistics},
+ title = {Corpus-trained Text Generation for Summarization},
+ url = {https://aclanthology.org/W02-2101},
+ year = {2002}
+}
diff --git a/content/publication/kan-mckeown-2002-corpus/index.md b/content/publication/kan-mckeown-2002-corpus/index.md
new file mode 100644
index 0000000..ff4ebf0
--- /dev/null
+++ b/content/publication/kan-mckeown-2002-corpus/index.md
@@ -0,0 +1,14 @@
+---
+title: Corpus-trained Text Generation for Summarization
+authors:
+- min
+- Kathleen R. McKeown
+date: '2002-07-01'
+publishDate: '2024-07-11T07:40:56.610197Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the International Natural Language Generation Conference*'
+links:
+- name: URL
+ url: https://aclanthology.org/W02-2101
+---
diff --git a/content/publication/kim-etal-2009-extracting/cite.bib b/content/publication/kim-etal-2009-extracting/cite.bib
new file mode 100644
index 0000000..eddf76c
--- /dev/null
+++ b/content/publication/kim-etal-2009-extracting/cite.bib
@@ -0,0 +1,14 @@
+@inproceedings{kim-etal-2009-extracting,
+ address = {Sydney, Australia},
+ author = {Kim, Su Nam and
+Baldwin, Timothy and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the Australasian Language Technology Association Workshop 2009},
+ editor = {Pizzato, Luiz Augusto and
+Schwitter, Rolf},
+ month = {December},
+ pages = {94--98},
+ title = {Extracting Domain-Specific Words - A Statistical Approach},
+ url = {https://aclanthology.org/U09-1013},
+ year = {2009}
+}
diff --git a/content/publication/kim-etal-2009-extracting/index.md b/content/publication/kim-etal-2009-extracting/index.md
new file mode 100644
index 0000000..f4117be
--- /dev/null
+++ b/content/publication/kim-etal-2009-extracting/index.md
@@ -0,0 +1,16 @@
+---
+title: Extracting Domain-Specific Words - A Statistical Approach
+authors:
+- Su Nam Kim
+- Timothy Baldwin
+- min
+date: '2009-12-01'
+publishDate: '2024-07-11T07:40:56.536274Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the Australasian Language Technology Association Workshop
+ 2009*'
+links:
+- name: URL
+ url: https://aclanthology.org/U09-1013
+---
diff --git a/content/publication/kim-etal-2010-evaluating/cite.bib b/content/publication/kim-etal-2010-evaluating/cite.bib
new file mode 100644
index 0000000..e93eb27
--- /dev/null
+++ b/content/publication/kim-etal-2010-evaluating/cite.bib
@@ -0,0 +1,15 @@
+@inproceedings{kim-etal-2010-evaluating,
+ address = {Beijing, China},
+ author = {Kim, Su Nam and
+Baldwin, Timothy and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the 23rd International Conference on Computational Linguistics (Coling 2010)},
+ editor = {Huang, Chu-Ren and
+Jurafsky, Dan},
+ month = {August},
+ pages = {572--580},
+ publisher = {Coling 2010 Organizing Committee},
+ title = {Evaluating N-gram based Evaluation Metrics for Automatic Keyphrase Extraction},
+ url = {https://aclanthology.org/C10-1065},
+ year = {2010}
+}
diff --git a/content/publication/kim-etal-2010-evaluating/index.md b/content/publication/kim-etal-2010-evaluating/index.md
new file mode 100644
index 0000000..c2b5a5e
--- /dev/null
+++ b/content/publication/kim-etal-2010-evaluating/index.md
@@ -0,0 +1,16 @@
+---
+title: Evaluating N-gram based Evaluation Metrics for Automatic Keyphrase Extraction
+authors:
+- Su Nam Kim
+- Timothy Baldwin
+- min
+date: '2010-08-01'
+publishDate: '2024-07-11T07:40:56.505844Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 23rd International Conference on Computational Linguistics
+ (Coling 2010)*'
+links:
+- name: URL
+ url: https://aclanthology.org/C10-1065
+---
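The distinction this paper studies, exact matching versus relaxed n-gram-based matching of extracted keyphrases, can be illustrated in a few lines. This is only a sketch: the function names and the unigram (n=1) relaxation are illustrative assumptions, not the paper's actual metrics.

```python
def keyphrase_f1(gold, predicted):
    """Exact-match F1 between gold and predicted keyphrase sets."""
    gold_set = {g.lower() for g in gold}
    pred_set = {p.lower() for p in predicted}
    tp = len(gold_set & pred_set)
    precision = tp / len(pred_set) if pred_set else 0.0
    recall = tp / len(gold_set) if gold_set else 0.0
    return 2 * precision * recall / (precision + recall) if precision + recall else 0.0

def unigram_match(gold_phrase, pred_phrase):
    """Relaxed matching: count phrases as equivalent if they share any word."""
    return bool(set(gold_phrase.lower().split()) & set(pred_phrase.lower().split()))
```

Under exact matching, "network pruning" gets no credit against gold "neural network"; the relaxed check awards partial credit for the shared word.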
diff --git a/content/publication/kim-etal-2010-semeval/cite.bib b/content/publication/kim-etal-2010-semeval/cite.bib
new file mode 100644
index 0000000..1c33576
--- /dev/null
+++ b/content/publication/kim-etal-2010-semeval/cite.bib
@@ -0,0 +1,16 @@
+@inproceedings{kim-etal-2010-semeval,
+ address = {Uppsala, Sweden},
+ author = {Kim, Su Nam and
+Medelyan, Olena and
+Kan, Min-Yen and
+Baldwin, Timothy},
+ booktitle = {Proceedings of the 5th International Workshop on Semantic Evaluation},
+ editor = {Erk, Katrin and
+Strapparava, Carlo},
+ month = {July},
+ pages = {21--26},
+ publisher = {Association for Computational Linguistics},
+ title = {SemEval-2010 Task 5: Automatic Keyphrase Extraction from Scientific Articles},
+ url = {https://aclanthology.org/S10-1004},
+ year = {2010}
+}
diff --git a/content/publication/kim-etal-2010-semeval/index.md b/content/publication/kim-etal-2010-semeval/index.md
new file mode 100644
index 0000000..844c860
--- /dev/null
+++ b/content/publication/kim-etal-2010-semeval/index.md
@@ -0,0 +1,16 @@
+---
+title: 'SemEval-2010 Task 5: Automatic Keyphrase Extraction from Scientific Articles'
+authors:
+- Su Nam Kim
+- Olena Medelyan
+- min
+- Timothy Baldwin
+date: '2010-07-01'
+publishDate: '2024-07-11T07:40:56.487584Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 5th International Workshop on Semantic Evaluation*'
+links:
+- name: URL
+ url: https://aclanthology.org/S10-1004
+---
diff --git a/content/publication/kim-kan-2009-examining/cite.bib b/content/publication/kim-kan-2009-examining/cite.bib
new file mode 100644
index 0000000..359245c
--- /dev/null
+++ b/content/publication/kim-kan-2009-examining/cite.bib
@@ -0,0 +1,16 @@
+@inproceedings{kim-kan-2009-examining,
+ address = {Singapore},
+ author = {Kim, Su Nam and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the Workshop on Multiword Expressions: Identification, Interpretation, Disambiguation and Applications (MWE 2009)},
+ editor = {Anastasiou, Dimitra and
+Hashimoto, Chikara and
+Nakov, Preslav and
+Kim, Su Nam},
+ month = {August},
+ pages = {9--16},
+ publisher = {Association for Computational Linguistics},
+ title = {Re-examining Automatic Keyphrase Extraction Approaches in Scientific Articles},
+ url = {https://aclanthology.org/W09-2902},
+ year = {2009}
+}
diff --git a/content/publication/kim-kan-2009-examining/index.md b/content/publication/kim-kan-2009-examining/index.md
new file mode 100644
index 0000000..2131025
--- /dev/null
+++ b/content/publication/kim-kan-2009-examining/index.md
@@ -0,0 +1,15 @@
+---
+title: Re-examining Automatic Keyphrase Extraction Approaches in Scientific Articles
+authors:
+- Su Nam Kim
+- min
+date: '2009-08-01'
+publishDate: '2024-07-11T07:40:56.524050Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the Workshop on Multiword Expressions: Identification,
+ Interpretation, Disambiguation and Applications (MWE 2009)*'
+links:
+- name: URL
+ url: https://aclanthology.org/W09-2902
+---
diff --git a/content/publication/klavans-kan-1998-role-verbs/cite.bib b/content/publication/klavans-kan-1998-role-verbs/cite.bib
new file mode 100644
index 0000000..dcde309
--- /dev/null
+++ b/content/publication/klavans-kan-1998-role-verbs/cite.bib
@@ -0,0 +1,13 @@
+@inproceedings{klavans-kan-1998-role-verbs,
+ address = {Montreal, Quebec, Canada},
+ author = {Klavans, Judith L. and
+Kan, Min-Yen},
+ booktitle = {36th Annual Meeting of the Association for Computational Linguistics and 17th International Conference on Computational Linguistics, Volume 1},
+ doi = {10.3115/980845.980959},
+ month = {August},
+ pages = {680--686},
+ publisher = {Association for Computational Linguistics},
+ title = {Role of Verbs in Document Analysis},
+ url = {https://aclanthology.org/P98-1112},
+ year = {1998}
+}
diff --git a/content/publication/klavans-kan-1998-role-verbs/index.md b/content/publication/klavans-kan-1998-role-verbs/index.md
new file mode 100644
index 0000000..9d9ad46
--- /dev/null
+++ b/content/publication/klavans-kan-1998-role-verbs/index.md
@@ -0,0 +1,16 @@
+---
+title: Role of Verbs in Document Analysis
+authors:
+- Judith L. Klavans
+- min
+date: '1998-08-01'
+publishDate: '2024-07-11T07:40:56.643091Z'
+publication_types:
+- paper-conference
+publication: '*36th Annual Meeting of the Association for Computational Linguistics
+ and 17th International Conference on Computational Linguistics, Volume 1*'
+doi: 10.3115/980845.980959
+links:
+- name: URL
+ url: https://aclanthology.org/P98-1112
+---
diff --git a/content/publication/lei-etal-2018-sequicity/cite.bib b/content/publication/lei-etal-2018-sequicity/cite.bib
new file mode 100644
index 0000000..54d0742
--- /dev/null
+++ b/content/publication/lei-etal-2018-sequicity/cite.bib
@@ -0,0 +1,20 @@
+@inproceedings{lei-etal-2018-sequicity,
+ abstract = {Existing solutions to task-oriented dialogue systems follow pipeline designs, which introduce architectural complexity and fragility. We propose a novel, holistic, extendable framework based on a single sequence-to-sequence (seq2seq) model which can be optimized with supervised or reinforcement learning. A key contribution is that we design text spans named belief spans to track dialogue beliefs, allowing task-oriented dialogue systems to be modeled in a seq2seq way. Based on this, we propose a simplistic Two Stage CopyNet instantiation which demonstrates good scalability: significantly reducing model complexity in terms of number of parameters and training time by an order of magnitude. It significantly outperforms state-of-the-art pipeline-based methods on large datasets and retains a satisfactory entity match rate on out-of-vocabulary (OOV) cases where pipeline-designed competitors totally fail.},
+ address = {Melbourne, Australia},
+ author = {Lei, Wenqiang and
+Jin, Xisen and
+Kan, Min-Yen and
+Ren, Zhaochun and
+He, Xiangnan and
+Yin, Dawei},
+ booktitle = {Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
+ doi = {10.18653/v1/P18-1133},
+ editor = {Gurevych, Iryna and
+Miyao, Yusuke},
+ month = {July},
+ pages = {1437--1447},
+ publisher = {Association for Computational Linguistics},
+ title = {Sequicity: Simplifying Task-oriented Dialogue Systems with Single Sequence-to-Sequence Architectures},
+ url = {https://aclanthology.org/P18-1133},
+ year = {2018}
+}
diff --git a/content/publication/lei-etal-2018-sequicity/index.md b/content/publication/lei-etal-2018-sequicity/index.md
new file mode 100644
index 0000000..faf0c88
--- /dev/null
+++ b/content/publication/lei-etal-2018-sequicity/index.md
@@ -0,0 +1,32 @@
+---
+title: 'Sequicity: Simplifying Task-oriented Dialogue Systems with Single Sequence-to-Sequence
+ Architectures'
+authors:
+- Wenqiang Lei
+- Xisen Jin
+- min
+- Zhaochun Ren
+- Xiangnan He
+- Dawei Yin
+date: '2018-07-01'
+publishDate: '2024-07-11T07:40:56.361300Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 56th Annual Meeting of the Association for Computational
+ Linguistics (Volume 1: Long Papers)*'
+doi: 10.18653/v1/P18-1133
+abstract: 'Existing solutions to task-oriented dialogue systems follow pipeline designs
+ which introduce architectural complexity and fragility. We propose a novel, holistic,
+ extendable framework based on a single sequence-to-sequence (seq2seq) model which
+ can be optimized with supervised or reinforcement learning. A key contribution is
+ that we design text spans named belief spans to track dialogue beliefs, allowing
+ task-oriented dialogue systems to be modeled in a seq2seq way. Based on this, we
+ propose a simplistic Two Stage CopyNet instantiation which demonstrates good scalability:
+ significantly reducing model complexity in terms of number of parameters and training
+ time by an order of magnitude. It significantly outperforms state-of-the-art pipeline-based
+ methods on large datasets and retains a satisfactory entity match rate on out-of-vocabulary
+ (OOV) cases where pipeline-designed competitors totally fail.'
+links:
+- name: URL
+ url: https://aclanthology.org/P18-1133
+---
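The belief-span idea in the abstract, serializing the dialogue state as plain text so a seq2seq model can generate it like ordinary tokens, can be sketched minimally. The `slot = value ; ...` format below is an illustrative assumption, not Sequicity's actual span syntax:

```python
def encode_belief_span(beliefs):
    """Serialize a dialogue belief state into a flat text span so a seq2seq
    decoder can generate it token by token (format is illustrative)."""
    return " ; ".join(f"{slot} = {value}" for slot, value in beliefs.items())

def decode_belief_span(span):
    """Parse a generated belief span back into slot-value pairs."""
    beliefs = {}
    for part in span.split(";"):
        if "=" in part:
            slot, value = part.split("=", 1)
            beliefs[slot.strip()] = value.strip()
    return beliefs
```

Because the state is just text, tracking it needs no separate pipeline module: the decoder emits the span, and downstream steps parse it back.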
diff --git a/content/publication/lei-etal-2020-examining/cite.bib b/content/publication/lei-etal-2020-examining/cite.bib
new file mode 100644
index 0000000..7777c7e
--- /dev/null
+++ b/content/publication/lei-etal-2020-examining/cite.bib
@@ -0,0 +1,23 @@
+@inproceedings{lei-etal-2020-examining,
+ abstract = {In existing sophisticated text-to-SQL models, schema linking is often considered as a simple, minor component, belying its importance. By providing a schema linking corpus based on the Spider text-to-SQL dataset, we systematically study the role of schema linking. We also build a simple BERT-based baseline, called Schema-Linking SQL (SLSQL) to perform a data-driven study. We find when schema linking is done well, SLSQL demonstrates good performance on Spider despite its structural simplicity. Many remaining errors are attributable to corpus noise. This suggests schema linking is the crux for the current text-to-SQL task. Our analytic studies provide insights on the characteristics of schema linking for future developments of text-to-SQL tasks.},
+ address = {Online},
+ author = {Lei, Wenqiang and
+Wang, Weixin and
+Ma, Zhixin and
+Gan, Tian and
+Lu, Wei and
+Kan, Min-Yen and
+Chua, Tat-Seng},
+ booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
+ doi = {10.18653/v1/2020.emnlp-main.564},
+ editor = {Webber, Bonnie and
+Cohn, Trevor and
+He, Yulan and
+Liu, Yang},
+ month = {November},
+ pages = {6943--6954},
+ publisher = {Association for Computational Linguistics},
+ title = {Re-examining the Role of Schema Linking in Text-to-SQL},
+ url = {https://aclanthology.org/2020.emnlp-main.564},
+ year = {2020}
+}
diff --git a/content/publication/lei-etal-2020-examining/index.md b/content/publication/lei-etal-2020-examining/index.md
new file mode 100644
index 0000000..bce6aee
--- /dev/null
+++ b/content/publication/lei-etal-2020-examining/index.md
@@ -0,0 +1,31 @@
+---
+title: Re-examining the Role of Schema Linking in Text-to-SQL
+authors:
+- Wenqiang Lei
+- Weixin Wang
+- Zhixin Ma
+- Tian Gan
+- Wei Lu
+- min
+- Tat-Seng Chua
+date: '2020-11-01'
+publishDate: '2024-07-11T07:40:56.269384Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 2020 Conference on Empirical Methods in Natural
+ Language Processing (EMNLP)*'
+doi: 10.18653/v1/2020.emnlp-main.564
+abstract: In existing sophisticated text-to-SQL models, schema linking is often considered
+ as a simple, minor component, belying its importance. By providing a schema linking
+ corpus based on the Spider text-to-SQL dataset, we systematically study the role
+ of schema linking. We also build a simple BERT-based baseline, called Schema-Linking
+ SQL (SLSQL) to perform a data-driven study. We find when schema linking is done
+ well, SLSQL demonstrates good performance on Spider despite its structural simplicity.
+ Many remaining errors are attributable to corpus noise. This suggests schema linking
+ is the crux for the current text-to-SQL task. Our analytic studies provide insights
+ on the characteristics of schema linking for future developments of text-to-SQL
+ tasks.
+links:
+- name: URL
+ url: https://aclanthology.org/2020.emnlp-main.564
+---
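As a toy illustration of what "schema linking" means here, the baseline below matches question tokens to schema columns by exact string overlap. This is a deliberately naive sketch for intuition, not the paper's BERT-based SLSQL model:

```python
def link_schema(question, schema):
    """Link question tokens to schema columns by exact string match.

    schema: dict mapping table name -> list of column names.
    Returns (token, table, column) triples for every match found.
    """
    tokens = question.lower().rstrip("?").split()
    links = []
    for table, columns in schema.items():
        for column in columns:
            for token in tokens:
                if token == column.lower():
                    links.append((token, table, column))
    return links
```

Real linkers must also handle synonyms, multi-word column mentions, and value mentions, which is where the annotated corpus the paper provides comes in.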
diff --git a/content/publication/li-etal-2020-molweni/cite.bib b/content/publication/li-etal-2020-molweni/cite.bib
new file mode 100644
index 0000000..ff5f44e
--- /dev/null
+++ b/content/publication/li-etal-2020-molweni/cite.bib
@@ -0,0 +1,23 @@
+@inproceedings{li-etal-2020-molweni,
+ abstract = {Research into the area of multiparty dialog has grown considerably over recent years. We present the Molweni dataset, a machine reading comprehension (MRC) dataset with discourse structure built over multiparty dialog. Molweni’s source samples from the Ubuntu Chat Corpus, including 10,000 dialogs comprising 88,303 utterances. We annotate 30,066 questions on this corpus, including both answerable and unanswerable questions. Molweni also uniquely contributes discourse dependency annotations in a modified Segmented Discourse Representation Theory (SDRT; Asher et al., 2016) style for all of its multiparty dialogs, contributing large-scale (78,245 annotated discourse relations) data to bear on the task of multiparty dialog discourse parsing. Our experiments show that Molweni is a challenging dataset for current MRC models: BERT-wwm, a current, strong SQuAD 2.0 performer, achieves only 67.7% F1 on Molweni’s questions, a 20+% significant drop as compared against its SQuAD 2.0 performance.},
+ address = {Barcelona, Spain (Online)},
+ author = {Li, Jiaqi and
+Liu, Ming and
+Kan, Min-Yen and
+Zheng, Zihao and
+Wang, Zekun and
+Lei, Wenqiang and
+Liu, Ting and
+Qin, Bing},
+ booktitle = {Proceedings of the 28th International Conference on Computational Linguistics},
+ doi = {10.18653/v1/2020.coling-main.238},
+ editor = {Scott, Donia and
+Bel, Nuria and
+Zong, Chengqing},
+ month = {December},
+ pages = {2642--2652},
+ publisher = {International Committee on Computational Linguistics},
+ title = {Molweni: A Challenge Multiparty Dialogues-based Machine Reading Comprehension Dataset with Discourse Structure},
+ url = {https://aclanthology.org/2020.coling-main.238},
+ year = {2020}
+}
diff --git a/content/publication/li-etal-2020-molweni/index.md b/content/publication/li-etal-2020-molweni/index.md
new file mode 100644
index 0000000..b1c57ae
--- /dev/null
+++ b/content/publication/li-etal-2020-molweni/index.md
@@ -0,0 +1,35 @@
+---
+title: 'Molweni: A Challenge Multiparty Dialogues-based Machine Reading Comprehension
+ Dataset with Discourse Structure'
+authors:
+- Jiaqi Li
+- Ming Liu
+- min
+- Zihao Zheng
+- Zekun Wang
+- Wenqiang Lei
+- Ting Liu
+- Bing Qin
+date: '2020-12-01'
+publishDate: '2024-07-11T07:40:56.284002Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 28th International Conference on Computational Linguistics*'
+doi: 10.18653/v1/2020.coling-main.238
+abstract: 'Research into the area of multiparty dialog has grown considerably over
+ recent years. We present the Molweni dataset, a machine reading comprehension (MRC)
+ dataset with discourse structure built over multiparty dialog. Molweni’s source
+ samples from the Ubuntu Chat Corpus, including 10,000 dialogs comprising 88,303
+ utterances. We annotate 30,066 questions on this corpus, including both answerable
+ and unanswerable questions. Molweni also uniquely contributes discourse dependency
+ annotations in a modified Segmented Discourse Representation Theory (SDRT; Asher
+ et al., 2016) style for all of its multiparty dialogs, contributing large-scale
+ (78,245 annotated discourse relations) data to bear on the task of multiparty dialog
+ discourse parsing. Our experiments show that Molweni is a challenging dataset for
+ current MRC models: BERT-wwm, a current, strong SQuAD 2.0 performer, achieves only
+ 67.7% F1 on Molweni’s questions, a 20+% significant drop as compared against its
+ SQuAD 2.0 performance.'
+links:
+- name: URL
+ url: https://aclanthology.org/2020.coling-main.238
+---
diff --git a/content/publication/li-etal-2023-coannotating/cite.bib b/content/publication/li-etal-2023-coannotating/cite.bib
new file mode 100644
index 0000000..b6324da
--- /dev/null
+++ b/content/publication/li-etal-2023-coannotating/cite.bib
@@ -0,0 +1,22 @@
+@inproceedings{li-etal-2023-coannotating,
+ abstract = {Annotated data plays a critical role in Natural Language Processing (NLP) in training models and evaluating their performance. Given recent developments in Large Language Models (LLMs), models such as ChatGPT demonstrate zero-shot capability on many text-annotation tasks, comparable with or even exceeding human annotators. Such LLMs can serve as alternatives for manual annotation, due to lower costs and higher scalability. However, limited work has leveraged LLMs as complementary annotators, nor explored how annotation work is best allocated among humans and LLMs to achieve both quality and cost objectives. We propose CoAnnotating, a novel paradigm for Human-LLM co-annotation of unstructured texts at scale. Under this framework, we utilize uncertainty to estimate LLMs’ annotation capability. Our empirical study shows CoAnnotating to be an effective means to allocate work from results on different datasets, with up to 21% performance improvement over random baseline. For code implementation, see https://github.com/SALT-NLP/CoAnnotating.},
+ address = {Singapore},
+ author = {Li, Minzhi and
+Shi, Taiwei and
+Ziems, Caleb and
+Kan, Min-Yen and
+Chen, Nancy and
+Liu, Zhengyuan and
+Yang, Diyi},
+ booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
+ doi = {10.18653/v1/2023.emnlp-main.92},
+ editor = {Bouamor, Houda and
+Pino, Juan and
+Bali, Kalika},
+ month = {December},
+ pages = {1487--1505},
+ publisher = {Association for Computational Linguistics},
+ title = {CoAnnotating: Uncertainty-Guided Work Allocation between Human and Large Language Models for Data Annotation},
+ url = {https://aclanthology.org/2023.emnlp-main.92},
+ year = {2023}
+}
diff --git a/content/publication/li-etal-2023-coannotating/index.md b/content/publication/li-etal-2023-coannotating/index.md
new file mode 100644
index 0000000..b0244dc
--- /dev/null
+++ b/content/publication/li-etal-2023-coannotating/index.md
@@ -0,0 +1,35 @@
+---
+title: 'CoAnnotating: Uncertainty-Guided Work Allocation between Human and Large Language
+ Models for Data Annotation'
+authors:
+- Minzhi Li
+- Taiwei Shi
+- Caleb Ziems
+- min
+- Nancy Chen
+- Zhengyuan Liu
+- Diyi Yang
+date: '2023-12-01'
+publishDate: '2024-07-06T02:22:24.561215Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 2023 Conference on Empirical Methods in Natural
+ Language Processing*'
+doi: 10.18653/v1/2023.emnlp-main.92
+abstract: Annotated data plays a critical role in Natural Language Processing (NLP)
+ in training models and evaluating their performance. Given recent developments in
+ Large Language Models (LLMs), models such as ChatGPT demonstrate zero-shot capability
+ on many text-annotation tasks, comparable with or even exceeding human annotators.
+ Such LLMs can serve as alternatives for manual annotation, due to lower costs and
+ higher scalability. However, limited work has leveraged LLMs as complementary annotators,
+ nor explored how annotation work is best allocated among humans and LLMs to achieve
+ both quality and cost objectives. We propose CoAnnotating, a novel paradigm for
+ Human-LLM co-annotation of unstructured texts at scale. Under this framework, we
+ utilize uncertainty to estimate LLMs’ annotation capability. Our empirical study
+ shows CoAnnotating to be an effective means to allocate work from results on different
+ datasets, with up to 21% performance improvement over random baseline. For code
+ implementation, see https://github.com/SALT-NLP/CoAnnotating.
+links:
+- name: URL
+ url: https://aclanthology.org/2023.emnlp-main.92
+---
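The uncertainty-guided routing the abstract describes can be sketched with Shannon entropy over repeated LLM annotations of the same item. The function names and the 0.5-bit threshold below are illustrative assumptions, not CoAnnotating's actual implementation (see the linked repository for that):

```python
import math
from collections import Counter

def annotation_entropy(labels):
    """Shannon entropy (bits) of repeated LLM annotations for one item."""
    counts = Counter(labels)
    total = len(labels)
    return -sum((c / total) * math.log2(c / total) for c in counts.values())

def allocate(items, threshold=0.5):
    """Send high-uncertainty items to human annotators, the rest to the LLM.

    items: iterable of (text, list_of_llm_labels) pairs.
    """
    to_human, to_llm = [], []
    for text, llm_labels in items:
        (to_human if annotation_entropy(llm_labels) > threshold else to_llm).append(text)
    return to_human, to_llm
```

Items where the LLM's repeated answers agree (entropy 0) stay with the LLM; items where it flip-flops are escalated to humans, trading cost against quality.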
diff --git a/content/publication/li-etal-2024-uno/cite.bib b/content/publication/li-etal-2024-uno/cite.bib
new file mode 100644
index 0000000..c58e5fe
--- /dev/null
+++ b/content/publication/li-etal-2024-uno/cite.bib
@@ -0,0 +1,18 @@
+@inproceedings{li-etal-2024-uno,
+ abstract = {Previous zero-shot dialogue state tracking (DST) methods only apply transfer learning, but ignore unlabelled data in the target domain. We transform zero-shot DST into few-shot DST by utilising such unlabelled data via joint and self-training methods. Our method incorporates auxiliary tasks that generate slot types as inverse prompts for main tasks, creating slot values during joint training. Cycle consistency between these two tasks enables the generation and selection of quality samples in unknown target domains for subsequent fine-tuning. This approach also facilitates automatic label creation, thereby optimizing the training and fine-tuning of DST models. We demonstrate this method’s effectiveness on general language models in zero-shot scenarios, improving average joint goal accuracy by 8% across all domains in MultiWOZ.},
+ address = {Mexico City, Mexico},
+ author = {Li, Chuang and
+Zhang, Yan and
+Kan, Min-Yen and
+Li, Haizhou},
+ booktitle = {Findings of the Association for Computational Linguistics: NAACL 2024},
+ editor = {Duh, Kevin and
+Gomez, Helena and
+Bethard, Steven},
+ month = {June},
+ pages = {2972--2983},
+ publisher = {Association for Computational Linguistics},
+ title = {UNO-DST: Leveraging Unlabelled Data in Zero-Shot Dialogue State Tracking},
+ url = {https://aclanthology.org/2024.findings-naacl.187},
+ year = {2024}
+}
diff --git a/content/publication/li-etal-2024-uno/index.md b/content/publication/li-etal-2024-uno/index.md
new file mode 100644
index 0000000..0f25488
--- /dev/null
+++ b/content/publication/li-etal-2024-uno/index.md
@@ -0,0 +1,27 @@
+---
+title: 'UNO-DST: Leveraging Unlabelled Data in Zero-Shot Dialogue State Tracking'
+authors:
+- Chuang Li
+- Yan Zhang
+- min
+- Haizhou Li
+date: '2024-06-01'
+publishDate: '2024-07-06T02:22:24.510368Z'
+publication_types:
+- paper-conference
+publication: '*Findings of the Association for Computational Linguistics: NAACL 2024*'
+abstract: Previous zero-shot dialogue state tracking (DST) methods only apply transfer
+ learning, but ignore unlabelled data in the target domain. We transform zero-shot
+ DST into few-shot DST by utilising such unlabelled data via joint and self-training
+ methods. Our method incorporates auxiliary tasks that generate slot types as inverse
+ prompts for main tasks, creating slot values during joint training. Cycle consistency
+ between these two tasks enables the generation and selection of quality samples
+ in unknown target domains for subsequent fine-tuning. This approach also facilitates
+ automatic label creation, thereby optimizing the training and fine-tuning of DST
+ models. We demonstrate this method’s effectiveness on general language models in
+ zero-shot scenarios, improving average joint goal accuracy by 8% across all domains
+ in MultiWOZ.
+links:
+- name: URL
+ url: https://aclanthology.org/2024.findings-naacl.187
+---
diff --git a/content/publication/lin-etal-2009-recognizing/cite.bib b/content/publication/lin-etal-2009-recognizing/cite.bib
new file mode 100644
index 0000000..5b42002
--- /dev/null
+++ b/content/publication/lin-etal-2009-recognizing/cite.bib
@@ -0,0 +1,15 @@
+@inproceedings{lin-etal-2009-recognizing,
+ address = {Singapore},
+ author = {Lin, Ziheng and
+Kan, Min-Yen and
+Ng, Hwee Tou},
+ booktitle = {Proceedings of the 2009 Conference on Empirical Methods in Natural Language Processing},
+ editor = {Koehn, Philipp and
+Mihalcea, Rada},
+ month = {August},
+ pages = {343--351},
+ publisher = {Association for Computational Linguistics},
+ title = {Recognizing Implicit Discourse Relations in the Penn Discourse Treebank},
+ url = {https://aclanthology.org/D09-1036},
+ year = {2009}
+}
diff --git a/content/publication/lin-etal-2009-recognizing/index.md b/content/publication/lin-etal-2009-recognizing/index.md
new file mode 100644
index 0000000..9e7a1f2
--- /dev/null
+++ b/content/publication/lin-etal-2009-recognizing/index.md
@@ -0,0 +1,16 @@
+---
+title: Recognizing Implicit Discourse Relations in the Penn Discourse Treebank
+authors:
+- Ziheng Lin
+- min
+- Hwee Tou Ng
+date: '2009-08-01'
+publishDate: '2024-07-11T07:40:56.548673Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 2009 Conference on Empirical Methods in Natural
+ Language Processing*'
+links:
+- name: URL
+ url: https://aclanthology.org/D09-1036
+---
diff --git a/content/publication/lin-etal-2010-extracting/cite.bib b/content/publication/lin-etal-2010-extracting/cite.bib
new file mode 100644
index 0000000..bac74a4
--- /dev/null
+++ b/content/publication/lin-etal-2010-extracting/cite.bib
@@ -0,0 +1,19 @@
+@inproceedings{lin-etal-2010-extracting,
+ address = {Los Angeles, California, USA},
+ author = {Lin, Sein and
+Ng, Jun-Ping and
+Pradhan, Shreyasee and
+Shah, Jatin and
+Pietrobon, Ricardo and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the NAACL HLT 2010 Second Louhi Workshop on Text and Data Mining of Health Documents},
+ editor = {Dalianis, Hercules and
+Hassel, Martin and
+Nilsson, Gunnar},
+ month = {June},
+ pages = {90--95},
+ publisher = {Association for Computational Linguistics},
+ title = {Extracting Formulaic and Free Text Clinical Research Articles Metadata using Conditional Random Fields},
+ url = {https://aclanthology.org/W10-1114},
+ year = {2010}
+}
diff --git a/content/publication/lin-etal-2010-extracting/index.md b/content/publication/lin-etal-2010-extracting/index.md
new file mode 100644
index 0000000..d9135c2
--- /dev/null
+++ b/content/publication/lin-etal-2010-extracting/index.md
@@ -0,0 +1,20 @@
+---
+title: Extracting Formulaic and Free Text Clinical Research Articles Metadata using
+ Conditional Random Fields
+authors:
+- Sein Lin
+- Jun-Ping Ng
+- Shreyasee Pradhan
+- Jatin Shah
+- Ricardo Pietrobon
+- min
+date: '2010-06-01'
+publishDate: '2024-07-11T07:40:56.481260Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the NAACL HLT 2010 Second Louhi Workshop on Text and
+ Data Mining of Health Documents*'
+links:
+- name: URL
+ url: https://aclanthology.org/W10-1114
+---
diff --git a/content/publication/lin-etal-2011-automatically/cite.bib b/content/publication/lin-etal-2011-automatically/cite.bib
new file mode 100644
index 0000000..ea76fc9
--- /dev/null
+++ b/content/publication/lin-etal-2011-automatically/cite.bib
@@ -0,0 +1,16 @@
+@inproceedings{lin-etal-2011-automatically,
+ address = {Portland, Oregon, USA},
+ author = {Lin, Ziheng and
+Ng, Hwee Tou and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies},
+ editor = {Lin, Dekang and
+Matsumoto, Yuji and
+Mihalcea, Rada},
+ month = {June},
+ pages = {997--1006},
+ publisher = {Association for Computational Linguistics},
+ title = {Automatically Evaluating Text Coherence Using Discourse Relations},
+ url = {https://aclanthology.org/P11-1100},
+ year = {2011}
+}
diff --git a/content/publication/lin-etal-2011-automatically/index.md b/content/publication/lin-etal-2011-automatically/index.md
new file mode 100644
index 0000000..7556325
--- /dev/null
+++ b/content/publication/lin-etal-2011-automatically/index.md
@@ -0,0 +1,16 @@
+---
+title: Automatically Evaluating Text Coherence Using Discourse Relations
+authors:
+- Ziheng Lin
+- Hwee Tou Ng
+- min
+date: '2011-06-01'
+publishDate: '2024-07-11T07:40:56.475209Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 49th Annual Meeting of the Association for Computational
+ Linguistics: Human Language Technologies*'
+links:
+- name: URL
+ url: https://aclanthology.org/P11-1100
+---
diff --git a/content/publication/lin-etal-2012-combining/cite.bib b/content/publication/lin-etal-2012-combining/cite.bib
new file mode 100644
index 0000000..b85149a
--- /dev/null
+++ b/content/publication/lin-etal-2012-combining/cite.bib
@@ -0,0 +1,19 @@
+@inproceedings{lin-etal-2012-combining,
+ address = {Jeju Island, Korea},
+ author = {Lin, Ziheng and
+Liu, Chang and
+Ng, Hwee Tou and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the 50th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
+ editor = {Li, Haizhou and
+Lin, Chin-Yew and
+Osborne, Miles and
+Lee, Gary Geunbae and
+Park, Jong C.},
+ month = {July},
+ pages = {1006--1014},
+ publisher = {Association for Computational Linguistics},
+ title = {Combining Coherence Models and Machine Translation Evaluation Metrics for Summarization Evaluation},
+ url = {https://aclanthology.org/P12-1106},
+ year = {2012}
+}
diff --git a/content/publication/lin-etal-2012-combining/index.md b/content/publication/lin-etal-2012-combining/index.md
new file mode 100644
index 0000000..52501ad
--- /dev/null
+++ b/content/publication/lin-etal-2012-combining/index.md
@@ -0,0 +1,18 @@
+---
+title: Combining Coherence Models and Machine Translation Evaluation Metrics for Summarization
+ Evaluation
+authors:
+- Ziheng Lin
+- Chang Liu
+- Hwee Tou Ng
+- min
+date: '2012-07-01'
+publishDate: '2024-07-11T07:40:56.456833Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 50th Annual Meeting of the Association for Computational
+ Linguistics (Volume 1: Long Papers)*'
+links:
+- name: URL
+ url: https://aclanthology.org/P12-1106
+---
diff --git a/content/publication/lin-kan-2007-timestamped/cite.bib b/content/publication/lin-kan-2007-timestamped/cite.bib
new file mode 100644
index 0000000..5cce4c0
--- /dev/null
+++ b/content/publication/lin-kan-2007-timestamped/cite.bib
@@ -0,0 +1,15 @@
+@inproceedings{lin-kan-2007-timestamped,
+ address = {Rochester, NY, USA},
+ author = {Lin, Ziheng and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the Second Workshop on TextGraphs: Graph-Based Algorithms for Natural Language Processing},
+ editor = {Biemann, Chris and
+Matveeva, Irina and
+Mihalcea, Rada and
+Radev, Dragomir},
+ pages = {25--32},
+ publisher = {Association for Computational Linguistics},
+ title = {Timestamped Graphs: Evolutionary Models of Text for Multi-Document Summarization},
+ url = {https://aclanthology.org/W07-0204},
+ year = {2007}
+}
diff --git a/content/publication/lin-kan-2007-timestamped/index.md b/content/publication/lin-kan-2007-timestamped/index.md
new file mode 100644
index 0000000..1f18733
--- /dev/null
+++ b/content/publication/lin-kan-2007-timestamped/index.md
@@ -0,0 +1,15 @@
+---
+title: 'Timestamped Graphs: Evolutionary Models of Text for Multi-Document Summarization'
+authors:
+- Ziheng Lin
+- min
+date: '2007-01-01'
+publishDate: '2024-07-11T07:40:56.573882Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the Second Workshop on TextGraphs: Graph-Based Algorithms
+ for Natural Language Processing*'
+links:
+- name: URL
+ url: https://aclanthology.org/W07-0204
+---
diff --git a/content/publication/lu-etal-2023-scitab/cite.bib b/content/publication/lu-etal-2023-scitab/cite.bib
new file mode 100644
index 0000000..1811361
--- /dev/null
+++ b/content/publication/lu-etal-2023-scitab/cite.bib
@@ -0,0 +1,20 @@
+@inproceedings{lu-etal-2023-scitab,
+ abstract = {Current scientific fact-checking benchmarks exhibit several shortcomings, such as biases arising from crowd-sourced claims and an over-reliance on text-based evidence. We present SCITAB, a challenging evaluation dataset consisting of 1.2K expert-verified scientific claims that 1) originate from authentic scientific publications and 2) require compositional reasoning for verification. The claims are paired with evidence-containing scientific tables annotated with labels. Through extensive evaluations, we demonstrate that SCITAB poses a significant challenge to state-of-the-art models, including table-based pretraining models and large language models. All models except GPT-4 achieved performance barely above random guessing. Popular prompting techniques, such as Chain-of-Thought, do not achieve much performance gains on SCITAB. Our analysis uncovers several unique challenges posed by SCITAB, including table grounding, claim ambiguity, and compositional reasoning. Our codes and data are publicly available at https://github.com/XinyuanLu00/SciTab.},
+ address = {Singapore},
+ author = {Lu, Xinyuan and
+Pan, Liangming and
+Liu, Qian and
+Nakov, Preslav and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
+ doi = {10.18653/v1/2023.emnlp-main.483},
+ editor = {Bouamor, Houda and
+Pino, Juan and
+Bali, Kalika},
+ month = {December},
+ pages = {7787--7813},
+ publisher = {Association for Computational Linguistics},
+ title = {SCITAB: A Challenging Benchmark for Compositional Reasoning and Claim Verification on Scientific Tables},
+ url = {https://aclanthology.org/2023.emnlp-main.483},
+ year = {2023}
+}
diff --git a/content/publication/lu-etal-2023-scitab/index.md b/content/publication/lu-etal-2023-scitab/index.md
new file mode 100644
index 0000000..b42e6c3
--- /dev/null
+++ b/content/publication/lu-etal-2023-scitab/index.md
@@ -0,0 +1,33 @@
+---
+title: 'SCITAB: A Challenging Benchmark for Compositional Reasoning and Claim Verification
+ on Scientific Tables'
+authors:
+- Xinyuan Lu
+- Liangming Pan
+- Qian Liu
+- Preslav Nakov
+- min
+date: '2023-12-01'
+publishDate: '2024-07-06T02:22:24.568376Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 2023 Conference on Empirical Methods in Natural
+ Language Processing*'
+doi: 10.18653/v1/2023.emnlp-main.483
+abstract: Current scientific fact-checking benchmarks exhibit several shortcomings,
+ such as biases arising from crowd-sourced claims and an over-reliance on text-based
+ evidence. We present SCITAB, a challenging evaluation dataset consisting of 1.2K
+ expert-verified scientific claims that 1) originate from authentic scientific publications
+ and 2) require compositional reasoning for verification. The claims are paired with
+ evidence-containing scientific tables annotated with labels. Through extensive evaluations,
+ we demonstrate that SCITAB poses a significant challenge to state-of-the-art models,
+ including table-based pretraining models and large language models. All models except
+ GPT-4 achieved performance barely above random guessing. Popular prompting techniques,
+ such as Chain-of-Thought, do not achieve much performance gains on SCITAB. Our analysis
+ uncovers several unique challenges posed by SCITAB, including table grounding, claim
+ ambiguity, and compositional reasoning. Our codes and data are publicly available
+ at https://github.com/XinyuanLu00/SciTab.
+links:
+- name: URL
+ url: https://aclanthology.org/2023.emnlp-main.483
+---
diff --git a/content/publication/luong-etal-2010-hybrid/cite.bib b/content/publication/luong-etal-2010-hybrid/cite.bib
new file mode 100644
index 0000000..b0f7251
--- /dev/null
+++ b/content/publication/luong-etal-2010-hybrid/cite.bib
@@ -0,0 +1,15 @@
+@inproceedings{luong-etal-2010-hybrid,
+ address = {Cambridge, MA},
+ author = {Luong, Minh-Thang and
+Nakov, Preslav and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the 2010 Conference on Empirical Methods in Natural Language Processing},
+ editor = {Li, Hang and
+Màrquez, Lluís},
+ month = {October},
+ pages = {148--157},
+ publisher = {Association for Computational Linguistics},
+ title = {A Hybrid Morpheme-Word Representation for Machine Translation of Morphologically Rich Languages},
+ url = {https://aclanthology.org/D10-1015},
+ year = {2010}
+}
diff --git a/content/publication/luong-etal-2010-hybrid/index.md b/content/publication/luong-etal-2010-hybrid/index.md
new file mode 100644
index 0000000..09449b7
--- /dev/null
+++ b/content/publication/luong-etal-2010-hybrid/index.md
@@ -0,0 +1,17 @@
+---
+title: A Hybrid Morpheme-Word Representation for Machine Translation of Morphologically
+ Rich Languages
+authors:
+- Minh-Thang Luong
+- Preslav Nakov
+- min
+date: '2010-10-01'
+publishDate: '2024-07-11T07:40:56.493708Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 2010 Conference on Empirical Methods in Natural
+ Language Processing*'
+links:
+- name: URL
+ url: https://aclanthology.org/D10-1015
+---
diff --git a/content/publication/luong-kan-2010-enhancing/cite.bib b/content/publication/luong-kan-2010-enhancing/cite.bib
new file mode 100644
index 0000000..88ad20c
--- /dev/null
+++ b/content/publication/luong-kan-2010-enhancing/cite.bib
@@ -0,0 +1,14 @@
+@inproceedings{luong-kan-2010-enhancing,
+ address = {Beijing, China},
+ author = {Luong, Minh-Thang and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the 23rd International Conference on Computational Linguistics (Coling 2010)},
+ editor = {Huang, Chu-Ren and
+Jurafsky, Dan},
+ month = {August},
+ pages = {743--751},
+ publisher = {Coling 2010 Organizing Committee},
+ title = {Enhancing Morphological Alignment for Translating Highly Inflected Languages},
+ url = {https://aclanthology.org/C10-1084},
+ year = {2010}
+}
diff --git a/content/publication/luong-kan-2010-enhancing/index.md b/content/publication/luong-kan-2010-enhancing/index.md
new file mode 100644
index 0000000..853427b
--- /dev/null
+++ b/content/publication/luong-kan-2010-enhancing/index.md
@@ -0,0 +1,15 @@
+---
+title: Enhancing Morphological Alignment for Translating Highly Inflected Languages
+authors:
+- Minh-Thang Luong
+- min
+date: '2010-08-01'
+publishDate: '2024-07-11T07:40:56.511952Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 23rd International Conference on Computational Linguistics
+ (Coling 2010)*'
+links:
+- name: URL
+ url: https://aclanthology.org/C10-1084
+---
diff --git a/content/publication/malik-etal-2023-udapter/cite.bib b/content/publication/malik-etal-2023-udapter/cite.bib
new file mode 100644
index 0000000..b83e52f
--- /dev/null
+++ b/content/publication/malik-etal-2023-udapter/cite.bib
@@ -0,0 +1,18 @@
+@inproceedings{malik-etal-2023-udapter,
+ abstract = {We propose two methods to make unsupervised domain adaptation (UDA) more parameter efficient using adapters -- small bottleneck layers interspersed with every layer of the large-scale pre-trained language model (PLM). The first method deconstructs UDA into a two-step process: first by adding a domain adapter to learn domain-invariant information and then by adding a task adapter that uses domain-invariant information to learn task representations in the source domain. The second method jointly learns a supervised classifier while reducing the divergence measure. Compared to strong baselines, our simple methods perform well in natural language inference (MNLI) and the cross-domain sentiment classification task. We even outperform unsupervised domain adaptation methods such as DANN and DSN in sentiment classification, and we are within 0.85% F1 for natural language inference task, by fine-tuning only a fraction of the full model parameters. We release our code at this URL.},
+ address = {Dubrovnik, Croatia},
+ author = {Malik, Bhavitvya and
+Ramesh Kashyap, Abhinav and
+Kan, Min-Yen and
+Poria, Soujanya},
+ booktitle = {Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics},
+ doi = {10.18653/v1/2023.eacl-main.165},
+ editor = {Vlachos, Andreas and
+Augenstein, Isabelle},
+ month = {May},
+ pages = {2249--2263},
+ publisher = {Association for Computational Linguistics},
+ title = {UDAPTER - Efficient Domain Adaptation Using Adapters},
+ url = {https://aclanthology.org/2023.eacl-main.165},
+ year = {2023}
+}
diff --git a/content/publication/malik-etal-2023-udapter/index.md b/content/publication/malik-etal-2023-udapter/index.md
new file mode 100644
index 0000000..2d5646a
--- /dev/null
+++ b/content/publication/malik-etal-2023-udapter/index.md
@@ -0,0 +1,30 @@
+---
+title: UDAPTER - Efficient Domain Adaptation Using Adapters
+authors:
+- Bhavitvya Malik
+- Abhinav Ramesh Kashyap
+- min
+- Soujanya Poria
+date: '2023-05-01'
+publishDate: '2024-07-06T02:22:24.610292Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 17th Conference of the European Chapter of the Association
+ for Computational Linguistics*'
+doi: 10.18653/v1/2023.eacl-main.165
+abstract: 'We propose two methods to make unsupervised domain adaptation (UDA) more
+ parameter efficient using adapters -- small bottleneck layers interspersed with
+ every layer of the large-scale pre-trained language model (PLM). The first method
+ deconstructs UDA into a two-step process: first by adding a domain adapter to learn
+ domain-invariant information and then by adding a task adapter that uses domain-invariant
+ information to learn task representations in the source domain. The second method
+ jointly learns a supervised classifier while reducing the divergence measure. Compared
+ to strong baselines, our simple methods perform well in natural language inference
+ (MNLI) and the cross-domain sentiment classification task. We even outperform unsupervised
+ domain adaptation methods such as DANN and DSN in sentiment classification, and
+ we are within 0.85% F1 for natural language inference task, by fine-tuning only
+ a fraction of the full model parameters. We release our code at this URL.'
+links:
+- name: URL
+ url: https://aclanthology.org/2023.eacl-main.165
+---
diff --git a/content/publication/meng-etal-2023-followupqg/cite.bib b/content/publication/meng-etal-2023-followupqg/cite.bib
new file mode 100644
index 0000000..fbd1845
--- /dev/null
+++ b/content/publication/meng-etal-2023-followupqg/cite.bib
@@ -0,0 +1,22 @@
+@inproceedings{meng-etal-2023-followupqg,
+ address = {Nusa Dua, Bali},
+ author = {Meng, Yan and
+Pan, Liangming and
+Cao, Yixin and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the 13th International Joint Conference on Natural Language Processing and the 3rd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)},
+ doi = {10.18653/v1/2023.ijcnlp-main.17},
+ editor = {Park, Jong C. and
+Arase, Yuki and
+Hu, Baotian and
+Lu, Wei and
+Wijaya, Derry and
+Purwarianti, Ayu and
+Krisnadhi, Adila Alfa},
+ month = {November},
+ pages = {252--271},
+ publisher = {Association for Computational Linguistics},
+ title = {FollowupQG: Towards information-seeking follow-up question generation},
+ url = {https://aclanthology.org/2023.ijcnlp-main.17},
+ year = {2023}
+}
diff --git a/content/publication/meng-etal-2023-followupqg/index.md b/content/publication/meng-etal-2023-followupqg/index.md
new file mode 100644
index 0000000..a12f5ef
--- /dev/null
+++ b/content/publication/meng-etal-2023-followupqg/index.md
@@ -0,0 +1,19 @@
+---
+title: 'FollowupQG: Towards information-seeking follow-up question generation'
+authors:
+- Yan Meng
+- Liangming Pan
+- Yixin Cao
+- min
+date: '2023-11-01'
+publishDate: '2024-07-06T02:22:24.527498Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 13th International Joint Conference on Natural Language
+ Processing and the 3rd Conference of the Asia-Pacific Chapter of the Association
+ for Computational Linguistics (Volume 1: Long Papers)*'
+doi: 10.18653/v1/2023.ijcnlp-main.17
+links:
+- name: URL
+ url: https://aclanthology.org/2023.ijcnlp-main.17
+---
diff --git a/content/publication/ng-etal-2012-exploiting/cite.bib b/content/publication/ng-etal-2012-exploiting/cite.bib
new file mode 100644
index 0000000..3e9142c
--- /dev/null
+++ b/content/publication/ng-etal-2012-exploiting/cite.bib
@@ -0,0 +1,17 @@
+@inproceedings{ng-etal-2012-exploiting,
+ address = {Mumbai, India},
+ author = {Ng, Jun-Ping and
+Bysani, Praveen and
+Lin, Ziheng and
+Kan, Min-Yen and
+Tan, Chew-Lim},
+ booktitle = {Proceedings of COLING 2012},
+ editor = {Kay, Martin and
+Boitet, Christian},
+ month = {December},
+ pages = {2093--2108},
+ publisher = {The COLING 2012 Organizing Committee},
+ title = {Exploiting Category-Specific Information for Multi-Document Summarization},
+ url = {https://aclanthology.org/C12-1128},
+ year = {2012}
+}
diff --git a/content/publication/ng-etal-2012-exploiting/index.md b/content/publication/ng-etal-2012-exploiting/index.md
new file mode 100644
index 0000000..0c73807
--- /dev/null
+++ b/content/publication/ng-etal-2012-exploiting/index.md
@@ -0,0 +1,17 @@
+---
+title: Exploiting Category-Specific Information for Multi-Document Summarization
+authors:
+- Jun-Ping Ng
+- Praveen Bysani
+- Ziheng Lin
+- min
+- Chew-Lim Tan
+date: '2012-12-01'
+publishDate: '2024-07-11T07:40:56.463025Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of COLING 2012*'
+links:
+- name: URL
+ url: https://aclanthology.org/C12-1128
+---
diff --git a/content/publication/ng-etal-2013-exploiting/cite.bib b/content/publication/ng-etal-2013-exploiting/cite.bib
new file mode 100644
index 0000000..e81b232
--- /dev/null
+++ b/content/publication/ng-etal-2013-exploiting/cite.bib
@@ -0,0 +1,22 @@
+@inproceedings{ng-etal-2013-exploiting,
+ address = {Seattle, Washington, USA},
+ author = {Ng, Jun-Ping and
+Kan, Min-Yen and
+Lin, Ziheng and
+Feng, Wei and
+Chen, Bin and
+Su, Jian and
+Tan, Chew-Lim},
+ booktitle = {Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing},
+ editor = {Yarowsky, David and
+Baldwin, Timothy and
+Korhonen, Anna and
+Livescu, Karen and
+Bethard, Steven},
+ month = {October},
+ pages = {12--23},
+ publisher = {Association for Computational Linguistics},
+ title = {Exploiting Discourse Analysis for Article-Wide Temporal Classification},
+ url = {https://aclanthology.org/D13-1002},
+ year = {2013}
+}
diff --git a/content/publication/ng-etal-2013-exploiting/index.md b/content/publication/ng-etal-2013-exploiting/index.md
new file mode 100644
index 0000000..0b852e8
--- /dev/null
+++ b/content/publication/ng-etal-2013-exploiting/index.md
@@ -0,0 +1,20 @@
+---
+title: Exploiting Discourse Analysis for Article-Wide Temporal Classification
+authors:
+- Jun-Ping Ng
+- min
+- Ziheng Lin
+- Wei Feng
+- Bin Chen
+- Jian Su
+- Chew-Lim Tan
+date: '2013-10-01'
+publishDate: '2024-07-11T07:40:56.432328Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 2013 Conference on Empirical Methods in Natural
+ Language Processing*'
+links:
+- name: URL
+ url: https://aclanthology.org/D13-1002
+---
diff --git a/content/publication/ng-etal-2014-exploiting/cite.bib b/content/publication/ng-etal-2014-exploiting/cite.bib
new file mode 100644
index 0000000..98d6472
--- /dev/null
+++ b/content/publication/ng-etal-2014-exploiting/cite.bib
@@ -0,0 +1,17 @@
+@inproceedings{ng-etal-2014-exploiting,
+ address = {Baltimore, Maryland},
+ author = {Ng, Jun-Ping and
+Chen, Yan and
+Kan, Min-Yen and
+Li, Zhoujun},
+ booktitle = {Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
+ doi = {10.3115/v1/P14-1087},
+ editor = {Toutanova, Kristina and
+Wu, Hua},
+ month = {June},
+ pages = {923--933},
+ publisher = {Association for Computational Linguistics},
+ title = {Exploiting Timelines to Enhance Multi-document Summarization},
+ url = {https://aclanthology.org/P14-1087},
+ year = {2014}
+}
diff --git a/content/publication/ng-etal-2014-exploiting/index.md b/content/publication/ng-etal-2014-exploiting/index.md
new file mode 100644
index 0000000..1bc7f8b
--- /dev/null
+++ b/content/publication/ng-etal-2014-exploiting/index.md
@@ -0,0 +1,18 @@
+---
+title: Exploiting Timelines to Enhance Multi-document Summarization
+authors:
+- Jun-Ping Ng
+- Yan Chen
+- min
+- Zhoujun Li
+date: '2014-06-01'
+publishDate: '2024-07-11T07:40:56.413715Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 52nd Annual Meeting of the Association for Computational
+ Linguistics (Volume 1: Long Papers)*'
+doi: 10.3115/v1/P14-1087
+links:
+- name: URL
+ url: https://aclanthology.org/P14-1087
+---
diff --git a/content/publication/ng-kan-2012-improved/cite.bib b/content/publication/ng-kan-2012-improved/cite.bib
new file mode 100644
index 0000000..f6c87c8
--- /dev/null
+++ b/content/publication/ng-kan-2012-improved/cite.bib
@@ -0,0 +1,14 @@
+@inproceedings{ng-kan-2012-improved,
+ address = {Mumbai, India},
+ author = {Ng, Jun-Ping and
+Kan, Min-Yen},
+ booktitle = {Proceedings of COLING 2012},
+ editor = {Kay, Martin and
+Boitet, Christian},
+ month = {December},
+ pages = {2109--2124},
+ publisher = {The COLING 2012 Organizing Committee},
+ title = {Improved Temporal Relation Classification using Dependency Parses and Selective Crowdsourced Annotations},
+ url = {https://aclanthology.org/C12-1129},
+ year = {2012}
+}
diff --git a/content/publication/ng-kan-2012-improved/index.md b/content/publication/ng-kan-2012-improved/index.md
new file mode 100644
index 0000000..b2f1022
--- /dev/null
+++ b/content/publication/ng-kan-2012-improved/index.md
@@ -0,0 +1,15 @@
+---
+title: Improved Temporal Relation Classification using Dependency Parses and Selective
+ Crowdsourced Annotations
+authors:
+- Jun-Ping Ng
+- min
+date: '2012-12-01'
+publishDate: '2024-07-11T07:40:56.469115Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of COLING 2012*'
+links:
+- name: URL
+ url: https://aclanthology.org/C12-1129
+---
diff --git a/content/publication/nguyen-etal-2018-treatment/cite.bib b/content/publication/nguyen-etal-2018-treatment/cite.bib
new file mode 100644
index 0000000..6ea923b
--- /dev/null
+++ b/content/publication/nguyen-etal-2018-treatment/cite.bib
@@ -0,0 +1,19 @@
+@inproceedings{nguyen-etal-2018-treatment,
+ abstract = {With Health 2.0, patients and caregivers increasingly seek information regarding possible drug side effects during their medical treatments in online health communities. These are helpful platforms for non-professional medical opinions, yet pose risk of being unreliable in quality and insufficient in quantity to cover the wide range of potential drug reactions. Existing approaches which analyze such user-generated content in online forums heavily rely on feature engineering of both documents and users, and often overlook the relationships between posts within a common discussion thread. Inspired by recent advancements, we propose a neural architecture that models the textual content of user-generated documents and user experiences in online communities to predict side effects during treatment. Experimental results show that our proposed architecture outperforms baseline models.},
+ address = {Brussels, Belgium},
+ author = {Nguyen, Van Hoang and
+Sugiyama, Kazunari and
+Kan, Min-Yen and
+Halder, Kishaloy},
+ booktitle = {Proceedings of the Ninth International Workshop on Health Text Mining and Information Analysis},
+ doi = {10.18653/v1/W18-5602},
+ editor = {Lavelli, Alberto and
+Minard, Anne-Lyse and
+Rinaldi, Fabio},
+ month = {October},
+ pages = {12--21},
+ publisher = {Association for Computational Linguistics},
+ title = {Treatment Side Effect Prediction from Online User-generated Content},
+ url = {https://aclanthology.org/W18-5602},
+ year = {2018}
+}
diff --git a/content/publication/nguyen-etal-2018-treatment/index.md b/content/publication/nguyen-etal-2018-treatment/index.md
new file mode 100644
index 0000000..3ae6381
--- /dev/null
+++ b/content/publication/nguyen-etal-2018-treatment/index.md
@@ -0,0 +1,29 @@
+---
+title: Treatment Side Effect Prediction from Online User-generated Content
+authors:
+- Van Hoang Nguyen
+- Kazunari Sugiyama
+- min
+- Kishaloy Halder
+date: '2018-10-01'
+publishDate: '2024-07-11T07:40:56.340987Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the Ninth International Workshop on Health Text Mining
+ and Information Analysis*'
+doi: 10.18653/v1/W18-5602
+abstract: With Health 2.0, patients and caregivers increasingly seek information regarding
+ possible drug side effects during their medical treatments in online health communities.
+ These are helpful platforms for non-professional medical opinions, yet pose risk
+ of being unreliable in quality and insufficient in quantity to cover the wide range
+ of potential drug reactions. Existing approaches which analyze such user-generated
+ content in online forums heavily rely on feature engineering of both documents and
+ users, and often overlook the relationships between posts within a common discussion
+ thread. Inspired by recent advancements, we propose a neural architecture that models
+ the textual content of user-generated documents and user experiences in online communities
+ to predict side effects during treatment. Experimental results show that our proposed
+ architecture outperforms baseline models.
+links:
+- name: URL
+ url: https://aclanthology.org/W18-5602
+---
diff --git a/content/publication/ou-etal-2023-songs/cite.bib b/content/publication/ou-etal-2023-songs/cite.bib
new file mode 100644
index 0000000..38e53e6
--- /dev/null
+++ b/content/publication/ou-etal-2023-songs/cite.bib
@@ -0,0 +1,19 @@
+@inproceedings{ou-etal-2023-songs,
+ abstract = {The development of general-domain neural machine translation (NMT) methods has advanced significantly in recent years, but the lack of naturalness and musical constraints in the outputs makes them unable to produce singable lyric translations. This paper bridges the singability quality gap by formalizing lyric translation into a constrained translation problem, converting theoretical guidance and practical techniques from translatology literature to prompt-driven NMT approaches, exploring better adaptation methods, and instantiating them to an English-Chinese lyric translation system. Our model achieves 99.85%, 99.00%, and 95.52% on length accuracy, rhyme accuracy, and word boundary recall. In our subjective evaluation, our model shows a 75% relative enhancement on overall quality, compared against naive fine-tuning (Code available at https://github.com/Sonata165/ControllableLyricTranslation).},
+ address = {Toronto, Canada},
+ author = {Ou, Longshen and
+Ma, Xichu and
+Kan, Min-Yen and
+Wang, Ye},
+ booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
+ doi = {10.18653/v1/2023.acl-long.27},
+ editor = {Rogers, Anna and
+Boyd-Graber, Jordan and
+Okazaki, Naoaki},
+ month = {July},
+ pages = {447--467},
+ publisher = {Association for Computational Linguistics},
+ title = {Songs Across Borders: Singable and Controllable Neural Lyric Translation},
+ url = {https://aclanthology.org/2023.acl-long.27},
+ year = {2023}
+}
diff --git a/content/publication/ou-etal-2023-songs/index.md b/content/publication/ou-etal-2023-songs/index.md
new file mode 100644
index 0000000..0a0ffeb
--- /dev/null
+++ b/content/publication/ou-etal-2023-songs/index.md
@@ -0,0 +1,29 @@
+---
+title: 'Songs Across Borders: Singable and Controllable Neural Lyric Translation'
+authors:
+- Longshen Ou
+- Xichu Ma
+- min
+- Ye Wang
+date: '2023-07-01'
+publishDate: '2024-07-06T02:22:24.625493Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 61st Annual Meeting of the Association for Computational
+ Linguistics (Volume 1: Long Papers)*'
+doi: 10.18653/v1/2023.acl-long.27
+abstract: The development of general-domain neural machine translation (NMT) methods
+ has advanced significantly in recent years, but the lack of naturalness and musical
+ constraints in the outputs makes them unable to produce singable lyric translations.
+ This paper bridges the singability quality gap by formalizing lyric translation
+ into a constrained translation problem, converting theoretical guidance and practical
+ techniques from translatology literature to prompt-driven NMT approaches, exploring
+ better adaptation methods, and instantiating them to an English-Chinese lyric translation
+ system. Our model achieves 99.85%, 99.00%, and 95.52% on length accuracy, rhyme
+ accuracy, and word boundary recall. In our subjective evaluation, our model shows
+ a 75% relative enhancement on overall quality, compared against naive fine-tuning
+  (Code available at https://github.com/Sonata165/ControllableLyricTranslation).
+links:
+- name: URL
+ url: https://aclanthology.org/2023.acl-long.27
+---
diff --git a/content/publication/pan-etal-2020-semantic/cite.bib b/content/publication/pan-etal-2020-semantic/cite.bib
new file mode 100644
index 0000000..11f9426
--- /dev/null
+++ b/content/publication/pan-etal-2020-semantic/cite.bib
@@ -0,0 +1,21 @@
+@inproceedings{pan-etal-2020-semantic,
+ abstract = {This paper proposes the problem of Deep Question Generation (DQG), which aims to generate complex questions that require reasoning over multiple pieces of information about the input passage. In order to capture the global structure of the document and facilitate reasoning, we propose a novel framework that first constructs a semantic-level graph for the input document and then encodes the semantic graph by introducing an attention-based GGNN (Att-GGNN). Afterward, we fuse the document-level and graph-level representations to perform joint training of content selection and question decoding. On the HotpotQA deep-question centric dataset, our model greatly improves performance over questions requiring reasoning over multiple facts, leading to state-of-the-art performance. The code is publicly available at https://github.com/WING-NUS/SG-Deep-Question-Generation.},
+ address = {Online},
+ author = {Pan, Liangming and
+Xie, Yuxi and
+Feng, Yansong and
+Chua, Tat-Seng and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
+ doi = {10.18653/v1/2020.acl-main.135},
+ editor = {Jurafsky, Dan and
+Chai, Joyce and
+Schluter, Natalie and
+Tetreault, Joel},
+ month = {July},
+ pages = {1463--1475},
+ publisher = {Association for Computational Linguistics},
+ title = {Semantic Graphs for Generating Deep Questions},
+ url = {https://aclanthology.org/2020.acl-main.135},
+ year = {2020}
+}
diff --git a/content/publication/pan-etal-2020-semantic/index.md b/content/publication/pan-etal-2020-semantic/index.md
new file mode 100644
index 0000000..efb4442
--- /dev/null
+++ b/content/publication/pan-etal-2020-semantic/index.md
@@ -0,0 +1,29 @@
+---
+title: Semantic Graphs for Generating Deep Questions
+authors:
+- Liangming Pan
+- Yuxi Xie
+- Yansong Feng
+- Tat-Seng Chua
+- min
+date: '2020-07-01'
+publishDate: '2024-07-11T07:40:56.306034Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 58th Annual Meeting of the Association for Computational
+ Linguistics*'
+doi: 10.18653/v1/2020.acl-main.135
+abstract: This paper proposes the problem of Deep Question Generation (DQG), which
+ aims to generate complex questions that require reasoning over multiple pieces of
+ information about the input passage. In order to capture the global structure of
+ the document and facilitate reasoning, we propose a novel framework that first constructs
+ a semantic-level graph for the input document and then encodes the semantic graph
+ by introducing an attention-based GGNN (Att-GGNN). Afterward, we fuse the document-level
+ and graph-level representations to perform joint training of content selection and
+ question decoding. On the HotpotQA deep-question centric dataset, our model greatly
+ improves performance over questions requiring reasoning over multiple facts, leading
+  to state-of-the-art performance. The code is publicly available at https://github.com/WING-NUS/SG-Deep-Question-Generation.
+links:
+- name: URL
+ url: https://aclanthology.org/2020.acl-main.135
+---
diff --git a/content/publication/pan-etal-2021-unsupervised/cite.bib b/content/publication/pan-etal-2021-unsupervised/cite.bib
new file mode 100644
index 0000000..09b2f6d
--- /dev/null
+++ b/content/publication/pan-etal-2021-unsupervised/cite.bib
@@ -0,0 +1,26 @@
+@inproceedings{pan-etal-2021-unsupervised,
+ abstract = {Obtaining training data for multi-hop question answering (QA) is time-consuming and resource-intensive. We explore the possibility to train a well-performed multi-hop QA model without referencing any human-labeled multi-hop question-answer pairs, i.e., unsupervised multi-hop QA. We propose MQA-QG, an unsupervised framework that can generate human-like multi-hop training data from both homogeneous and heterogeneous data sources. MQA-QG generates questions by first selecting/generating relevant information from each data source and then integrating the multiple information to form a multi-hop question. Using only generated training data, we can train a competent multi-hop QA which achieves 61% and 83% of the supervised learning performance for the HybridQA and the HotpotQA dataset, respectively. We also show that pretraining the QA system with the generated data would greatly reduce the demand for human-annotated training data. Our codes are publicly available at https://github.com/teacherpeterpan/Unsupervised-Multi-hop-QA.},
+ address = {Online},
+ author = {Pan, Liangming and
+Chen, Wenhu and
+Xiong, Wenhan and
+Kan, Min-Yen and
+Wang, William Yang},
+ booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
+ doi = {10.18653/v1/2021.naacl-main.469},
+ editor = {Toutanova, Kristina and
+Rumshisky, Anna and
+Zettlemoyer, Luke and
+Hakkani-Tur, Dilek and
+Beltagy, Iz and
+Bethard, Steven and
+Cotterell, Ryan and
+Chakraborty, Tanmoy and
+Zhou, Yichao},
+ month = {June},
+ pages = {5866--5880},
+ publisher = {Association for Computational Linguistics},
+ title = {Unsupervised Multi-hop Question Answering by Question Generation},
+ url = {https://aclanthology.org/2021.naacl-main.469},
+ year = {2021}
+}
diff --git a/content/publication/pan-etal-2021-unsupervised/index.md b/content/publication/pan-etal-2021-unsupervised/index.md
new file mode 100644
index 0000000..fdf1da2
--- /dev/null
+++ b/content/publication/pan-etal-2021-unsupervised/index.md
@@ -0,0 +1,31 @@
+---
+title: Unsupervised Multi-hop Question Answering by Question Generation
+authors:
+- Liangming Pan
+- Wenhu Chen
+- Wenhan Xiong
+- min
+- William Yang Wang
+date: '2021-06-01'
+publishDate: '2024-07-11T07:40:56.233397Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 2021 Conference of the North American Chapter of
+ the Association for Computational Linguistics: Human Language Technologies*'
+doi: 10.18653/v1/2021.naacl-main.469
+abstract: Obtaining training data for multi-hop question answering (QA) is time-consuming
+ and resource-intensive. We explore the possibility to train a well-performed multi-hop
+ QA model without referencing any human-labeled multi-hop question-answer pairs,
+ i.e., unsupervised multi-hop QA. We propose MQA-QG, an unsupervised framework that
+ can generate human-like multi-hop training data from both homogeneous and heterogeneous
+ data sources. MQA-QG generates questions by first selecting/generating relevant
+ information from each data source and then integrating the multiple information
+ to form a multi-hop question. Using only generated training data, we can train a
+ competent multi-hop QA which achieves 61% and 83% of the supervised learning performance
+ for the HybridQA and the HotpotQA dataset, respectively. We also show that pretraining
+ the QA system with the generated data would greatly reduce the demand for human-annotated
+  training data. Our codes are publicly available at https://github.com/teacherpeterpan/Unsupervised-Multi-hop-QA.
+links:
+- name: URL
+ url: https://aclanthology.org/2021.naacl-main.469
+---
diff --git a/content/publication/pan-etal-2021-zero/cite.bib b/content/publication/pan-etal-2021-zero/cite.bib
new file mode 100644
index 0000000..e6eace6
--- /dev/null
+++ b/content/publication/pan-etal-2021-zero/cite.bib
@@ -0,0 +1,21 @@
+@inproceedings{pan-etal-2021-zero,
+ abstract = {Neural models for automated fact verification have achieved promising results thanks to the availability of large, human-annotated datasets. However, for each new domain that requires fact verification, creating a dataset by manually writing claims and linking them to their supporting evidence is expensive. We develop QACG, a framework for training a robust fact verification model by using automatically generated claims that can be supported, refuted, or unverifiable from evidence from Wikipedia. QACG generates question-answer pairs from the evidence and then converts them into different types of claims. Experiments on the FEVER dataset show that our QACG framework significantly reduces the demand for human-annotated training data. In a zero-shot scenario, QACG improves a RoBERTa model's F1 from 50% to 77%, equivalent in performance to 2K+ manually-curated examples. Our QACG code is publicly available.},
+ address = {Online},
+ author = {Pan, Liangming and
+Chen, Wenhu and
+Xiong, Wenhan and
+Kan, Min-Yen and
+Wang, William Yang},
+ booktitle = {Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)},
+ doi = {10.18653/v1/2021.acl-short.61},
+ editor = {Zong, Chengqing and
+Xia, Fei and
+Li, Wenjie and
+Navigli, Roberto},
+ month = {August},
+ pages = {476--483},
+ publisher = {Association for Computational Linguistics},
+ title = {Zero-shot Fact Verification by Claim Generation},
+ url = {https://aclanthology.org/2021.acl-short.61},
+ year = {2021}
+}
diff --git a/content/publication/pan-etal-2021-zero/index.md b/content/publication/pan-etal-2021-zero/index.md
new file mode 100644
index 0000000..d41b59d
--- /dev/null
+++ b/content/publication/pan-etal-2021-zero/index.md
@@ -0,0 +1,32 @@
+---
+title: Zero-shot Fact Verification by Claim Generation
+authors:
+- Liangming Pan
+- Wenhu Chen
+- Wenhan Xiong
+- min
+- William Yang Wang
+date: '2021-08-01'
+publishDate: '2024-07-11T07:40:56.240829Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 59th Annual Meeting of the Association for Computational
+ Linguistics and the 11th International Joint Conference on Natural Language Processing
+ (Volume 2: Short Papers)*'
+doi: 10.18653/v1/2021.acl-short.61
+abstract: Neural models for automated fact verification have achieved promising results
+ thanks to the availability of large, human-annotated datasets. However, for each
+ new domain that requires fact verification, creating a dataset by manually writing
+ claims and linking them to their supporting evidence is expensive. We develop QACG,
+ a framework for training a robust fact verification model by using automatically
+ generated claims that can be supported, refuted, or unverifiable from evidence from
+ Wikipedia. QACG generates question-answer pairs from the evidence and then converts
+ them into different types of claims. Experiments on the FEVER dataset show that
+ our QACG framework significantly reduces the demand for human-annotated training
+  data. In a zero-shot scenario, QACG improves a RoBERTa model's F1 from 50% to 77%,
+ equivalent in performance to 2K+ manually-curated examples. Our QACG code is publicly
+ available.
+links:
+- name: URL
+ url: https://aclanthology.org/2021.acl-short.61
+---
diff --git a/content/publication/pan-etal-2023-attacking/cite.bib b/content/publication/pan-etal-2023-attacking/cite.bib
new file mode 100644
index 0000000..4f321e2
--- /dev/null
+++ b/content/publication/pan-etal-2023-attacking/cite.bib
@@ -0,0 +1,22 @@
+@inproceedings{pan-etal-2023-attacking,
+ address = {Nusa Dua, Bali},
+ author = {Pan, Liangming and
+Chen, Wenhu and
+Kan, Min-Yen and
+Wang, William Yang},
+ booktitle = {Proceedings of the 13th International Joint Conference on Natural Language Processing and the 3rd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)},
+ doi = {10.18653/v1/2023.ijcnlp-main.35},
+ editor = {Park, Jong C. and
+Arase, Yuki and
+Hu, Baotian and
+Lu, Wei and
+Wijaya, Derry and
+Purwarianti, Ayu and
+Krisnadhi, Adila Alfa},
+ month = {November},
+ pages = {525--539},
+ publisher = {Association for Computational Linguistics},
+ title = {Attacking Open-domain Question Answering by Injecting Misinformation},
+ url = {https://aclanthology.org/2023.ijcnlp-main.35},
+ year = {2023}
+}
diff --git a/content/publication/pan-etal-2023-attacking/index.md b/content/publication/pan-etal-2023-attacking/index.md
new file mode 100644
index 0000000..32960c9
--- /dev/null
+++ b/content/publication/pan-etal-2023-attacking/index.md
@@ -0,0 +1,19 @@
+---
+title: Attacking Open-domain Question Answering by Injecting Misinformation
+authors:
+- Liangming Pan
+- Wenhu Chen
+- min
+- William Yang Wang
+date: '2023-11-01'
+publishDate: '2024-07-06T02:22:24.540512Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 13th International Joint Conference on Natural Language
+ Processing and the 3rd Conference of the Asia-Pacific Chapter of the Association
+ for Computational Linguistics (Volume 1: Long Papers)*'
+doi: 10.18653/v1/2023.ijcnlp-main.35
+links:
+- name: URL
+ url: https://aclanthology.org/2023.ijcnlp-main.35
+---
diff --git a/content/publication/pan-etal-2023-fact/cite.bib b/content/publication/pan-etal-2023-fact/cite.bib
new file mode 100644
index 0000000..1f0ab9a
--- /dev/null
+++ b/content/publication/pan-etal-2023-fact/cite.bib
@@ -0,0 +1,22 @@
+@inproceedings{pan-etal-2023-fact,
+ abstract = {Fact-checking real-world claims often requires collecting multiple pieces of evidence and applying complex multi-step reasoning. In this paper, we present Program-Guided Fact-Checking (ProgramFC), a novel fact-checking model that decomposes complex claims into simpler sub-tasks that can be solved using a shared library of specialized functions. We first leverage the in-context learning ability of large language models to generate reasoning programs to guide the verification process. Afterward, we execute the program by delegating each sub-task to the corresponding sub-task handler. This process makes our model both explanatory and data-efficient, providing clear explanations of its reasoning process and requiring minimal training data. We evaluate ProgramFC on two challenging fact-checking datasets and show that it outperforms seven fact-checking baselines across different settings of evidence availability, with explicit output programs that benefit human debugging. Our codes and data are publicly available at https://github.com/mbzuai-nlp/ProgramFC.},
+ address = {Toronto, Canada},
+ author = {Pan, Liangming and
+Wu, Xiaobao and
+Lu, Xinyuan and
+Luu, Anh Tuan and
+Wang, William Yang and
+Kan, Min-Yen and
+Nakov, Preslav},
+ booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
+ doi = {10.18653/v1/2023.acl-long.386},
+ editor = {Rogers, Anna and
+Boyd-Graber, Jordan and
+Okazaki, Naoaki},
+ month = {July},
+ pages = {6981--7004},
+ publisher = {Association for Computational Linguistics},
+ title = {Fact-Checking Complex Claims with Program-Guided Reasoning},
+ url = {https://aclanthology.org/2023.acl-long.386},
+ year = {2023}
+}
diff --git a/content/publication/pan-etal-2023-fact/index.md b/content/publication/pan-etal-2023-fact/index.md
new file mode 100644
index 0000000..c494c71
--- /dev/null
+++ b/content/publication/pan-etal-2023-fact/index.md
@@ -0,0 +1,34 @@
+---
+title: Fact-Checking Complex Claims with Program-Guided Reasoning
+authors:
+- Liangming Pan
+- Xiaobao Wu
+- Xinyuan Lu
+- Anh Tuan Luu
+- William Yang Wang
+- min
+- Preslav Nakov
+date: '2023-07-01'
+publishDate: '2024-07-06T02:22:24.639283Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 61st Annual Meeting of the Association for Computational
+ Linguistics (Volume 1: Long Papers)*'
+doi: 10.18653/v1/2023.acl-long.386
+abstract: Fact-checking real-world claims often requires collecting multiple pieces
+ of evidence and applying complex multi-step reasoning. In this paper, we present
+ Program-Guided Fact-Checking (ProgramFC), a novel fact-checking model that decomposes
+ complex claims into simpler sub-tasks that can be solved using a shared library
+ of specialized functions. We first leverage the in-context learning ability of large
+ language models to generate reasoning programs to guide the verification process.
+ Afterward, we execute the program by delegating each sub-task to the corresponding
+ sub-task handler. This process makes our model both explanatory and data-efficient,
+ providing clear explanations of its reasoning process and requiring minimal training
+ data. We evaluate ProgramFC on two challenging fact-checking datasets and show that
+ it outperforms seven fact-checking baselines across different settings of evidence
+ availability, with explicit output programs that benefit human debugging. Our codes
+  and data are publicly available at https://github.com/mbzuai-nlp/ProgramFC.
+links:
+- name: URL
+ url: https://aclanthology.org/2023.acl-long.386
+---
diff --git a/content/publication/pan-etal-2023-investigating/cite.bib b/content/publication/pan-etal-2023-investigating/cite.bib
new file mode 100644
index 0000000..210bea9
--- /dev/null
+++ b/content/publication/pan-etal-2023-investigating/cite.bib
@@ -0,0 +1,21 @@
+@inproceedings{pan-etal-2023-investigating,
+ address = {Nusa Dua, Bali},
+ author = {Pan, Liangming and
+Zhang, Yunxiang and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the 13th International Joint Conference on Natural Language Processing and the 3rd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)},
+ doi = {10.18653/v1/2023.ijcnlp-main.34},
+ editor = {Park, Jong C. and
+Arase, Yuki and
+Hu, Baotian and
+Lu, Wei and
+Wijaya, Derry and
+Purwarianti, Ayu and
+Krisnadhi, Adila Alfa},
+ month = {November},
+ pages = {511--524},
+ publisher = {Association for Computational Linguistics},
+ title = {Investigating Zero- and Few-shot Generalization in Fact Verification},
+ url = {https://aclanthology.org/2023.ijcnlp-main.34},
+ year = {2023}
+}
diff --git a/content/publication/pan-etal-2023-investigating/index.md b/content/publication/pan-etal-2023-investigating/index.md
new file mode 100644
index 0000000..915a148
--- /dev/null
+++ b/content/publication/pan-etal-2023-investigating/index.md
@@ -0,0 +1,18 @@
+---
+title: Investigating Zero- and Few-shot Generalization in Fact Verification
+authors:
+- Liangming Pan
+- Yunxiang Zhang
+- min
+date: '2023-11-01'
+publishDate: '2024-07-06T02:22:24.534246Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 13th International Joint Conference on Natural Language
+ Processing and the 3rd Conference of the Asia-Pacific Chapter of the Association
+ for Computational Linguistics (Volume 1: Long Papers)*'
+doi: 10.18653/v1/2023.ijcnlp-main.34
+links:
+- name: URL
+ url: https://aclanthology.org/2023.ijcnlp-main.34
+---
diff --git a/content/publication/pan-etal-2023-qacheck/cite.bib b/content/publication/pan-etal-2023-qacheck/cite.bib
new file mode 100644
index 0000000..36cad00
--- /dev/null
+++ b/content/publication/pan-etal-2023-qacheck/cite.bib
@@ -0,0 +1,18 @@
+@inproceedings{pan-etal-2023-qacheck,
+ abstract = {Fact-checking real-world claims often requires intricate, multi-step reasoning due to the absence of direct evidence to support or refute them. However, existing fact-checking systems often lack transparency in their decision-making, making it challenging for users to comprehend their reasoning process. To address this, we propose the Question-guided Multi-hop Fact-Checking (QACheck) system, which guides the model's reasoning process by asking a series of questions critical for verifying a claim. QACheck has five key modules: a claim verifier, a question generator, a question-answering module, a QA validator, and a reasoner. Users can input a claim into QACheck, which then predicts its veracity and provides a comprehensive report detailing its reasoning process, guided by a sequence of (question, answer) pairs. QACheck also provides the source of evidence supporting each question, fostering a transparent, explainable, and user-friendly fact-checking process.},
+ address = {Singapore},
+ author = {Pan, Liangming and
+Lu, Xinyuan and
+Kan, Min-Yen and
+Nakov, Preslav},
+ booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: System Demonstrations},
+ doi = {10.18653/v1/2023.emnlp-demo.23},
+ editor = {Feng, Yansong and
+Lefever, Els},
+ month = {December},
+ pages = {264--273},
+ publisher = {Association for Computational Linguistics},
+ title = {QACheck: A Demonstration System for Question-Guided Multi-Hop Fact-Checking},
+ url = {https://aclanthology.org/2023.emnlp-demo.23},
+ year = {2023}
+}
diff --git a/content/publication/pan-etal-2023-qacheck/index.md b/content/publication/pan-etal-2023-qacheck/index.md
new file mode 100644
index 0000000..4299039
--- /dev/null
+++ b/content/publication/pan-etal-2023-qacheck/index.md
@@ -0,0 +1,30 @@
+---
+title: 'QACheck: A Demonstration System for Question-Guided Multi-Hop Fact-Checking'
+authors:
+- Liangming Pan
+- Xinyuan Lu
+- min
+- Preslav Nakov
+date: '2023-12-01'
+publishDate: '2024-07-06T02:22:24.589500Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 2023 Conference on Empirical Methods in Natural
+ Language Processing: System Demonstrations*'
+doi: 10.18653/v1/2023.emnlp-demo.23
+abstract: 'Fact-checking real-world claims often requires intricate, multi-step reasoning
+ due to the absence of direct evidence to support or refute them. However, existing
+ fact-checking systems often lack transparency in their decision-making, making it
+ challenging for users to comprehend their reasoning process. To address this, we
+ propose the Question-guided Multi-hop Fact-Checking (QACheck) system, which guides
+  the model's reasoning process by asking a series of questions critical for verifying
+ a claim. QACheck has five key modules: a claim verifier, a question generator, a
+ question-answering module, a QA validator, and a reasoner. Users can input a claim
+ into QACheck, which then predicts its veracity and provides a comprehensive report
+ detailing its reasoning process, guided by a sequence of (question, answer) pairs.
+ QACheck also provides the source of evidence supporting each question, fostering
+ a transparent, explainable, and user-friendly fact-checking process.'
+links:
+- name: URL
+ url: https://aclanthology.org/2023.emnlp-demo.23
+---
diff --git a/content/publication/pan-etal-2023-risk/cite.bib b/content/publication/pan-etal-2023-risk/cite.bib
new file mode 100644
index 0000000..8c21118
--- /dev/null
+++ b/content/publication/pan-etal-2023-risk/cite.bib
@@ -0,0 +1,21 @@
+@inproceedings{pan-etal-2023-risk,
+ abstract = {We investigate the potential misuse of modern Large Language Models (LLMs) for generating credible-sounding misinformation and its subsequent impact on information-intensive applications, particularly Open-Domain Question Answering (ODQA) systems. We establish a threat model and simulate potential misuse scenarios, both unintentional and intentional, to assess the extent to which LLMs can be utilized to produce misinformation. Our study reveals that LLMs can act as effective misinformation generators, leading to a significant degradation (up to 87%) in the performance of ODQA systems. Moreover, we uncover disparities in the attributes associated with persuading humans and machines, presenting an obstacle to current human-centric approaches to combat misinformation. To mitigate the harm caused by LLM-generated misinformation, we propose three defense strategies: misinformation detection, vigilant prompting, and reader ensemble. These approaches have demonstrated promising results, albeit with certain associated costs. Lastly, we discuss the practicality of utilizing LLMs as automatic misinformation generators and provide relevant resources and code to facilitate future research in this area.},
+ address = {Singapore},
+ author = {Pan, Yikang and
+Pan, Liangming and
+Chen, Wenhu and
+Nakov, Preslav and
+Kan, Min-Yen and
+Wang, William},
+ booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2023},
+ doi = {10.18653/v1/2023.findings-emnlp.97},
+ editor = {Bouamor, Houda and
+Pino, Juan and
+Bali, Kalika},
+ month = {December},
+ pages = {1389--1403},
+ publisher = {Association for Computational Linguistics},
+ title = {On the Risk of Misinformation Pollution with Large Language Models},
+ url = {https://aclanthology.org/2023.findings-emnlp.97},
+ year = {2023}
+}
diff --git a/content/publication/pan-etal-2023-risk/index.md b/content/publication/pan-etal-2023-risk/index.md
new file mode 100644
index 0000000..1ec72d6
--- /dev/null
+++ b/content/publication/pan-etal-2023-risk/index.md
@@ -0,0 +1,34 @@
+---
+title: On the Risk of Misinformation Pollution with Large Language Models
+authors:
+- Yikang Pan
+- Liangming Pan
+- Wenhu Chen
+- Preslav Nakov
+- min
+- William Wang
+date: '2023-12-01'
+publishDate: '2024-07-06T02:22:24.546968Z'
+publication_types:
+- paper-conference
+publication: '*Findings of the Association for Computational Linguistics: EMNLP 2023*'
+doi: 10.18653/v1/2023.findings-emnlp.97
+abstract: 'We investigate the potential misuse of modern Large Language Models (LLMs)
+ for generating credible-sounding misinformation and its subsequent impact on information-intensive
+ applications, particularly Open-Domain Question Answering (ODQA) systems. We establish
+ a threat model and simulate potential misuse scenarios, both unintentional and intentional,
+ to assess the extent to which LLMs can be utilized to produce misinformation. Our
+ study reveals that LLMs can act as effective misinformation generators, leading
+ to a significant degradation (up to 87%) in the performance of ODQA systems. Moreover,
+ we uncover disparities in the attributes associated with persuading humans and machines,
+ presenting an obstacle to current human-centric approaches to combat misinformation.
+ To mitigate the harm caused by LLM-generated misinformation, we propose three defense
+ strategies: misinformation detection, vigilant prompting, and reader ensemble. These
+ approaches have demonstrated promising results, albeit with certain associated costs.
+ Lastly, we discuss the practicality of utilizing LLMs as automatic misinformation
+ generators and provide relevant resources and code to facilitate future research
+ in this area.'
+links:
+- name: URL
+ url: https://aclanthology.org/2023.findings-emnlp.97
+---
diff --git a/content/publication/prasad-kan-2017-wing/cite.bib b/content/publication/prasad-kan-2017-wing/cite.bib
new file mode 100644
index 0000000..0788a1d
--- /dev/null
+++ b/content/publication/prasad-kan-2017-wing/cite.bib
@@ -0,0 +1,20 @@
+@inproceedings{prasad-kan-2017-wing,
+ abstract = {We describe an end-to-end pipeline processing approach for SemEval 2017's Task 10 to extract keyphrases and their relations from scientific publications. We jointly identify and classify keyphrases by modeling the subtasks as sequential labeling. Our system utilizes standard, surface-level features along with the adjacent word features, and performs conditional decoding on whole text to extract keyphrases. We focus only on the identification and typing of keyphrases (Subtasks A and B, together referred as extraction), but provide an end-to-end system inclusive of keyphrase relation identification (Subtask C) for completeness. Our top performing configuration achieves an $F_1$ of 0.27 for the end-to-end keyphrase extraction and relation identification scenario on the final test data, and compares on par to other top ranked systems for keyphrase extraction. Our system outperforms other techniques that do not employ global decoding and hence do not account for dependencies between keyphrases. We believe this is crucial for keyphrase classification in the given context of scientific document mining.},
+ address = {Vancouver, Canada},
+ author = {Prasad, Animesh and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the 11th International Workshop on Semantic Evaluation (SemEval-2017)},
+ doi = {10.18653/v1/S17-2170},
+ editor = {Bethard, Steven and
+Carpuat, Marine and
+Apidianaki, Marianna and
+Mohammad, Saif M. and
+Cer, Daniel and
+Jurgens, David},
+ month = {August},
+ pages = {973--977},
+ publisher = {Association for Computational Linguistics},
+ title = {WING-NUS at SemEval-2017 Task 10: Keyphrase Extraction and Classification as Joint Sequence Labeling},
+ url = {https://aclanthology.org/S17-2170},
+ year = {2017}
+}
diff --git a/content/publication/prasad-kan-2017-wing/index.md b/content/publication/prasad-kan-2017-wing/index.md
new file mode 100644
index 0000000..d00130c
--- /dev/null
+++ b/content/publication/prasad-kan-2017-wing/index.md
@@ -0,0 +1,31 @@
+---
+title: 'WING-NUS at SemEval-2017 Task 10: Keyphrase Extraction and Classification
+ as Joint Sequence Labeling'
+authors:
+- Animesh Prasad
+- min
+date: '2017-08-01'
+publishDate: '2024-07-11T07:40:56.382115Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 11th International Workshop on Semantic Evaluation
+ (SemEval-2017)*'
+doi: 10.18653/v1/S17-2170
+abstract: We describe an end-to-end pipeline processing approach for SemEval 2017's
+ Task 10 to extract keyphrases and their relations from scientific publications.
+ We jointly identify and classify keyphrases by modeling the subtasks as sequential
+ labeling. Our system utilizes standard, surface-level features along with the adjacent
+ word features, and performs conditional decoding on whole text to extract keyphrases.
+ We focus only on the identification and typing of keyphrases (Subtasks A and B,
+ together referred as extraction), but provide an end-to-end system inclusive of
+ keyphrase relation identification (Subtask C) for completeness. Our top performing
+ configuration achieves an $F_1$ of 0.27 for the end-to-end keyphrase extraction
+ and relation identification scenario on the final test data, and compares on par
+ to other top ranked systems for keyphrase extraction. Our system outperforms other
+ techniques that do not employ global decoding and hence do not account for dependencies
+ between keyphrases. We believe this is crucial for keyphrase classification in the
+ given context of scientific document mining.
+links:
+- name: URL
+ url: https://aclanthology.org/S17-2170
+---
diff --git a/content/publication/prasad-kan-2019-glocal/cite.bib b/content/publication/prasad-kan-2019-glocal/cite.bib
new file mode 100644
index 0000000..c25c82b
--- /dev/null
+++ b/content/publication/prasad-kan-2019-glocal/cite.bib
@@ -0,0 +1,17 @@
+@inproceedings{prasad-kan-2019-glocal,
+ abstract = {Graph Convolutional Networks (GCNs) are a class of spectral clustering techniques that leverage localized convolution filters to perform supervised classification directly on graphical structures. While such methods model nodes' local pairwise importance, they lack the capability to model global importance relative to other nodes of the graph. This causes such models to miss critical information in tasks where global ranking is a key component for the task, such as in keyphrase extraction. We address this shortcoming by allowing the proper incorporation of global information into the GCN family of models through the use of scaled node weights. In the context of keyphrase extraction, incorporating global random walk scores obtained from TextRank boosts performance significantly. With our proposed method, we achieve state-of-the-art results, bettering a strong baseline by an absolute 2% increase in F1 score.},
+ address = {Minneapolis, Minnesota},
+ author = {Prasad, Animesh and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)},
+ doi = {10.18653/v1/N19-1182},
+ editor = {Burstein, Jill and
+Doran, Christy and
+Solorio, Thamar},
+ month = {June},
+ pages = {1837--1846},
+ publisher = {Association for Computational Linguistics},
+ title = {Glocal: Incorporating Global Information in Local Convolution for Keyphrase Extraction},
+ url = {https://aclanthology.org/N19-1182},
+ year = {2019}
+}
diff --git a/content/publication/prasad-kan-2019-glocal/index.md b/content/publication/prasad-kan-2019-glocal/index.md
new file mode 100644
index 0000000..8a7b98f
--- /dev/null
+++ b/content/publication/prasad-kan-2019-glocal/index.md
@@ -0,0 +1,29 @@
+---
+title: 'Glocal: Incorporating Global Information in Local Convolution for Keyphrase
+ Extraction'
+authors:
+- Animesh Prasad
+- min
+date: '2019-06-01'
+publishDate: '2024-07-11T07:40:56.320220Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 2019 Conference of the North American Chapter of
+ the Association for Computational Linguistics: Human Language Technologies, Volume
+ 1 (Long and Short Papers)*'
+doi: 10.18653/v1/N19-1182
+abstract: Graph Convolutional Networks (GCNs) are a class of spectral clustering techniques
+ that leverage localized convolution filters to perform supervised classification
+ directly on graphical structures. While such methods model nodes' local pairwise
+ importance, they lack the capability to model global importance relative to other
+ nodes of the graph. This causes such models to miss critical information in tasks
+ where global ranking is a key component for the task, such as in keyphrase extraction.
+ We address this shortcoming by allowing the proper incorporation of global information
+ into the GCN family of models through the use of scaled node weights. In the context
+ of keyphrase extraction, incorporating global random walk scores obtained from TextRank
+ boosts performance significantly. With our proposed method, we achieve state-of-the-art
+ results, bettering a strong baseline by an absolute 2% increase in F1 score.
+links:
+- name: URL
+ url: https://aclanthology.org/N19-1182
+---
diff --git a/content/publication/qin-etal-2022-gl/cite.bib b/content/publication/qin-etal-2022-gl/cite.bib
new file mode 100644
index 0000000..d53dffd
--- /dev/null
+++ b/content/publication/qin-etal-2022-gl/cite.bib
@@ -0,0 +1,22 @@
+@inproceedings{qin-etal-2022-gl,
+ abstract = {Due to high data demands of current methods, attention to zero-shot cross-lingual spoken language understanding (SLU) has grown, as such approaches greatly reduce human annotation effort. However, existing models solely rely on shared parameters, which can only perform implicit alignment across languages. We present Global-Local Contrastive Learning Framework (GL-CLeF) to address this shortcoming. Specifically, we employ contrastive learning, leveraging bilingual dictionaries to construct multilingual views of the same utterance, then encourage their representations to be more similar than negative example pairs, which achieves to explicitly align representations of similar sentences across languages. In addition, a key step in GL-CLeF is a proposed Local and Global component, which achieves a fine-grained cross-lingual transfer (i.e., sentence-level Local intent transfer, token-level Local slot transfer, and semantic-level Global transfer across intent and slot). Experiments on MultiATIS++ show that GL-CLeF achieves the best performance and successfully pulls representations of similar sentences across languages closer.},
+ address = {Dublin, Ireland},
+ author = {Qin, Libo and
+Chen, Qiguang and
+Xie, Tianbao and
+Li, Qixin and
+Lou, Jian-Guang and
+Che, Wanxiang and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
+ doi = {10.18653/v1/2022.acl-long.191},
+ editor = {Muresan, Smaranda and
+Nakov, Preslav and
+Villavicencio, Aline},
+ month = {May},
+ pages = {2677--2686},
+ publisher = {Association for Computational Linguistics},
+ title = {GL-CLeF: A Global--Local Contrastive Learning Framework for Cross-lingual Spoken Language Understanding},
+ url = {https://aclanthology.org/2022.acl-long.191},
+ year = {2022}
+}
diff --git a/content/publication/qin-etal-2022-gl/index.md b/content/publication/qin-etal-2022-gl/index.md
new file mode 100644
index 0000000..049f37f
--- /dev/null
+++ b/content/publication/qin-etal-2022-gl/index.md
@@ -0,0 +1,36 @@
+---
+title: 'GL-CLeF: A Global--Local Contrastive Learning Framework for Cross-lingual
+ Spoken Language Understanding'
+authors:
+- Libo Qin
+- Qiguang Chen
+- Tianbao Xie
+- Qixin Li
+- Jian-Guang Lou
+- Wanxiang Che
+- min
+date: '2022-05-01'
+publishDate: '2024-07-05T17:09:42.638128Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 60th Annual Meeting of the Association for Computational
+ Linguistics (Volume 1: Long Papers)*'
+doi: 10.18653/v1/2022.acl-long.191
+abstract: Due to high data demands of current methods, attention to zero-shot cross-lingual
+ spoken language understanding (SLU) has grown, as such approaches greatly reduce
+ human annotation effort. However, existing models solely rely on shared parameters,
+ which can only perform implicit alignment across languages. We present Global-Local
+ Contrastive Learning Framework (GL-CLeF) to address this shortcoming. Specifically,
+ we employ contrastive learning, leveraging bilingual dictionaries to construct multilingual
+ views of the same utterance, then encourage their representations to be more similar
+ than negative example pairs, which achieves to explicitly align representations
+ of similar sentences across languages. In addition, a key step in GL-CLeF is a proposed
+ Local and Global component, which achieves a fine-grained cross-lingual transfer
+ (i.e., sentence-level Local intent transfer, token-level Local slot transfer, and
+ semantic-level Global transfer across intent and slot). Experiments on MultiATIS++
+ show that GL-CLeF achieves the best performance and successfully pulls representations
+ of similar sentences across languages closer.
+links:
+- name: URL
+ url: https://aclanthology.org/2022.acl-long.191
+---
diff --git a/content/publication/qiu-etal-2004-public/cite.bib b/content/publication/qiu-etal-2004-public/cite.bib
new file mode 100644
index 0000000..90b8533
--- /dev/null
+++ b/content/publication/qiu-etal-2004-public/cite.bib
@@ -0,0 +1,17 @@
+@inproceedings{qiu-etal-2004-public,
+ address = {Lisbon, Portugal},
+ author = {Qiu, Long and
+Kan, Min-Yen and
+Chua, Tat-Seng},
+ booktitle = {Proceedings of the Fourth International Conference on Language Resources and Evaluation (LREC'04)},
+ editor = {Lino, Maria Teresa and
+Xavier, Maria Francisca and
+Ferreira, Fátima and
+Costa, Rute and
+Silva, Raquel},
+ month = {May},
+ publisher = {European Language Resources Association (ELRA)},
+ title = {A Public Reference Implementation of the RAP Anaphora Resolution Algorithm},
+ url = {http://www.lrec-conf.org/proceedings/lrec2004/pdf/778.pdf},
+ year = {2004}
+}
diff --git a/content/publication/qiu-etal-2004-public/index.md b/content/publication/qiu-etal-2004-public/index.md
new file mode 100644
index 0000000..14ae8dc
--- /dev/null
+++ b/content/publication/qiu-etal-2004-public/index.md
@@ -0,0 +1,14 @@
+---
+title: A Public Reference Implementation of the RAP Anaphora Resolution Algorithm
+authors:
+- Long Qiu
+- min
+- Tat-Seng Chua
+date: '2004-05-01'
+publishDate: '2024-07-11T07:40:56.604279Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the Fourth International Conference on Language Resources
+ and Evaluation (LREC’04)*'
+url_pdf: http://www.lrec-conf.org/proceedings/lrec2004/pdf/778.pdf
+---
diff --git a/content/publication/qiu-etal-2006-paraphrase/cite.bib b/content/publication/qiu-etal-2006-paraphrase/cite.bib
new file mode 100644
index 0000000..f196f5c
--- /dev/null
+++ b/content/publication/qiu-etal-2006-paraphrase/cite.bib
@@ -0,0 +1,15 @@
+@inproceedings{qiu-etal-2006-paraphrase,
+ address = {Sydney, Australia},
+ author = {Qiu, Long and
+Kan, Min-Yen and
+Chua, Tat-Seng},
+ booktitle = {Proceedings of the 2006 Conference on Empirical Methods in Natural Language Processing},
+ editor = {Jurafsky, Dan and
+Gaussier, Eric},
+ month = {July},
+ pages = {18--26},
+ publisher = {Association for Computational Linguistics},
+ title = {Paraphrase Recognition via Dissimilarity Significance Classification},
+ url = {https://aclanthology.org/W06-1603},
+ year = {2006}
+}
diff --git a/content/publication/qiu-etal-2006-paraphrase/index.md b/content/publication/qiu-etal-2006-paraphrase/index.md
new file mode 100644
index 0000000..9dd4dc0
--- /dev/null
+++ b/content/publication/qiu-etal-2006-paraphrase/index.md
@@ -0,0 +1,16 @@
+---
+title: Paraphrase Recognition via Dissimilarity Significance Classification
+authors:
+- Long Qiu
+- min
+- Tat-Seng Chua
+date: '2006-07-01'
+publishDate: '2024-07-11T07:40:56.598259Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 2006 Conference on Empirical Methods in Natural
+ Language Processing*'
+links:
+- name: URL
+ url: https://aclanthology.org/W06-1603
+---
diff --git a/content/publication/qiu-etal-2008-modeling/cite.bib b/content/publication/qiu-etal-2008-modeling/cite.bib
new file mode 100644
index 0000000..3c3f25b
--- /dev/null
+++ b/content/publication/qiu-etal-2008-modeling/cite.bib
@@ -0,0 +1,9 @@
+@inproceedings{qiu-etal-2008-modeling,
+ author = {Qiu, Long and
+Kan, Min-Yen and
+Chua, Tat-Seng},
+ booktitle = {Proceedings of the Third International Joint Conference on Natural Language Processing: Volume-I},
+ title = {Modeling Context in Scenario Template Creation},
+ url = {https://aclanthology.org/I08-1021},
+ year = {2008}
+}
diff --git a/content/publication/qiu-etal-2008-modeling/index.md b/content/publication/qiu-etal-2008-modeling/index.md
new file mode 100644
index 0000000..23d4ed0
--- /dev/null
+++ b/content/publication/qiu-etal-2008-modeling/index.md
@@ -0,0 +1,16 @@
+---
+title: Modeling Context in Scenario Template Creation
+authors:
+- Long Qiu
+- min
+- Tat-Seng Chua
+date: '2008-01-01'
+publishDate: '2024-07-11T07:40:56.567890Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the Third International Joint Conference on Natural
+ Language Processing: Volume-I*'
+links:
+- name: URL
+ url: https://aclanthology.org/I08-1021
+---
diff --git a/content/publication/ramesh-kashyap-etal-2021-analyzing/cite.bib b/content/publication/ramesh-kashyap-etal-2021-analyzing/cite.bib
new file mode 100644
index 0000000..6d06f58
--- /dev/null
+++ b/content/publication/ramesh-kashyap-etal-2021-analyzing/cite.bib
@@ -0,0 +1,25 @@
+@inproceedings{ramesh-kashyap-etal-2021-analyzing,
+ abstract = {The robustness of pretrained language models (PLMs) is generally measured using performance drops on two or more domains. However, we do not yet understand the inherent robustness achieved by contributions from different layers of a PLM. We systematically analyze the robustness of these representations layer by layer from two perspectives. First, we measure the robustness of representations by using domain divergence between two domains. We find that i) Domain variance increases from the lower to the upper layers for vanilla PLMs; ii) Models continuously pretrained on domain-specific data (DAPT) (Gururangan et al., 2020) exhibit more variance than their pretrained PLM counterparts; and that iii) Distilled models (e.g., DistilBERT) also show greater domain variance. Second, we investigate the robustness of representations by analyzing the encoded syntactic and semantic information using diagnostic probes. We find that similar layers have similar amounts of linguistic information for data from an unseen domain.},
+ address = {Kyiv, Ukraine},
+ author = {Ramesh Kashyap, Abhinav and
+Mehnaz, Laiba and
+Malik, Bhavitvya and
+Waheed, Abdul and
+Hazarika, Devamanyu and
+Kan, Min-Yen and
+Shah, Rajiv Ratn},
+ booktitle = {Proceedings of the Second Workshop on Domain Adaptation for NLP},
+ editor = {Ben-David, Eyal and
+Cohen, Shay and
+McDonald, Ryan and
+Plank, Barbara and
+Reichart, Roi and
+Rotman, Guy and
+Ziser, Yftah},
+ month = {April},
+ pages = {222--244},
+ publisher = {Association for Computational Linguistics},
+ title = {Analyzing the Domain Robustness of Pretrained Language Models, Layer by Layer},
+ url = {https://aclanthology.org/2021.adaptnlp-1.23},
+ year = {2021}
+}
diff --git a/content/publication/ramesh-kashyap-etal-2021-analyzing/index.md b/content/publication/ramesh-kashyap-etal-2021-analyzing/index.md
new file mode 100644
index 0000000..a9472bc
--- /dev/null
+++ b/content/publication/ramesh-kashyap-etal-2021-analyzing/index.md
@@ -0,0 +1,32 @@
+---
+title: Analyzing the Domain Robustness of Pretrained Language Models, Layer by Layer
+authors:
+- Abhinav Ramesh Kashyap
+- Laiba Mehnaz
+- Bhavitvya Malik
+- Abdul Waheed
+- Devamanyu Hazarika
+- min
+- Rajiv Ratn Shah
+date: '2021-04-01'
+publishDate: '2024-07-11T07:40:56.223020Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the Second Workshop on Domain Adaptation for NLP*'
+ abstract: The robustness of pretrained language models (PLMs) is generally measured
+ using performance drops on two or more domains. However, we do not yet understand
+ the inherent robustness achieved by contributions from different layers of a PLM.
+ We systematically analyze the robustness of these representations layer by layer
+ from two perspectives. First, we measure the robustness of representations by using
+ domain divergence between two domains. We find that i) Domain variance increases
+ from the lower to the upper layers for vanilla PLMs; ii) Models continuously pretrained
+ on domain-specific data (DAPT) (Gururangan et al., 2020) exhibit more variance than
+ their pretrained PLM counterparts; and that iii) Distilled models (e.g., DistilBERT)
+ also show greater domain variance. Second, we investigate the robustness of representations
+ by analyzing the encoded syntactic and semantic information using diagnostic probes.
+ We find that similar layers have similar amounts of linguistic information for data
+ from an unseen domain.
+links:
+- name: URL
+ url: https://aclanthology.org/2021.adaptnlp-1.23
+---
diff --git a/content/publication/ramesh-kashyap-etal-2022-different/cite.bib b/content/publication/ramesh-kashyap-etal-2022-different/cite.bib
new file mode 100644
index 0000000..ed49be6
--- /dev/null
+++ b/content/publication/ramesh-kashyap-etal-2022-different/cite.bib
@@ -0,0 +1,20 @@
+@inproceedings{ramesh-kashyap-etal-2022-different,
+ abstract = {Automatic transfer of text between domains has become popular in recent times. One of its aims is to preserve the semantic content while adapting to the target domain. However, it does not explicitly maintain other attributes between the source and translated text: e.g., text length and descriptiveness. Maintaining constraints in transfer has several downstream applications, including data augmentation and debiasing. We introduce a method for such constrained unsupervised text style transfer by introducing two complementary losses to the generative adversarial network (GAN) family of models. Unlike the competing losses used in GANs, we introduce cooperative losses where the discriminator and the generator cooperate and reduce the same loss. The first is a contrastive loss and the second is a classification loss --- aiming to regularize the latent space further and bring similar sentences closer together. We demonstrate that such training retains lexical, syntactic and domain-specific constraints between domains for multiple benchmark datasets, including ones where more than one attribute change. We show that the complementary cooperative losses improve text quality, according to both automated and human evaluation measures.},
+ address = {Dublin, Ireland},
+ author = {Ramesh Kashyap, Abhinav and
+Hazarika, Devamanyu and
+Kan, Min-Yen and
+Zimmermann, Roger and
+Poria, Soujanya},
+ booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
+ doi = {10.18653/v1/2022.acl-long.32},
+ editor = {Muresan, Smaranda and
+Nakov, Preslav and
+Villavicencio, Aline},
+ month = {May},
+ pages = {416--431},
+ publisher = {Association for Computational Linguistics},
+ title = {So Different Yet So Alike! Constrained Unsupervised Text Style Transfer},
+ url = {https://aclanthology.org/2022.acl-long.32},
+ year = {2022}
+}
diff --git a/content/publication/ramesh-kashyap-etal-2022-different/index.md b/content/publication/ramesh-kashyap-etal-2022-different/index.md
new file mode 100644
index 0000000..2ac3644
--- /dev/null
+++ b/content/publication/ramesh-kashyap-etal-2022-different/index.md
@@ -0,0 +1,34 @@
+---
+title: So Different Yet So Alike! Constrained Unsupervised Text Style Transfer
+authors:
+- Abhinav Ramesh Kashyap
+- Devamanyu Hazarika
+- min
+- Roger Zimmermann
+- Soujanya Poria
+date: '2022-05-01'
+publishDate: '2024-07-05T17:09:42.630957Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 60th Annual Meeting of the Association for Computational
+ Linguistics (Volume 1: Long Papers)*'
+doi: 10.18653/v1/2022.acl-long.32
+abstract: 'Automatic transfer of text between domains has become popular in recent
+ times. One of its aims is to preserve the semantic content while adapting to the
+ target domain. However, it does not explicitly maintain other attributes between
+ the source and translated text: e.g., text length and descriptiveness. Maintaining
+ constraints in transfer has several downstream applications, including data augmentation
+ and debiasing. We introduce a method for such constrained unsupervised text style
+ transfer by introducing two complementary losses to the generative adversarial network
+ (GAN) family of models. Unlike the competing losses used in GANs, we introduce cooperative
+ losses where the discriminator and the generator cooperate and reduce the same loss.
+ The first is a contrastive loss and the second is a classification loss --- aiming
+ to regularize the latent space further and bring similar sentences closer together.
+ We demonstrate that such training retains lexical, syntactic and domain-specific
+ constraints between domains for multiple benchmark datasets, including ones where
+ more than one attribute change. We show that the complementary cooperative losses
+ improve text quality, according to both automated and human evaluation measures.'
+links:
+- name: URL
+ url: https://aclanthology.org/2022.acl-long.32
+---
diff --git a/content/publication/ramesh-kashyap-kan-2020-sciwing/cite.bib b/content/publication/ramesh-kashyap-kan-2020-sciwing/cite.bib
new file mode 100644
index 0000000..32e003d
--- /dev/null
+++ b/content/publication/ramesh-kashyap-kan-2020-sciwing/cite.bib
@@ -0,0 +1,25 @@
+@inproceedings{ramesh-kashyap-kan-2020-sciwing,
+ abstract = {We introduce SciWING, an open-source software toolkit which provides access to state-of-the-art pre-trained models for scientific document processing (SDP) tasks, such as citation string parsing, logical structure recovery and citation intent classification. Compared to other toolkits, SciWING follows a full neural pipeline and provides a Python interface for SDP. When needed, SciWING provides fine-grained control for rapid experimentation with different models by swapping and stacking different modules. Transfer learning from general and scientific documents specific pre-trained transformers (i.e., BERT, SciBERT, etc.) can be performed. SciWING incorporates ready-to-use web and terminal-based applications and demonstrations to aid adoption and development. The toolkit is available from http://sciwing.io and the demos are available at http://rebrand.ly/sciwing-demo.},
+ address = {Online},
+ author = {Ramesh Kashyap, Abhinav and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the First Workshop on Scholarly Document Processing},
+ doi = {10.18653/v1/2020.sdp-1.13},
+ editor = {Chandrasekaran, Muthu Kumar and
+de Waard, Anita and
+Feigenblat, Guy and
+Freitag, Dayne and
+Ghosal, Tirthankar and
+Hovy, Eduard and
+Knoth, Petr and
+Konopnicki, David and
+Mayr, Philipp and
+Patton, Robert M. and
+Shmueli-Scheuer, Michal},
+ month = {November},
+ pages = {113--120},
+ publisher = {Association for Computational Linguistics},
+ title = {SciWING-- A Software Toolkit for Scientific Document Processing},
+ url = {https://aclanthology.org/2020.sdp-1.13},
+ year = {2020}
+}
diff --git a/content/publication/ramesh-kashyap-kan-2020-sciwing/index.md b/content/publication/ramesh-kashyap-kan-2020-sciwing/index.md
new file mode 100644
index 0000000..2d105b5
--- /dev/null
+++ b/content/publication/ramesh-kashyap-kan-2020-sciwing/index.md
@@ -0,0 +1,26 @@
+---
+title: SciWING-- A Software Toolkit for Scientific Document Processing
+authors:
+- Abhinav Ramesh Kashyap
+- min
+date: '2020-11-01'
+publishDate: '2024-07-11T07:40:56.255395Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the First Workshop on Scholarly Document Processing*'
+doi: 10.18653/v1/2020.sdp-1.13
+ abstract: We introduce SciWING, an open-source software toolkit which provides access
+ to state-of-the-art pre-trained models for scientific document processing (SDP)
+ tasks, such as citation string parsing, logical structure recovery and citation
+ intent classification. Compared to other toolkits, SciWING follows a full neural
+ pipeline and provides a Python interface for SDP. When needed, SciWING provides
+ fine-grained control for rapid experimentation with different models by swapping
+ and stacking different modules. Transfer learning from general and scientific documents
+ specific pre-trained transformers (i.e., BERT, SciBERT, etc.) can be performed.
+ SciWING incorporates ready-to-use web and terminal-based applications and demonstrations
+ to aid adoption and development. The toolkit is available from http://sciwing.io
+ and the demos are available at http://rebrand.ly/sciwing-demo.
+links:
+- name: URL
+ url: https://aclanthology.org/2020.sdp-1.13
+---
diff --git a/content/publication/rohatgi-etal-2023-acl/cite.bib b/content/publication/rohatgi-etal-2023-acl/cite.bib
new file mode 100644
index 0000000..c575384
--- /dev/null
+++ b/content/publication/rohatgi-etal-2023-acl/cite.bib
@@ -0,0 +1,20 @@
+@inproceedings{rohatgi-etal-2023-acl,
+ abstract = {We present ACL OCL, a scholarly corpus derived from the ACL Anthology to assist Open scientific research in the Computational Linguistics domain. Integrating and enhancing the previous versions of the ACL Anthology, the ACL OCL contributes metadata, PDF files, citation graphs and additional structured full texts with sections, figures, and links to a large knowledge resource (Semantic Scholar). The ACL OCL spans seven decades, containing 73K papers, alongside 210K figures. We spotlight how ACL OCL applies to observe trends in computational linguistics. By detecting paper topics with a supervised neural model, we note that interest in ``Syntax: Tagging, Chunking and Parsing'' is waning and ``Natural Language Generation'' is resurging. Our dataset is available from HuggingFace (https://huggingface.co/datasets/WINGNUS/ACL-OCL).},
+ address = {Singapore},
+ author = {Rohatgi, Shaurya and
+Qin, Yanxia and
+Aw, Benjamin and
+Unnithan, Niranjana and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
+ doi = {10.18653/v1/2023.emnlp-main.640},
+ editor = {Bouamor, Houda and
+Pino, Juan and
+Bali, Kalika},
+ month = {December},
+ pages = {10348--10361},
+ publisher = {Association for Computational Linguistics},
+ title = {The ACL OCL Corpus: Advancing Open Science in Computational Linguistics},
+ url = {https://aclanthology.org/2023.emnlp-main.640},
+ year = {2023}
+}
diff --git a/content/publication/rohatgi-etal-2023-acl/index.md b/content/publication/rohatgi-etal-2023-acl/index.md
new file mode 100644
index 0000000..4f0d52b
--- /dev/null
+++ b/content/publication/rohatgi-etal-2023-acl/index.md
@@ -0,0 +1,29 @@
+---
+title: 'The ACL OCL Corpus: Advancing Open Science in Computational Linguistics'
+authors:
+- Shaurya Rohatgi
+- Yanxia Qin
+- Benjamin Aw
+- Niranjana Unnithan
+- min
+date: '2023-12-01'
+publishDate: '2024-07-06T02:22:24.575377Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 2023 Conference on Empirical Methods in Natural
+ Language Processing*'
+doi: 10.18653/v1/2023.emnlp-main.640
+abstract: 'We present ACL OCL, a scholarly corpus derived from the ACL Anthology to
+ assist Open scientific research in the Computational Linguistics domain. Integrating
+ and enhancing the previous versions of the ACL Anthology, the ACL OCL contributes
+ metadata, PDF files, citation graphs and additional structured full texts with sections,
+ figures, and links to a large knowledge resource (Semantic Scholar). The ACL OCL
+ spans seven decades, containing 73K papers, alongside 210K figures. We spotlight
+ how ACL OCL applies to observe trends in computational linguistics. By detecting
+ paper topics with a supervised neural model, we note that interest in "Syntax:
+ Tagging, Chunking and Parsing" is waning and "Natural Language Generation" is resurging.
+ Our dataset is available from HuggingFace (https://huggingface.co/datasets/WINGNUS/ACL-OCL).'
+links:
+- name: URL
+ url: https://aclanthology.org/2023.emnlp-main.640
+---
diff --git a/content/publication/setiawan-etal-2007-ordering/cite.bib b/content/publication/setiawan-etal-2007-ordering/cite.bib
new file mode 100644
index 0000000..a8a0401
--- /dev/null
+++ b/content/publication/setiawan-etal-2007-ordering/cite.bib
@@ -0,0 +1,15 @@
+@inproceedings{setiawan-etal-2007-ordering,
+ address = {Prague, Czech Republic},
+ author = {Setiawan, Hendra and
+Kan, Min-Yen and
+Li, Haizhou},
+ booktitle = {Proceedings of the 45th Annual Meeting of the Association of Computational Linguistics},
+ editor = {Zaenen, Annie and
+van den Bosch, Antal},
+ month = {June},
+ pages = {712--719},
+ publisher = {Association for Computational Linguistics},
+ title = {Ordering Phrases with Function Words},
+ url = {https://aclanthology.org/P07-1090},
+ year = {2007}
+}
diff --git a/content/publication/setiawan-etal-2007-ordering/index.md b/content/publication/setiawan-etal-2007-ordering/index.md
new file mode 100644
index 0000000..2445508
--- /dev/null
+++ b/content/publication/setiawan-etal-2007-ordering/index.md
@@ -0,0 +1,16 @@
+---
+title: Ordering Phrases with Function Words
+authors:
+- Hendra Setiawan
+- min
+- Haizhou Li
+date: '2007-06-01'
+publishDate: '2024-07-11T07:40:56.586029Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 45th Annual Meeting of the Association of Computational
+ Linguistics*'
+links:
+- name: URL
+ url: https://aclanthology.org/P07-1090
+---
diff --git a/content/publication/setiawan-etal-2009-topological/cite.bib b/content/publication/setiawan-etal-2009-topological/cite.bib
new file mode 100644
index 0000000..ea29b9d
--- /dev/null
+++ b/content/publication/setiawan-etal-2009-topological/cite.bib
@@ -0,0 +1,18 @@
+@inproceedings{setiawan-etal-2009-topological,
+ address = {Suntec, Singapore},
+ author = {Setiawan, Hendra and
+Kan, Min-Yen and
+Li, Haizhou and
+Resnik, Philip},
+ booktitle = {Proceedings of the Joint Conference of the 47th Annual Meeting of the ACL and the 4th International Joint Conference on Natural Language Processing of the AFNLP},
+ editor = {Su, Keh-Yih and
+Su, Jian and
+Wiebe, Janyce and
+Li, Haizhou},
+ month = {August},
+ pages = {324--332},
+ publisher = {Association for Computational Linguistics},
+ title = {Topological Ordering of Function Words in Hierarchical Phrase-based Translation},
+ url = {https://aclanthology.org/P09-1037},
+ year = {2009}
+}
diff --git a/content/publication/setiawan-etal-2009-topological/index.md b/content/publication/setiawan-etal-2009-topological/index.md
new file mode 100644
index 0000000..0d07225
--- /dev/null
+++ b/content/publication/setiawan-etal-2009-topological/index.md
@@ -0,0 +1,18 @@
+---
+title: Topological Ordering of Function Words in Hierarchical Phrase-based Translation
+authors:
+- Hendra Setiawan
+- min
+- Haizhou Li
+- Philip Resnik
+date: '2009-08-01'
+publishDate: '2024-07-11T07:40:56.542475Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the Joint Conference of the 47th Annual Meeting of the
+ ACL and the 4th International Joint Conference on Natural Language Processing of
+ the AFNLP*'
+links:
+- name: URL
+ url: https://aclanthology.org/P09-1037
+---
diff --git a/content/publication/si-etal-2019-sentiment/cite.bib b/content/publication/si-etal-2019-sentiment/cite.bib
new file mode 100644
index 0000000..140f102
--- /dev/null
+++ b/content/publication/si-etal-2019-sentiment/cite.bib
@@ -0,0 +1,26 @@
+@inproceedings{si-etal-2019-sentiment,
+ abstract = {Sentiment ambiguous lexicons refer to words where their polarity depends strongly on context. As such, when the context is absent, their translations or their embedded sentence ends up (incorrectly) being dependent on the training data. While neural machine translation (NMT) has achieved great progress in recent years, most systems aim to produce one single correct translation for a given source sentence. We investigate the translation variation in two sentiment scenarios. We perform experiments to study the preservation of sentiment during translation with three different methods that we propose. We conducted tests with both sentiment and non-sentiment bearing contexts to examine the effectiveness of our methods. We show that NMT can generate both positive- and negative-valent translations of a source sentence, based on a given input sentiment label. Empirical evaluations show that our valence-sensitive embedding (VSE) method significantly outperforms a sequence-to-sequence (seq2seq) baseline, both in terms of BLEU score and ambiguous word translation accuracy in test, given non-sentiment bearing contexts.},
+ address = {Hong Kong, China},
+ author = {Si, Chenglei and
+Wu, Kui and
+Aw, Ai Ti and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the 6th Workshop on Asian Translation},
+ doi = {10.18653/v1/D19-5227},
+ editor = {Nakazawa, Toshiaki and
+Ding, Chenchen and
+Dabre, Raj and
+Kunchukuttan, Anoop and
+Doi, Nobushige and
+Oda, Yusuke and
+Bojar, Ondřej and
+Parida, Shantipriya and
+Goto, Isao and
+Mino, Hidaya},
+ month = {November},
+ pages = {200--206},
+ publisher = {Association for Computational Linguistics},
+ title = {Sentiment Aware Neural Machine Translation},
+ url = {https://aclanthology.org/D19-5227},
+ year = {2019}
+}
diff --git a/content/publication/si-etal-2019-sentiment/index.md b/content/publication/si-etal-2019-sentiment/index.md
new file mode 100644
index 0000000..e02c077
--- /dev/null
+++ b/content/publication/si-etal-2019-sentiment/index.md
@@ -0,0 +1,31 @@
+---
+title: Sentiment Aware Neural Machine Translation
+authors:
+- Chenglei Si
+- Kui Wu
+- Ai Ti Aw
+- min
+date: '2019-11-01'
+publishDate: '2024-07-11T07:40:56.334066Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 6th Workshop on Asian Translation*'
+doi: 10.18653/v1/D19-5227
+abstract: Sentiment ambiguous lexicons refer to words where their polarity depends
+ strongly on context. As such, when the context is absent, their translations or
+ their embedded sentence ends up (incorrectly) being dependent on the training data.
+ While neural machine translation (NMT) has achieved great progress in recent years,
+ most systems aim to produce one single correct translation for a given source sentence.
+ We investigate the translation variation in two sentiment scenarios. We perform
+ experiments to study the preservation of sentiment during translation with three
+ different methods that we propose. We conducted tests with both sentiment and non-sentiment
+ bearing contexts to examine the effectiveness of our methods. We show that NMT can
+ generate both positive- and negative-valent translations of a source sentence, based
+ on a given input sentiment label. Empirical evaluations show that our valence-sensitive
+ embedding (VSE) method significantly outperforms a sequence-to-sequence (seq2seq)
+ baseline, both in terms of BLEU score and ambiguous word translation accuracy in
+ test, given non-sentiment bearing contexts.
+links:
+- name: URL
+ url: https://aclanthology.org/D19-5227
+---
diff --git a/content/publication/tan-etal-2006-extending/cite.bib b/content/publication/tan-etal-2006-extending/cite.bib
new file mode 100644
index 0000000..53df326
--- /dev/null
+++ b/content/publication/tan-etal-2006-extending/cite.bib
@@ -0,0 +1,9 @@
+@inproceedings{tan-etal-2006-extending,
+ author = {Tan, Yee Fan and
+Kan, Min-Yen and
+Cui, Hang},
+ booktitle = {Proceedings of the Workshop on Multi-word-expressions in a multilingual context},
+ title = {Extending corpus-based identification of light verb constructions using a supervised learning framework},
+ url = {https://aclanthology.org/W06-2407},
+ year = {2006}
+}
diff --git a/content/publication/tan-etal-2006-extending/index.md b/content/publication/tan-etal-2006-extending/index.md
new file mode 100644
index 0000000..1c247c3
--- /dev/null
+++ b/content/publication/tan-etal-2006-extending/index.md
@@ -0,0 +1,17 @@
+---
+title: Extending corpus-based identification of light verb constructions using a supervised
+ learning framework
+authors:
+- Yee Fan Tan
+- min
+- Hang Cui
+date: '2006-01-01'
+publishDate: '2024-07-11T07:40:56.592096Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the Workshop on Multi-word-expressions in a multilingual
+ context*'
+links:
+- name: URL
+ url: https://aclanthology.org/W06-2407
+---
diff --git a/content/publication/tan-etal-2020-mind/cite.bib b/content/publication/tan-etal-2020-mind/cite.bib
new file mode 100644
index 0000000..7e7ad19
--- /dev/null
+++ b/content/publication/tan-etal-2020-mind/cite.bib
@@ -0,0 +1,20 @@
+@inproceedings{tan-etal-2020-mind,
+ abstract = {Inflectional variation is a common feature of World Englishes such as Colloquial Singapore English and African American Vernacular English. Although comprehension by human readers is usually unimpaired by non-standard inflections, current NLP systems are not yet robust. We propose Base-Inflection Encoding (BITE), a method to tokenize English text by reducing inflected words to their base forms before reinjecting the grammatical information as special symbols. Fine-tuning pretrained NLP models for downstream tasks using our encoding defends against inflectional adversaries while maintaining performance on clean data. Models using BITE generalize better to dialects with non-standard inflections without explicit training and translation models converge faster when trained with BITE. Finally, we show that our encoding improves the vocabulary efficiency of popular data-driven subword tokenizers. Since there has been no prior work on quantitatively evaluating vocabulary efficiency, we propose metrics to do so.},
+ address = {Online},
+ author = {Tan, Samson and
+Joty, Shafiq and
+Varshney, Lav and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
+ doi = {10.18653/v1/2020.emnlp-main.455},
+ editor = {Webber, Bonnie and
+Cohn, Trevor and
+He, Yulan and
+Liu, Yang},
+ month = {November},
+ pages = {5647--5663},
+ publisher = {Association for Computational Linguistics},
+ title = {Mind Your Inflections! Improving NLP for Non-Standard Englishes with Base-Inflection Encoding},
+ url = {https://aclanthology.org/2020.emnlp-main.455},
+ year = {2020}
+}
diff --git a/content/publication/tan-etal-2020-mind/index.md b/content/publication/tan-etal-2020-mind/index.md
new file mode 100644
index 0000000..b618eba
--- /dev/null
+++ b/content/publication/tan-etal-2020-mind/index.md
@@ -0,0 +1,32 @@
+---
+title: Mind Your Inflections! Improving NLP for Non-Standard Englishes with Base-Inflection
+ Encoding
+authors:
+- Samson Tan
+- Shafiq Joty
+- Lav Varshney
+- min
+date: '2020-11-01'
+publishDate: '2024-07-11T07:40:56.262379Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 2020 Conference on Empirical Methods in Natural
+ Language Processing (EMNLP)*'
+doi: 10.18653/v1/2020.emnlp-main.455
+abstract: Inflectional variation is a common feature of World Englishes such as Colloquial
+ Singapore English and African American Vernacular English. Although comprehension
+ by human readers is usually unimpaired by non-standard inflections, current NLP
+ systems are not yet robust. We propose Base-Inflection Encoding (BITE), a method
+ to tokenize English text by reducing inflected words to their base forms before
+ reinjecting the grammatical information as special symbols. Fine-tuning pretrained
+ NLP models for downstream tasks using our encoding defends against inflectional
+ adversaries while maintaining performance on clean data. Models using BITE generalize
+ better to dialects with non-standard inflections without explicit training and translation
+ models converge faster when trained with BITE. Finally, we show that our encoding
+ improves the vocabulary efficiency of popular data-driven subword tokenizers. Since
+ there has been no prior work on quantitatively evaluating vocabulary efficiency,
+ we propose metrics to do so.
+links:
+- name: URL
+ url: https://aclanthology.org/2020.emnlp-main.455
+---
diff --git a/content/publication/tan-etal-2020-morphin/cite.bib b/content/publication/tan-etal-2020-morphin/cite.bib
new file mode 100644
index 0000000..f69310c
--- /dev/null
+++ b/content/publication/tan-etal-2020-morphin/cite.bib
@@ -0,0 +1,20 @@
+@inproceedings{tan-etal-2020-morphin,
+ abstract = {Training on only perfect Standard English corpora predisposes pre-trained neural networks to discriminate against minorities from non-standard linguistic backgrounds (e.g., African American Vernacular English, Colloquial Singapore English, etc.). We perturb the inflectional morphology of words to craft plausible and semantically similar adversarial examples that expose these biases in popular NLP models, e.g., BERT and Transformer, and show that adversarially fine-tuning them for a single epoch significantly improves robustness without sacrificing performance on clean data.},
+ address = {Online},
+ author = {Tan, Samson and
+Joty, Shafiq and
+Kan, Min-Yen and
+Socher, Richard},
+ booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
+ doi = {10.18653/v1/2020.acl-main.263},
+ editor = {Jurafsky, Dan and
+Chai, Joyce and
+Schluter, Natalie and
+Tetreault, Joel},
+ month = {July},
+ pages = {2920--2935},
+ publisher = {Association for Computational Linguistics},
+ title = {It's Morphin' Time! Combating Linguistic Discrimination with Inflectional Perturbations},
+ url = {https://aclanthology.org/2020.acl-main.263},
+ year = {2020}
+}
diff --git a/content/publication/tan-etal-2020-morphin/index.md b/content/publication/tan-etal-2020-morphin/index.md
new file mode 100644
index 0000000..dec5d5f
--- /dev/null
+++ b/content/publication/tan-etal-2020-morphin/index.md
@@ -0,0 +1,26 @@
+---
+title: It's Morphin' Time! Combating Linguistic Discrimination with Inflectional Perturbations
+authors:
+- Samson Tan
+- Shafiq Joty
+- min
+- Richard Socher
+date: '2020-07-01'
+publishDate: '2024-07-11T07:40:56.313557Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 58th Annual Meeting of the Association for Computational
+ Linguistics*'
+doi: 10.18653/v1/2020.acl-main.263
+abstract: Training on only perfect Standard English corpora predisposes pre-trained
+ neural networks to discriminate against minorities from non-standard linguistic
+ backgrounds (e.g., African American Vernacular English, Colloquial Singapore English,
+ etc.). We perturb the inflectional morphology of words to craft plausible and semantically
+ similar adversarial examples that expose these biases in popular NLP models, e.g.,
+ BERT and Transformer, and show that adversarially fine-tuning them for a single
+ epoch significantly improves robustness without sacrificing performance on clean
+ data.
+links:
+- name: URL
+ url: https://aclanthology.org/2020.acl-main.263
+---
diff --git a/content/publication/tan-etal-2021-reliability/cite.bib b/content/publication/tan-etal-2021-reliability/cite.bib
new file mode 100644
index 0000000..40a512d
--- /dev/null
+++ b/content/publication/tan-etal-2021-reliability/cite.bib
@@ -0,0 +1,22 @@
+@inproceedings{tan-etal-2021-reliability,
+ abstract = {Questions of fairness, robustness, and transparency are paramount to address before deploying NLP systems. Central to these concerns is the question of reliability: Can NLP systems reliably treat different demographics fairly and function correctly in diverse and noisy environments? To address this, we argue for the need for reliability testing and contextualize it among existing work on improving accountability. We show how adversarial attacks can be reframed for this goal, via a framework for developing reliability tests. We argue that reliability testing --- with an emphasis on interdisciplinary collaboration --- will enable rigorous and targeted testing, and aid in the enactment and enforcement of industry standards.},
+ address = {Online},
+ author = {Tan, Samson and
+Joty, Shafiq and
+Baxter, Kathy and
+Taeihagh, Araz and
+Bennett, Gregory A. and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
+ doi = {10.18653/v1/2021.acl-long.321},
+ editor = {Zong, Chengqing and
+Xia, Fei and
+Li, Wenjie and
+Navigli, Roberto},
+ month = {August},
+ pages = {4153--4169},
+ publisher = {Association for Computational Linguistics},
+ title = {Reliability Testing for Natural Language Processing Systems},
+ url = {https://aclanthology.org/2021.acl-long.321},
+ year = {2021}
+}
diff --git a/content/publication/tan-etal-2021-reliability/index.md b/content/publication/tan-etal-2021-reliability/index.md
new file mode 100644
index 0000000..a1aee3e
--- /dev/null
+++ b/content/publication/tan-etal-2021-reliability/index.md
@@ -0,0 +1,30 @@
+---
+title: Reliability Testing for Natural Language Processing Systems
+authors:
+- Samson Tan
+- Shafiq Joty
+- Kathy Baxter
+- Araz Taeihagh
+- Gregory A. Bennett
+- min
+date: '2021-08-01'
+publishDate: '2024-07-11T07:40:56.247998Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 59th Annual Meeting of the Association for Computational
+ Linguistics and the 11th International Joint Conference on Natural Language Processing
+ (Volume 1: Long Papers)*'
+doi: 10.18653/v1/2021.acl-long.321
+abstract: 'Questions of fairness, robustness, and transparency are paramount to address
+ before deploying NLP systems. Central to these concerns is the question of reliability:
+ Can NLP systems reliably treat different demographics fairly and function correctly
+ in diverse and noisy environments? To address this, we argue for the need for reliability
+ testing and contextualize it among existing work on improving accountability. We
+ show how adversarial attacks can be reframed for this goal, via a framework for
+ developing reliability tests. We argue that reliability testing --- with an emphasis
+ on interdisciplinary collaboration --- will enable rigorous and targeted testing,
+ and aid in the enactment and enforcement of industry standards.'
+links:
+- name: URL
+ url: https://aclanthology.org/2021.acl-long.321
+---
diff --git a/content/publication/wang-etal-2012-tweeting/cite.bib b/content/publication/wang-etal-2012-tweeting/cite.bib
new file mode 100644
index 0000000..e2fb329
--- /dev/null
+++ b/content/publication/wang-etal-2012-tweeting/cite.bib
@@ -0,0 +1,16 @@
+@inproceedings{wang-etal-2012-tweeting,
+ address = {Montréal, Canada},
+ author = {Wang, Aobo and
+Chen, Tao and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the Second Workshop on Language in Social Media},
+ editor = {Sood, Sara Owsley and
+Nagarajan, Meenakshi and
+Gamon, Michael},
+ month = {June},
+ pages = {46--55},
+ publisher = {Association for Computational Linguistics},
+ title = {Re-tweeting from a linguistic perspective},
+ url = {https://aclanthology.org/W12-2106},
+ year = {2012}
+}
diff --git a/content/publication/wang-etal-2012-tweeting/index.md b/content/publication/wang-etal-2012-tweeting/index.md
new file mode 100644
index 0000000..60eb307
--- /dev/null
+++ b/content/publication/wang-etal-2012-tweeting/index.md
@@ -0,0 +1,15 @@
+---
+title: Re-tweeting from a linguistic perspective
+authors:
+- Aobo Wang
+- Tao Chen
+- min
+date: '2012-06-01'
+publishDate: '2024-07-11T07:40:56.450801Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the Second Workshop on Language in Social Media*'
+links:
+- name: URL
+ url: https://aclanthology.org/W12-2106
+---
diff --git a/content/publication/wang-etal-2013-chinese/cite.bib b/content/publication/wang-etal-2013-chinese/cite.bib
new file mode 100644
index 0000000..ceebda2
--- /dev/null
+++ b/content/publication/wang-etal-2013-chinese/cite.bib
@@ -0,0 +1,17 @@
+@inproceedings{wang-etal-2013-chinese,
+ address = {Nagoya, Japan},
+ author = {Wang, Aobo and
+Kan, Min-Yen and
+Andrade, Daniel and
+Onishi, Takashi and
+Ishikawa, Kai},
+ booktitle = {Proceedings of the Sixth International Joint Conference on Natural Language Processing},
+ editor = {Mitkov, Ruslan and
+Park, Jong C.},
+ month = {October},
+ pages = {127--135},
+ publisher = {Asian Federation of Natural Language Processing},
+ title = {Chinese Informal Word Normalization: an Experimental Study},
+ url = {https://aclanthology.org/I13-1015},
+ year = {2013}
+}
diff --git a/content/publication/wang-etal-2013-chinese/index.md b/content/publication/wang-etal-2013-chinese/index.md
new file mode 100644
index 0000000..00a76c1
--- /dev/null
+++ b/content/publication/wang-etal-2013-chinese/index.md
@@ -0,0 +1,18 @@
+---
+title: 'Chinese Informal Word Normalization: an Experimental Study'
+authors:
+- Aobo Wang
+- min
+- Daniel Andrade
+- Takashi Onishi
+- Kai Ishikawa
+date: '2013-10-01'
+publishDate: '2024-07-11T07:40:56.426038Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the Sixth International Joint Conference on Natural
+ Language Processing*'
+links:
+- name: URL
+ url: https://aclanthology.org/I13-1015
+---
diff --git a/content/publication/wang-kan-2013-mining/cite.bib b/content/publication/wang-kan-2013-mining/cite.bib
new file mode 100644
index 0000000..ff8f7ef
--- /dev/null
+++ b/content/publication/wang-kan-2013-mining/cite.bib
@@ -0,0 +1,15 @@
+@inproceedings{wang-kan-2013-mining,
+ address = {Sofia, Bulgaria},
+ author = {Wang, Aobo and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
+ editor = {Schuetze, Hinrich and
+Fung, Pascale and
+Poesio, Massimo},
+ month = {August},
+ pages = {731--741},
+ publisher = {Association for Computational Linguistics},
+ title = {Mining Informal Language from Chinese Microtext: Joint Word Recognition and Segmentation},
+ url = {https://aclanthology.org/P13-1072},
+ year = {2013}
+}
diff --git a/content/publication/wang-kan-2013-mining/index.md b/content/publication/wang-kan-2013-mining/index.md
new file mode 100644
index 0000000..1c920f0
--- /dev/null
+++ b/content/publication/wang-kan-2013-mining/index.md
@@ -0,0 +1,16 @@
+---
+title: 'Mining Informal Language from Chinese Microtext: Joint Word Recognition and
+ Segmentation'
+authors:
+- Aobo Wang
+- min
+date: '2013-08-01'
+publishDate: '2024-07-11T07:40:56.420009Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 51st Annual Meeting of the Association for Computational
+ Linguistics (Volume 1: Long Papers)*'
+links:
+- name: URL
+ url: https://aclanthology.org/P13-1072
+---
diff --git a/content/publication/ws-2009-2009-text/cite.bib b/content/publication/ws-2009-2009-text/cite.bib
new file mode 100644
index 0000000..7ffaec2
--- /dev/null
+++ b/content/publication/ws-2009-2009-text/cite.bib
@@ -0,0 +1,10 @@
+@proceedings{ws-2009-2009-text,
+ address = {Suntec City, Singapore},
+ editor = {Kan, Min-Yen and
+Teufel, Simone},
+ month = {August},
+ publisher = {Association for Computational Linguistics},
+ title = {Proceedings of the 2009 Workshop on Text and Citation Analysis for Scholarly Digital Libraries (NLPIR4DL)},
+ url = {https://aclanthology.org/W09-3600},
+ year = {2009}
+}
diff --git a/content/publication/ws-2009-2009-text/index.md b/content/publication/ws-2009-2009-text/index.md
new file mode 100644
index 0000000..21ec248
--- /dev/null
+++ b/content/publication/ws-2009-2009-text/index.md
@@ -0,0 +1,15 @@
+---
+title: Proceedings of the 2009 Workshop on Text and Citation Analysis for Scholarly
+ Digital Libraries (NLPIR4DL)
+authors:
+- min
+- Simone Teufel
+date: '2009-08-01'
+publishDate: '2024-07-11T07:40:56.518007Z'
+publication_types:
+- book
+publication: '*Association for Computational Linguistics*'
+links:
+- name: URL
+ url: https://aclanthology.org/W09-3600
+---
diff --git a/content/publication/ws-2016-joint/cite.bib b/content/publication/ws-2016-joint/cite.bib
new file mode 100644
index 0000000..d4267e0
--- /dev/null
+++ b/content/publication/ws-2016-joint/cite.bib
@@ -0,0 +1,14 @@
+@proceedings{ws-2016-joint,
+ editor = {Cabanac, Guillaume and
+Chandrasekaran, Muthu Kumar and
+Frommholz, Ingo and
+Jaidka, Kokil and
+Kan, Min-Yen and
+Mayr, Philipp and
+Wolfram, Dietmar},
+ month = {June},
+ pages = {1--5},
+ title = {Proceedings of the Joint Workshop on Bibliometric-enhanced Information Retrieval and Natural Language Processing for Digital Libraries (BIRNDL)},
+ url = {https://aclanthology.org/W16-1500},
+ year = {2016}
+}
diff --git a/content/publication/ws-2016-joint/index.md b/content/publication/ws-2016-joint/index.md
new file mode 100644
index 0000000..6bac8d0
--- /dev/null
+++ b/content/publication/ws-2016-joint/index.md
@@ -0,0 +1,19 @@
+---
+title: Proceedings of the Joint Workshop on Bibliometric-enhanced Information Retrieval
+ and Natural Language Processing for Digital Libraries (BIRNDL)
+authors:
+- Guillaume Cabanac
+- Muthu Kumar Chandrasekaran
+- Ingo Frommholz
+- Kokil Jaidka
+- min
+- Philipp Mayr
+- Dietmar Wolfram
+date: '2016-06-01'
+publishDate: '2024-07-11T07:40:56.395203Z'
+publication_types:
+- book
+links:
+- name: URL
+ url: https://aclanthology.org/W16-1500
+---
diff --git a/content/publication/xie-etal-2020-exploring/cite.bib b/content/publication/xie-etal-2020-exploring/cite.bib
new file mode 100644
index 0000000..0099ed3
--- /dev/null
+++ b/content/publication/xie-etal-2020-exploring/cite.bib
@@ -0,0 +1,20 @@
+@inproceedings{xie-etal-2020-exploring,
+ abstract = {Recent question generation (QG) approaches often utilize the sequence-to-sequence framework (Seq2Seq) to optimize the log likelihood of ground-truth questions using teacher forcing. However, this training objective is inconsistent with actual question quality, which is often reflected by certain global properties such as whether the question can be answered by the document. As such, we directly optimize for QG-specific objectives via reinforcement learning to improve question quality. We design three different rewards that target to improve the fluency, relevance, and answerability of generated questions. We conduct both automatic and human evaluations in addition to thorough analysis to explore the effect of each QG-specific reward. We find that optimizing on question-specific rewards generally leads to better performance in automatic evaluation metrics. However, only the rewards that correlate well with human judgement (e.g., relevance) lead to real improvement in question quality. Optimizing for the others, especially answerability, introduces incorrect bias to the model, resulting in poorer question quality. The code is publicly available at https://github.com/YuxiXie/RL-for-Question-Generation.},
+ address = {Barcelona, Spain (Online)},
+ author = {Xie, Yuxi and
+Pan, Liangming and
+Wang, Dongzhe and
+Kan, Min-Yen and
+Feng, Yansong},
+ booktitle = {Proceedings of the 28th International Conference on Computational Linguistics},
+ doi = {10.18653/v1/2020.coling-main.228},
+ editor = {Scott, Donia and
+Bel, Nuria and
+Zong, Chengqing},
+ month = {December},
+ pages = {2534--2546},
+ publisher = {International Committee on Computational Linguistics},
+ title = {Exploring Question-Specific Rewards for Generating Deep Questions},
+ url = {https://aclanthology.org/2020.coling-main.228},
+ year = {2020}
+}
diff --git a/content/publication/xie-etal-2020-exploring/index.md b/content/publication/xie-etal-2020-exploring/index.md
new file mode 100644
index 0000000..ff08b3e
--- /dev/null
+++ b/content/publication/xie-etal-2020-exploring/index.md
@@ -0,0 +1,33 @@
+---
+title: Exploring Question-Specific Rewards for Generating Deep Questions
+authors:
+- Yuxi Xie
+- Liangming Pan
+- Dongzhe Wang
+- min
+- Yansong Feng
+date: '2020-12-01'
+publishDate: '2024-07-11T07:40:56.276341Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 28th International Conference on Computational Linguistics*'
+doi: 10.18653/v1/2020.coling-main.228
+abstract: Recent question generation (QG) approaches often utilize the sequence-to-sequence
+ framework (Seq2Seq) to optimize the log likelihood of ground-truth questions using
+ teacher forcing. However, this training objective is inconsistent with actual question
+ quality, which is often reflected by certain global properties such as whether the
+ question can be answered by the document. As such, we directly optimize for QG-specific
+ objectives via reinforcement learning to improve question quality. We design three
+ different rewards that target to improve the fluency, relevance, and answerability
+ of generated questions. We conduct both automatic and human evaluations in addition
+ to thorough analysis to explore the effect of each QG-specific reward. We find that
+ optimizing on question-specific rewards generally leads to better performance in
+ automatic evaluation metrics. However, only the rewards that correlate well with
+ human judgement (e.g., relevance) lead to real improvement in question quality.
+ Optimizing for the others, especially answerability, introduces incorrect bias to
+ the model, resulting in poorer question quality. The code is publicly available
+ at https://github.com/YuxiXie/RL-for-Question-Generation.
+links:
+- name: URL
+ url: https://aclanthology.org/2020.coling-main.228
+---
diff --git a/content/publication/xie-etal-2023-echo/cite.bib b/content/publication/xie-etal-2023-echo/cite.bib
new file mode 100644
index 0000000..9627c70
--- /dev/null
+++ b/content/publication/xie-etal-2023-echo/cite.bib
@@ -0,0 +1,18 @@
+@inproceedings{xie-etal-2023-echo,
+ abstract = {We introduce ECHo (Event Causality Inference via Human-Centric Reasoning), a diagnostic dataset of event causality inference grounded in visio-linguistic social scenarios. ECHo employs real-world human-centric deductive information building on a television crime drama. ECHo requires the Theory-of-Mind (ToM) ability to understand and reason about social interactions based on multimodal information. Using ECHo, we propose a unified Chain-of-Thought (CoT) framework to assess the reasoning capability of current AI systems. Our ToM-enhanced CoT pipeline accommodates various large foundation models in both zero-shot and few-shot visio-linguistic reasoning. We use this framework to scrutinize recent large foundation models such as InstructGPT and MiniGPT-4 on three diagnostic human-centric tasks. Further analysis demonstrates ECHo as a challenging dataset to expose imperfections and inconsistencies in reasoning. Our data and code are publicly available at [https://github.com/YuxiXie/ECHo](https://github.com/YuxiXie/ECHo).},
+ address = {Singapore},
+ author = {Xie, Yuxi and
+Li, Guanzhen and
+Kan, Min-Yen},
+ booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2023},
+ doi = {10.18653/v1/2023.findings-emnlp.268},
+ editor = {Bouamor, Houda and
+Pino, Juan and
+Bali, Kalika},
+ month = {December},
+ pages = {4064--4085},
+ publisher = {Association for Computational Linguistics},
+ title = {ECHo: A Visio-Linguistic Dataset for Event Causality Inference via Human-Centric Reasoning},
+ url = {https://aclanthology.org/2023.findings-emnlp.268},
+ year = {2023}
+}
diff --git a/content/publication/xie-etal-2023-echo/index.md b/content/publication/xie-etal-2023-echo/index.md
new file mode 100644
index 0000000..8cc1a05
--- /dev/null
+++ b/content/publication/xie-etal-2023-echo/index.md
@@ -0,0 +1,29 @@
+---
+title: 'ECHo: A Visio-Linguistic Dataset for Event Causality Inference via Human-Centric
+ Reasoning'
+authors:
+- Yuxi Xie
+- Guanzhen Li
+- min
+date: '2023-12-01'
+publishDate: '2024-07-06T02:22:24.554194Z'
+publication_types:
+- paper-conference
+publication: '*Findings of the Association for Computational Linguistics: EMNLP 2023*'
+doi: 10.18653/v1/2023.findings-emnlp.268
+abstract: We introduce ECHo (Event Causality Inference via Human-Centric Reasoning),
+ a diagnostic dataset of event causality inference grounded in visio-linguistic social
+ scenarios. ECHo employs real-world human-centric deductive information building
+ on a television crime drama. ECHo requires the Theory-of-Mind (ToM) ability to understand
+ and reason about social interactions based on multimodal information. Using ECHo,
+ we propose a unified Chain-of-Thought (CoT) framework to assess the reasoning capability
+ of current AI systems. Our ToM-enhanced CoT pipeline accommodates various large
+ foundation models in both zero-shot and few-shot visio-linguistic reasoning. We
+ use this framework to scrutinize recent large foundation models such as InstructGPT
+ and MiniGPT-4 on three diagnostic human-centric tasks. Further analysis demonstrates
+ ECHo as a challenging dataset to expose imperfections and inconsistencies in reasoning.
+ Our data and code are publicly available at [https://github.com/YuxiXie/ECHo](https://github.com/YuxiXie/ECHo).
+links:
+- name: URL
+ url: https://aclanthology.org/2023.findings-emnlp.268
+---
diff --git a/content/publication/xu-etal-2022-corefdiffs/cite.bib b/content/publication/xu-etal-2022-corefdiffs/cite.bib
new file mode 100644
index 0000000..8281f61
--- /dev/null
+++ b/content/publication/xu-etal-2022-corefdiffs/cite.bib
@@ -0,0 +1,36 @@
+@inproceedings{xu-etal-2022-corefdiffs,
+ abstract = {Knowledge-grounded dialog systems need to incorporate smooth transitions among knowledge selected for generating responses, to ensure that dialog flows naturally. For document-grounded dialog systems, the inter- and intra-document knowledge relations can be used to model such conversational flows. We develop a novel Multi-Document Co-Referential Graph (Coref-MDG) to effectively capture the inter-document relationships based on commonsense and similarity and the intra-document co-referential structures of knowledge segments within the grounding documents. We propose CorefDiffs, a Co-referential and Differential flow management method, to linearize the static Coref-MDG into conversational sequence logic. CorefDiffs performs knowledge selection by accounting for contextual graph structures and the knowledge difference sequences. CorefDiffs significantly outperforms the state-of-the-art by 9.5%, 7.4% and 8.2% on three public benchmarks. This demonstrates that the effective modeling of co-reference and knowledge difference for dialog flows are critical for transitions in document-grounded conversation.},
+ address = {Gyeongju, Republic of Korea},
+ author = {Xu, Lin and
+Zhou, Qixian and
+Fu, Jinlan and
+Kan, Min-Yen and
+Ng, See-Kiong},
+ booktitle = {Proceedings of the 29th International Conference on Computational Linguistics},
+ editor = {Calzolari, Nicoletta and
+Huang, Chu-Ren and
+Kim, Hansaem and
+Pustejovsky, James and
+Wanner, Leo and
+Choi, Key-Sun and
+Ryu, Pum-Mo and
+Chen, Hsin-Hsi and
+Donatelli, Lucia and
+Ji, Heng and
+Kurohashi, Sadao and
+Paggio, Patrizia and
+Xue, Nianwen and
+Kim, Seokhwan and
+Hahm, Younggyun and
+He, Zhong and
+Lee, Tony Kyungil and
+Santus, Enrico and
+Bond, Francis and
+Na, Seung-Hoon},
+ month = {October},
+ pages = {471--484},
+ publisher = {International Committee on Computational Linguistics},
+ title = {CorefDiffs: Co-referential and Differential Knowledge Flow in Document Grounded Conversations},
+ url = {https://aclanthology.org/2022.coling-1.38},
+ year = {2022}
+}
diff --git a/content/publication/xu-etal-2022-corefdiffs/index.md b/content/publication/xu-etal-2022-corefdiffs/index.md
new file mode 100644
index 0000000..6b8f10b
--- /dev/null
+++ b/content/publication/xu-etal-2022-corefdiffs/index.md
@@ -0,0 +1,31 @@
+---
+title: 'CorefDiffs: Co-referential and Differential Knowledge Flow in Document Grounded
+ Conversations'
+authors:
+- Lin Xu
+- Qixian Zhou
+- Jinlan Fu
+- min
+- See-Kiong Ng
+date: '2022-10-01'
+publishDate: '2024-07-05T17:09:42.624059Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 29th International Conference on Computational Linguistics*'
+abstract: Knowledge-grounded dialog systems need to incorporate smooth transitions
+ among knowledge selected for generating responses, to ensure that dialog flows naturally.
+ For document-grounded dialog systems, the inter- and intra-document knowledge relations
+ can be used to model such conversational flows. We develop a novel Multi-Document
+ Co-Referential Graph (Coref-MDG) to effectively capture the inter-document relationships
+ based on commonsense and similarity and the intra-document co-referential structures
+ of knowledge segments within the grounding documents. We propose CorefDiffs, a Co-referential
+ and Differential flow management method, to linearize the static Coref-MDG into
+ conversational sequence logic. CorefDiffs performs knowledge selection by accounting
+ for contextual graph structures and the knowledge difference sequences. CorefDiffs
+ significantly outperforms the state-of-the-art by 9.5%, 7.4% and 8.2% on three public
+ benchmarks. This demonstrates that the effective modeling of co-reference and knowledge
+ difference for dialog flows are critical for transitions in document-grounded conversation.
+links:
+- name: URL
+ url: https://aclanthology.org/2022.coling-1.38
+---
diff --git a/content/publication/zhang-etal-2022-interpreting/cite.bib b/content/publication/zhang-etal-2022-interpreting/cite.bib
new file mode 100644
index 0000000..dc902fb
--- /dev/null
+++ b/content/publication/zhang-etal-2022-interpreting/cite.bib
@@ -0,0 +1,19 @@
+@inproceedings{zhang-etal-2022-interpreting,
+ abstract = {Modern Natural Language Processing (NLP) models are known to be sensitive to input perturbations and their performance can decrease when applied to real-world, noisy data. However, it is still unclear why models are less robust to some perturbations than others. In this work, we test the hypothesis that the extent to which a model is affected by an unseen textual perturbation (robustness) can be explained by the learnability of the perturbation (defined as how well the model learns to identify the perturbation with a small amount of evidence). We further give a causal justification for the learnability metric. We conduct extensive experiments with four prominent NLP models --- TextRNN, BERT, RoBERTa and XLNet --- over eight types of textual perturbations on three datasets. We show that a model which is better at identifying a perturbation (higher learnability) becomes worse at ignoring such a perturbation at test time (lower robustness), providing empirical support for our hypothesis.},
+ address = {Dublin, Ireland},
+ author = {Zhang, Yunxiang and
+Pan, Liangming and
+Tan, Samson and
+Kan, Min-Yen},
+ booktitle = {Findings of the Association for Computational Linguistics: ACL 2022},
+ doi = {10.18653/v1/2022.findings-acl.315},
+ editor = {Muresan, Smaranda and
+Nakov, Preslav and
+Villavicencio, Aline},
+ month = {May},
+ pages = {3993--4007},
+ publisher = {Association for Computational Linguistics},
+ title = {Interpreting the Robustness of Neural NLP Models to Textual Perturbations},
+ url = {https://aclanthology.org/2022.findings-acl.315},
+ year = {2022}
+}
diff --git a/content/publication/zhang-etal-2022-interpreting/index.md b/content/publication/zhang-etal-2022-interpreting/index.md
new file mode 100644
index 0000000..1ab5b8d
--- /dev/null
+++ b/content/publication/zhang-etal-2022-interpreting/index.md
@@ -0,0 +1,29 @@
+---
+title: Interpreting the Robustness of Neural NLP Models to Textual Perturbations
+authors:
+- Yunxiang Zhang
+- Liangming Pan
+- Samson Tan
+- min
+date: '2022-05-01'
+publishDate: '2024-07-05T17:09:42.596004Z'
+publication_types:
+- paper-conference
+publication: '*Findings of the Association for Computational Linguistics: ACL 2022*'
+doi: 10.18653/v1/2022.findings-acl.315
+abstract: Modern Natural Language Processing (NLP) models are known to be sensitive
+ to input perturbations and their performance can decrease when applied to real-world,
+ noisy data. However, it is still unclear why models are less robust to some perturbations
+ than others. In this work, we test the hypothesis that the extent to which a model
+ is affected by an unseen textual perturbation (robustness) can be explained by the
+ learnability of the perturbation (defined as how well the model learns to identify
+ the perturbation with a small amount of evidence). We further give a causal justification
+ for the learnability metric. We conduct extensive experiments with four prominent
+ NLP models --- TextRNN, BERT, RoBERTa and XLNet --- over eight types of textual
+ perturbations on three datasets. We show that a model which is better at identifying
+ a perturbation (higher learnability) becomes worse at ignoring such a perturbation
+ at test time (lower robustness), providing empirical support for our hypothesis.
+links:
+- name: URL
+ url: https://aclanthology.org/2022.findings-acl.315
+---
diff --git a/content/publication/zhang-etal-2024-nnose/cite.bib b/content/publication/zhang-etal-2024-nnose/cite.bib
new file mode 100644
index 0000000..5ef112c
--- /dev/null
+++ b/content/publication/zhang-etal-2024-nnose/cite.bib
@@ -0,0 +1,17 @@
+@inproceedings{zhang-etal-2024-nnose,
+ abstract = {The labor market is changing rapidly, prompting increased interest in the automatic extraction of occupational skills from text. With the advent of English benchmark job description datasets, there is a need for systems that handle their diversity well. We tackle the complexity in occupational skill datasets tasks---combining and leveraging multiple datasets for skill extraction, to identify rarely observed skills within a dataset, and overcoming the scarcity of skills across datasets. In particular, we investigate the retrieval-augmentation of language models, employing an external datastore for retrieving similar skills in a dataset-unifying manner. Our proposed method, \textbf{N}earest \textbf{N}eighbor \textbf{O}ccupational \textbf{S}kill \textbf{E}xtraction (NNOSE) effectively leverages multiple datasets by retrieving neighboring skills from other datasets in the datastore. This improves skill extraction \textit{without} additional fine-tuning. Crucially, we observe a performance gain in predicting infrequent patterns, with substantial gains of up to 30% span-F1 in cross-dataset settings.},
+ address = {St. Julian's, Malta},
+ author = {Zhang, Mike and
+van der Goot, Rob and
+Kan, Min-Yen and
+Plank, Barbara},
+ booktitle = {Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)},
+ editor = {Graham, Yvette and
+Purver, Matthew},
+ month = {March},
+ pages = {589--608},
+ publisher = {Association for Computational Linguistics},
+ title = {NNOSE: Nearest Neighbor Occupational Skill Extraction},
+ url = {https://aclanthology.org/2024.eacl-long.35},
+ year = {2024}
+}
diff --git a/content/publication/zhang-etal-2024-nnose/index.md b/content/publication/zhang-etal-2024-nnose/index.md
new file mode 100644
index 0000000..b8c17dc
--- /dev/null
+++ b/content/publication/zhang-etal-2024-nnose/index.md
@@ -0,0 +1,30 @@
+---
+title: 'NNOSE: Nearest Neighbor Occupational Skill Extraction'
+authors:
+- Mike Zhang
+- Rob van der Goot
+- min
+- Barbara Plank
+date: '2024-03-01'
+publishDate: '2024-07-06T02:22:24.520030Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 18th Conference of the European Chapter of the Association
+ for Computational Linguistics (Volume 1: Long Papers)*'
+abstract: The labor market is changing rapidly, prompting increased interest in the
+ automatic extraction of occupational skills from text. With the advent of English
+ benchmark job description datasets, there is a need for systems that handle their
+ diversity well. We tackle the complexity in occupational skill datasets tasks---combining
+ and leveraging multiple datasets for skill extraction, to identify rarely observed
+ skills within a dataset, and overcoming the scarcity of skills across datasets.
+ In particular, we investigate the retrieval-augmentation of language models, employing
+ an external datastore for retrieving similar skills in a dataset-unifying manner.
+ Our proposed method, Nearest Neighbor Occupational Skill Extraction (NNOSE) effectively
+ leverages multiple datasets by retrieving neighboring skills from other datasets
+ in the datastore. This improves skill extraction *without*
+ additional fine-tuning. Crucially, we observe a performance gain in predicting infrequent
+ patterns, with substantial gains of up to 30% span-F1 in cross-dataset settings.
+links:
+- name: URL
+ url: https://aclanthology.org/2024.eacl-long.35
+---
diff --git a/publications.bib b/publications.bib
new file mode 100644
index 0000000..708b62c
--- /dev/null
+++ b/publications.bib
@@ -0,0 +1,10 @@
+@article{8743365,
+ author = {An, Ya-Hui and Pan, Liangming and Kan, Min-Yen and Dong, Qiang and Fu, Yan},
+ journal = {IEEE Access},
+ title = {Resource Mention Extraction for MOOC Discussion Forums},
+ year = {2019},
+ volume = {7},
+ pages = {87887--87900},
+ keywords = {Context;Task analysis;Tagging;Message systems;Discussion forums;Context modeling;Semantics;Artificial intelligence;deep learning;hyperlinking;learning resources;MOOC discussion forums;name entity recognition},
+ doi = {10.1109/ACCESS.2019.2924250}
+}