Merge pull request #4 from WING-NUS/hugoblox-import-publications

Hugo Blox Builder - Import latest publications
WING-NUS · Jul 11, 2024 · 85156b0 · 85156b0
2 parents ae6b301 + 62721cc
commit 85156b0
Show file tree

Hide file tree

Showing 128 changed files with 2,509 additions and 0 deletions.
diff --git a/content/publication/acl-2017-association-linguistics/cite.bib b/content/publication/acl-2017-association-linguistics/cite.bib
@@ -0,0 +1,11 @@
+@proceedings{acl-2017-association-linguistics,
+ address = {Vancouver, Canada},
+ doi = {10.18653/v1/P17-2},
+ editor = {Barzilay, Regina  and
+Kan, Min-Yen},
+ month = {July},
+ publisher = {Association for Computational Linguistics},
+ title = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
+ url = {https://aclanthology.org/P17-2000},
+ year = {2017}
+}
diff --git a/content/publication/acl-2017-association-linguistics/index.md b/content/publication/acl-2017-association-linguistics/index.md
@@ -0,0 +1,16 @@
+---
+title: 'Proceedings of the 55th Annual Meeting of the Association for Computational
+  Linguistics (Volume 2: Short Papers)'
+authors:
+- Regina Barzilay
+- Min-Yen Kan
+date: '2017-07-01'
+publishDate: '2024-07-11T07:40:56.389063Z'
+publication_types:
+- book
+publication: '*Association for Computational Linguistics*'
+doi: 10.18653/v1/P17-2
+links:
+- name: URL
+  url: https://aclanthology.org/P17-2000
+---
diff --git a/content/publication/bhola-etal-2020-retrieving/cite.bib b/content/publication/bhola-etal-2020-retrieving/cite.bib
@@ -0,0 +1,19 @@
+@inproceedings{bhola-etal-2020-retrieving,
+ abstract = {We introduce a deep learning model to learn the set of enumerated job skills associated with a job description. In our analysis of a large-scale government job portal mycareersfuture.sg, we observe that as much as 65% of job descriptions miss describing a significant number of relevant skills. Our model addresses this task from the perspective of an extreme multi-label classification (XMLC) problem, where descriptions are the evidence for the binary relevance of thousands of individual skills. Building upon the current state-of-the-art language modeling approaches such as BERT, we show our XMLC method improves on an existing baseline solution by over 9% and 7% absolute improvements in terms of recall and normalized discounted cumulative gain. We further show that our approach effectively addresses the missing skills problem, and helps in recovering relevant skills that were missed out in the job postings by taking into account the structured semantic representation of skills and their co-occurrences through a Correlation Aware Bootstrapping process. We further show that our approach, to ensure the BERT-XMLC model accounts for structured semantic representation of skills and their co-occurrences through a Correlation Aware Bootstrapping process, effectively addresses the missing skills problem, and helps in recovering relevant skills that were missed out in the job postings. To facilitate future research and replication of our work, we have made the dataset and the implementation of our model publicly available.},
+ address = {Barcelona, Spain (Online)},
+ author = {Bhola, Akshay  and
+Halder, Kishaloy  and
+Prasad, Animesh  and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the 28th International Conference on Computational Linguistics},
+ doi = {10.18653/v1/2020.coling-main.513},
+ editor = {Scott, Donia  and
+Bel, Nuria  and
+Zong, Chengqing},
+ month = {December},
+ pages = {5832--5842},
+ publisher = {International Committee on Computational Linguistics},
+ title = {Retrieving Skills from Job Descriptions: A Language Model Based Extreme Multi-label Classification Framework},
+ url = {https://aclanthology.org/2020.coling-main.513},
+ year = {2020}
+}
diff --git a/content/publication/bhola-etal-2020-retrieving/index.md b/content/publication/bhola-etal-2020-retrieving/index.md
@@ -0,0 +1,37 @@
+---
+title: 'Retrieving Skills from Job Descriptions: A Language Model Based Extreme Multi-label
+  Classification Framework'
+authors:
+- Akshay Bhola
+- Kishaloy Halder
+- Animesh Prasad
+- Min-Yen Kan
+date: '2020-12-01'
+publishDate: '2024-07-11T07:40:56.291153Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 28th International Conference on Computational Linguistics*'
+doi: 10.18653/v1/2020.coling-main.513
+abstract: We introduce a deep learning model to learn the set of enumerated job skills
+  associated with a job description. In our analysis of a large-scale government job
+  portal mycareersfuture.sg, we observe that as much as 65% of job descriptions miss
+  describing a significant number of relevant skills. Our model addresses this task
+  from the perspective of an extreme multi-label classification (XMLC) problem, where
+  descriptions are the evidence for the binary relevance of thousands of individual
+  skills. Building upon the current state-of-the-art language modeling approaches
+  such as BERT, we show our XMLC method improves on an existing baseline solution
+  by over 9% and 7% absolute improvements in terms of recall and normalized discounted
+  cumulative gain. We further show that our approach effectively addresses the missing
+  skills problem, and helps in recovering relevant skills that were missed out in
+  the job postings by taking into account the structured semantic representation of
+  skills and their co-occurrences through a Correlation Aware Bootstrapping process.
+  We further show that our approach, to ensure the BERT-XMLC model accounts for structured
+  semantic representation of skills and their co-occurrences through a Correlation
+  Aware Bootstrapping process, effectively addresses the missing skills problem, and
+  helps in recovering relevant skills that were missed out in the job postings. To
+  facilitate future research and replication of our work, we have made the dataset
+  and the implementation of our model publicly available.
+links:
+- name: URL
+  url: https://aclanthology.org/2020.coling-main.513
+---
diff --git a/content/publication/bird-etal-2008-acl/cite.bib b/content/publication/bird-etal-2008-acl/cite.bib
@@ -0,0 +1,27 @@
+@inproceedings{bird-etal-2008-acl,
+ abstract = {The ACL Anthology is a digital archive of conference and journal papers in natural language processing and computational linguistics. Its primary purpose is to serve as a reference repository of research results, but we believe that it can also be an object of study and a platform for research in its own right. We describe an enriched and standardized reference corpus derived from the ACL Anthology that can be used for research in scholarly document processing. This corpus, which we call the ACL Anthology Reference Corpus (ACL ARC), brings together the recent activities of a number of research groups around the world. Our goal is to make the corpus widely available, and to encourage other researchers to use it as a standard testbed for experiments in both bibliographic and bibliometric research.},
+ address = {Marrakech, Morocco},
+ author = {Bird, Steven  and
+Dale, Robert  and
+Dorr, Bonnie  and
+Gibson, Bryan  and
+Joseph, Mark  and
+Kan, Min-Yen  and
+Lee, Dongwon  and
+Powley, Brett  and
+Radev, Dragomir  and
+Tan, Yee Fan},
+ booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC'08)},
+ editor = {Calzolari, Nicoletta  and
+Choukri, Khalid  and
+Maegaard, Bente  and
+Mariani, Joseph  and
+Odijk, Jan  and
+Piperidis, Stelios  and
+Tapias, Daniel},
+ month = {May},
+ publisher = {European Language Resources Association (ELRA)},
+ title = {The ACL Anthology Reference Corpus: A Reference Dataset for Bibliographic Research in Computational Linguistics},
+ url = {http://www.lrec-conf.org/proceedings/lrec2008/pdf/445_paper.pdf},
+ year = {2008}
+}
diff --git a/content/publication/bird-etal-2008-acl/index.md b/content/publication/bird-etal-2008-acl/index.md
@@ -0,0 +1,32 @@
+---
+title: 'The ACL Anthology Reference Corpus: A Reference Dataset for Bibliographic
+  Research in Computational Linguistics'
+authors:
+- Steven Bird
+- Robert Dale
+- Bonnie Dorr
+- Bryan Gibson
+- Mark Joseph
+- Min-Yen Kan
+- Dongwon Lee
+- Brett Powley
+- Dragomir Radev
+- Yee Fan Tan
+date: '2008-05-01'
+publishDate: '2024-07-11T07:40:56.554775Z'
+publication_types:
+- paper-conference
+publication: "*Proceedings of the Sixth International Conference on Language Resources
+  and Evaluation (LREC'08)*"
+abstract: The ACL Anthology is a digital archive of conference and journal papers
+  in natural language processing and computational linguistics. Its primary purpose
+  is to serve as a reference repository of research results, but we believe that it
+  can also be an object of study and a platform for research in its own right. We
+  describe an enriched and standardized reference corpus derived from the ACL Anthology
+  that can be used for research in scholarly document processing. This corpus, which
+  we call the ACL Anthology Reference Corpus (ACL ARC), brings together the recent
+  activities of a number of research groups around the world. Our goal is to make
+  the corpus widely available, and to encourage other researchers to use it as a standard
+  testbed for experiments in both bibliographic and bibliometric research.
+url_pdf: http://www.lrec-conf.org/proceedings/lrec2008/pdf/445_paper.pdf
+---
diff --git a/content/publication/bysani-kan-2012-integrating/cite.bib b/content/publication/bysani-kan-2012-integrating/cite.bib
@@ -0,0 +1,13 @@
+@inproceedings{bysani-kan-2012-integrating,
+ address = {Jeju Island, Korea},
+ author = {Bysani, Praveen  and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the ACL-2012 Special Workshop on Rediscovering 50 Years of Discoveries},
+ editor = {Banchs, Rafael E.},
+ month = {July},
+ pages = {83--87},
+ publisher = {Association for Computational Linguistics},
+ title = {Integrating User-Generated Content in the ACL Anthology},
+ url = {https://aclanthology.org/W12-3209},
+ year = {2012}
+}
diff --git a/content/publication/bysani-kan-2012-integrating/index.md b/content/publication/bysani-kan-2012-integrating/index.md
@@ -0,0 +1,15 @@
+---
+title: Integrating User-Generated Content in the ACL Anthology
+authors:
+- Praveen Bysani
+- Min-Yen Kan
+date: '2012-07-01'
+publishDate: '2024-07-11T07:40:56.444716Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the ACL-2012 Special Workshop on Rediscovering 50 Years
+  of Discoveries*'
+links:
+- name: URL
+  url: https://aclanthology.org/W12-3209
+---
diff --git a/content/publication/cao-etal-2020-expertise/cite.bib b/content/publication/cao-etal-2020-expertise/cite.bib
@@ -0,0 +1,22 @@
+@inproceedings{cao-etal-2020-expertise,
+ abstract = {The curse of knowledge can impede communication between experts and laymen. We propose a new task of expertise style transfer and contribute a manually annotated dataset with the goal of alleviating such cognitive biases. Solving this task not only simplifies the professional language, but also improves the accuracy and expertise level of laymen descriptions using simple words. This is a challenging task, unaddressed in previous work, as it requires the models to have expert intelligence in order to modify text with a deep understanding of domain knowledge and structures. We establish the benchmark performance of five state-of-the-art models for style transfer and text simplification. The results demonstrate a significant gap between machine and human performance. We also discuss the challenges of automatic evaluation, to provide insights into future research directions. The dataset is publicly available at https://srhthu.github.io/expertise-style-transfer/.},
+ address = {Online},
+ author = {Cao, Yixin  and
+Shui, Ruihao  and
+Pan, Liangming  and
+Kan, Min-Yen  and
+Liu, Zhiyuan  and
+Chua, Tat-Seng},
+ booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
+ doi = {10.18653/v1/2020.acl-main.100},
+ editor = {Jurafsky, Dan  and
+Chai, Joyce  and
+Schluter, Natalie  and
+Tetreault, Joel},
+ month = {July},
+ pages = {1061--1071},
+ publisher = {Association for Computational Linguistics},
+ title = {Expertise Style Transfer: A New Task Towards Better Communication between Experts and Laymen},
+ url = {https://aclanthology.org/2020.acl-main.100},
+ year = {2020}
+}
diff --git a/content/publication/cao-etal-2020-expertise/index.md b/content/publication/cao-etal-2020-expertise/index.md
@@ -0,0 +1,33 @@
+---
+title: 'Expertise Style Transfer: A New Task Towards Better Communication between
+  Experts and Laymen'
+authors:
+- Yixin Cao
+- Ruihao Shui
+- Liangming Pan
+- Min-Yen Kan
+- Zhiyuan Liu
+- Tat-Seng Chua
+date: '2020-07-01'
+publishDate: '2024-07-11T07:40:56.298355Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 58th Annual Meeting of the Association for Computational
+  Linguistics*'
+doi: 10.18653/v1/2020.acl-main.100
+abstract: The curse of knowledge can impede communication between experts and laymen.
+  We propose a new task of expertise style transfer and contribute a manually annotated
+  dataset with the goal of alleviating such cognitive biases. Solving this task not
+  only simplifies the professional language, but also improves the accuracy and expertise
+  level of laymen descriptions using simple words. This is a challenging task, unaddressed
+  in previous work, as it requires the models to have expert intelligence in order
+  to modify text with a deep understanding of domain knowledge and structures. We
+  establish the benchmark performance of five state-of-the-art models for style transfer
+  and text simplification. The results demonstrate a significant gap between machine
+  and human performance. We also discuss the challenges of automatic evaluation, to
+  provide insights into future research directions. The dataset is publicly available
+  at https://srhthu.github.io/expertise-style-transfer/.
+links:
+- name: URL
+  url: https://aclanthology.org/2020.acl-main.100
+---
diff --git a/content/publication/chandrasekaran-kan-2018-countering/cite.bib b/content/publication/chandrasekaran-kan-2018-countering/cite.bib
@@ -0,0 +1,18 @@
+@inproceedings{chandrasekaran-kan-2018-countering,
+ abstract = {We systematically confirm that instructors are strongly influenced by the user interface presentation of Massive Online Open Course (MOOC) discussion forums. In a large scale dataset, we conclusively show that instructor interventions exhibit strong position bias, as measured by the position where the thread appeared on the user interface at the time of intervention. We measure and remove this bias, enabling unbiased statistical modelling and evaluation. We show that our de-biased classifier improves predicting interventions over the state-of-the-art on courses with sufficient number of interventions by 8.2% in F1 and 24.4% in recall on average.},
+ address = {Melbourne, Australia},
+ author = {Chandrasekaran, Muthu Kumar  and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the 5th Workshop on Natural Language Processing Techniques for Educational Applications},
+ doi = {10.18653/v1/W18-3720},
+ editor = {Tseng, Yuen-Hsien  and
+Chen, Hsin-Hsi  and
+Ng, Vincent  and
+Komachi, Mamoru},
+ month = {July},
+ pages = {135--142},
+ publisher = {Association for Computational Linguistics},
+ title = {Countering Position Bias in Instructor Interventions in MOOC Discussion Forums},
+ url = {https://aclanthology.org/W18-3720},
+ year = {2018}
+}
diff --git a/content/publication/chandrasekaran-kan-2018-countering/index.md b/content/publication/chandrasekaran-kan-2018-countering/index.md
@@ -0,0 +1,24 @@
+---
+title: Countering Position Bias in Instructor Interventions in MOOC Discussion Forums
+authors:
+- Muthu Kumar Chandrasekaran
+- Min-Yen Kan
+date: '2018-07-01'
+publishDate: '2024-07-11T07:40:56.347855Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 5th Workshop on Natural Language Processing Techniques
+  for Educational Applications*'
+doi: 10.18653/v1/W18-3720
+abstract: We systematically confirm that instructors are strongly influenced by the
+  user interface presentation of Massive Online Open Course (MOOC) discussion forums.
+  In a large scale dataset, we conclusively show that instructor interventions exhibit
+  strong position bias, as measured by the position where the thread appeared on the
+  user interface at the time of intervention. We measure and remove this bias, enabling
+  unbiased statistical modelling and evaluation. We show that our de-biased classifier
+  improves predicting interventions over the state-of-the-art on courses with sufficient
+  number of interventions by 8.2% in F1 and 24.4% in recall on average.
+links:
+- name: URL
+  url: https://aclanthology.org/W18-3720
+---
diff --git a/content/publication/chen-etal-2015-interactive/cite.bib b/content/publication/chen-etal-2015-interactive/cite.bib
@@ -0,0 +1,20 @@
+@inproceedings{chen-etal-2015-interactive,
+ address = {Beijing, China},
+ author = {Chen, Tao  and
+Zheng, Naijia  and
+Zhao, Yue  and
+Chandrasekaran, Muthu Kumar  and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the 2nd Workshop on Natural Language Processing Techniques for Educational Applications},
+ doi = {10.18653/v1/W15-4406},
+ editor = {Chen, Hsin-Hsi  and
+Tseng, Yuen-Hsien  and
+Matsumoto, Yuji  and
+Wong, Lung Hsiang},
+ month = {July},
+ pages = {34--42},
+ publisher = {Association for Computational Linguistics},
+ title = {Interactive Second Language Learning from News Websites},
+ url = {https://aclanthology.org/W15-4406},
+ year = {2015}
+}
diff --git a/content/publication/chen-etal-2015-interactive/index.md b/content/publication/chen-etal-2015-interactive/index.md
@@ -0,0 +1,19 @@
+---
+title: Interactive Second Language Learning from News Websites
+authors:
+- Tao Chen
+- Naijia Zheng
+- Yue Zhao
+- Muthu Kumar Chandrasekaran
+- Min-Yen Kan
+date: '2015-07-01'
+publishDate: '2024-07-11T07:40:56.401303Z'
+publication_types:
+- paper-conference
+publication: '*Proceedings of the 2nd Workshop on Natural Language Processing Techniques
+  for Educational Applications*'
+doi: 10.18653/v1/W15-4406
+links:
+- name: URL
+  url: https://aclanthology.org/W15-4406
+---
diff --git a/content/publication/councill-etal-2008-parscit/cite.bib b/content/publication/councill-etal-2008-parscit/cite.bib
@@ -0,0 +1,20 @@
+@inproceedings{councill-etal-2008-parscit,
+ abstract = {We describe ParsCit, a freely available, open-source implementation of a reference string parsing package. At the core of ParsCit is a trained conditional random field (CRF) model used to label the token sequences in the reference string. A heuristic model wraps this core with added functionality to identify reference strings from a plain text file, and to retrieve the citation contexts. The package comes with utilities to run it as a web service or as a standalone utility. We compare ParsCit on three distinct reference string datasets and show that it compares well with other previously published work.},
+ address = {Marrakech, Morocco},
+ author = {Councill, Isaac  and
+Giles, C. Lee  and
+Kan, Min-Yen},
+ booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC'08)},
+ editor = {Calzolari, Nicoletta  and
+Choukri, Khalid  and
+Maegaard, Bente  and
+Mariani, Joseph  and
+Odijk, Jan  and
+Piperidis, Stelios  and
+Tapias, Daniel},
+ month = {May},
+ publisher = {European Language Resources Association (ELRA)},
+ title = {ParsCit: an Open-source CRF Reference String Parsing Package},
+ url = {http://www.lrec-conf.org/proceedings/lrec2008/pdf/166_paper.pdf},
+ year = {2008}
+}
diff --git a/content/publication/councill-etal-2008-parscit/index.md b/content/publication/councill-etal-2008-parscit/index.md
@@ -0,0 +1,22 @@
+---
+title: 'ParsCit: an Open-source CRF Reference String Parsing Package'
+authors:
+- Isaac Councill
+- C. Lee Giles
+- Min-Yen Kan
+date: '2008-05-01'
+publishDate: '2024-07-11T07:40:56.561520Z'
+publication_types:
+- paper-conference
+publication: "*Proceedings of the Sixth International Conference on Language Resources
+  and Evaluation (LREC'08)*"
+abstract: We describe ParsCit, a freely available, open-source implementation of a
+  reference string parsing package. At the core of ParsCit is a trained conditional
+  random field (CRF) model used to label the token sequences in the reference string.
+  A heuristic model wraps this core with added functionality to identify reference
+  strings from a plain text file, and to retrieve the citation contexts. The package
+  comes with utilities to run it as a web service or as a standalone utility. We compare
+  ParsCit on three distinct reference string datasets and show that it compares well
+  with other previously published work.
+url_pdf: http://www.lrec-conf.org/proceedings/lrec2008/pdf/166_paper.pdf
+---
diff --git a/content/publication/elmacioglu-etal-2007-psnus/cite.bib b/content/publication/elmacioglu-etal-2007-psnus/cite.bib
@@ -0,0 +1,18 @@
+@inproceedings{elmacioglu-etal-2007-psnus,
+ address = {Prague, Czech Republic},
+ author = {Elmacioglu, Ergin  and
+Tan, Yee Fan  and
+Yan, Su  and
+Kan, Min-Yen  and
+Lee, Dongwon},
+ booktitle = {Proceedings of the Fourth International Workshop on Semantic Evaluations (SemEval-2007)},
+ editor = {Agirre, Eneko  and
+Màrquez, Lluís  and
+Wicentowski, Richard},
+ month = {June},
+ pages = {268--271},
+ publisher = {Association for Computational Linguistics},
+ title = {PSNUS: Web People Name Disambiguation by Simple Clustering with Rich Features},
+ url = {https://aclanthology.org/S07-1058},
+ year = {2007}
+}