Skip to content

Commit

Permalink
Merge pull request #4 from WING-NUS/hugoblox-import-publications
Browse files Browse the repository at this point in the history
Hugo Blox Builder - Import latest publications
  • Loading branch information
knmnyn authored Jul 11, 2024
2 parents ae6b301 + 62721cc commit 85156b0
Show file tree
Hide file tree
Showing 128 changed files with 2,509 additions and 0 deletions.
11 changes: 11 additions & 0 deletions content/publication/acl-2017-association-linguistics/cite.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
@proceedings{acl-2017-association-linguistics,
  address   = {Vancouver, Canada},
  doi       = {10.18653/v1/P17-2},
  editor    = {Barzilay, Regina and
               Kan, Min-Yen},
  month     = jul,
  publisher = {Association for Computational Linguistics},
  title     = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  url       = {https://aclanthology.org/P17-2000},
  year      = {2017}
}
16 changes: 16 additions & 0 deletions content/publication/acl-2017-association-linguistics/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---
title: 'Proceedings of the 55th Annual Meeting of the Association for Computational
Linguistics (Volume 2: Short Papers)'
authors:
- Regina Barzilay
- Min-Yen Kan
date: '2017-07-01'
publishDate: '2024-07-11T07:40:56.389063Z'
publication_types:
- book
publication: '*Association for Computational Linguistics*'
doi: 10.18653/v1/P17-2
links:
- name: URL
url: https://aclanthology.org/P17-2000
---
19 changes: 19 additions & 0 deletions content/publication/bhola-etal-2020-retrieving/cite.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
@inproceedings{bhola-etal-2020-retrieving,
  abstract  = {We introduce a deep learning model to learn the set of enumerated job skills associated with a job description. In our analysis of a large-scale government job portal mycareersfuture.sg, we observe that as much as 65\% of job descriptions miss describing a significant number of relevant skills. Our model addresses this task from the perspective of an extreme multi-label classification (XMLC) problem, where descriptions are the evidence for the binary relevance of thousands of individual skills. Building upon the current state-of-the-art language modeling approaches such as BERT, we show our XMLC method improves on an existing baseline solution by over 9\% and 7\% absolute improvements in terms of recall and normalized discounted cumulative gain. We further show that our approach effectively addresses the missing skills problem, and helps in recovering relevant skills that were missed out in the job postings by taking into account the structured semantic representation of skills and their co-occurrences through a Correlation Aware Bootstrapping process. We further show that our approach, to ensure the BERT-XMLC model accounts for structured semantic representation of skills and their co-occurrences through a Correlation Aware Bootstrapping process, effectively addresses the missing skills problem, and helps in recovering relevant skills that were missed out in the job postings. To facilitate future research and replication of our work, we have made the dataset and the implementation of our model publicly available.},
  address   = {Barcelona, Spain (Online)},
  author    = {Bhola, Akshay and
               Halder, Kishaloy and
               Prasad, Animesh and
               Kan, Min-Yen},
  booktitle = {Proceedings of the 28th International Conference on Computational Linguistics},
  doi       = {10.18653/v1/2020.coling-main.513},
  editor    = {Scott, Donia and
               Bel, Nuria and
               Zong, Chengqing},
  month     = dec,
  pages     = {5832--5842},
  publisher = {International Committee on Computational Linguistics},
  title     = {Retrieving Skills from Job Descriptions: A Language Model Based Extreme Multi-label Classification Framework},
  url       = {https://aclanthology.org/2020.coling-main.513},
  year      = {2020}
}
37 changes: 37 additions & 0 deletions content/publication/bhola-etal-2020-retrieving/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
---
title: 'Retrieving Skills from Job Descriptions: A Language Model Based Extreme Multi-label
Classification Framework'
authors:
- Akshay Bhola
- Kishaloy Halder
- Animesh Prasad
- Min-Yen Kan
date: '2020-12-01'
publishDate: '2024-07-11T07:40:56.291153Z'
publication_types:
- paper-conference
publication: '*Proceedings of the 28th International Conference on Computational Linguistics*'
doi: 10.18653/v1/2020.coling-main.513
abstract: We introduce a deep learning model to learn the set of enumerated job skills
associated with a job description. In our analysis of a large-scale government job
portal mycareersfuture.sg, we observe that as much as 65% of job descriptions miss
describing a significant number of relevant skills. Our model addresses this task
from the perspective of an extreme multi-label classification (XMLC) problem, where
descriptions are the evidence for the binary relevance of thousands of individual
skills. Building upon the current state-of-the-art language modeling approaches
such as BERT, we show our XMLC method improves on an existing baseline solution
by over 9% and 7% absolute improvements in terms of recall and normalized discounted
cumulative gain. We further show that our approach effectively addresses the missing
skills problem, and helps in recovering relevant skills that were missed out in
the job postings by taking into account the structured semantic representation of
skills and their co-occurrences through a Correlation Aware Bootstrapping process.
We further show that our approach, to ensure the BERT-XMLC model accounts for structured
semantic representation of skills and their co-occurrences through a Correlation
Aware Bootstrapping process, effectively addresses the missing skills problem, and
helps in recovering relevant skills that were missed out in the job postings. To
facilitate future research and replication of our work, we have made the dataset
and the implementation of our model publicly available.
links:
- name: URL
url: https://aclanthology.org/2020.coling-main.513
---
27 changes: 27 additions & 0 deletions content/publication/bird-etal-2008-acl/cite.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
@inproceedings{bird-etal-2008-acl,
  abstract  = {The ACL Anthology is a digital archive of conference and journal papers in natural language processing and computational linguistics. Its primary purpose is to serve as a reference repository of research results, but we believe that it can also be an object of study and a platform for research in its own right. We describe an enriched and standardized reference corpus derived from the ACL Anthology that can be used for research in scholarly document processing. This corpus, which we call the ACL Anthology Reference Corpus (ACL ARC), brings together the recent activities of a number of research groups around the world. Our goal is to make the corpus widely available, and to encourage other researchers to use it as a standard testbed for experiments in both bibliographic and bibliometric research.},
  address   = {Marrakech, Morocco},
  author    = {Bird, Steven and
               Dale, Robert and
               Dorr, Bonnie and
               Gibson, Bryan and
               Joseph, Mark and
               Kan, Min-Yen and
               Lee, Dongwon and
               Powley, Brett and
               Radev, Dragomir and
               Tan, Yee Fan},
  booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC'08)},
  editor    = {Calzolari, Nicoletta and
               Choukri, Khalid and
               Maegaard, Bente and
               Mariani, Joseph and
               Odijk, Jan and
               Piperidis, Stelios and
               Tapias, Daniel},
  month     = may,
  publisher = {European Language Resources Association (ELRA)},
  title     = {The {ACL} {Anthology} {Reference} {Corpus}: A Reference Dataset for Bibliographic Research in Computational Linguistics},
  url       = {http://www.lrec-conf.org/proceedings/lrec2008/pdf/445_paper.pdf},
  year      = {2008}
}
32 changes: 32 additions & 0 deletions content/publication/bird-etal-2008-acl/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
---
title: 'The ACL Anthology Reference Corpus: A Reference Dataset for Bibliographic
Research in Computational Linguistics'
authors:
- Steven Bird
- Robert Dale
- Bonnie Dorr
- Bryan Gibson
- Mark Joseph
- Min-Yen Kan
- Dongwon Lee
- Brett Powley
- Dragomir Radev
- Yee Fan Tan
date: '2008-05-01'
publishDate: '2024-07-11T07:40:56.554775Z'
publication_types:
- paper-conference
publication: "*Proceedings of the Sixth International Conference on Language Resources
and Evaluation (LREC'08)*"
abstract: The ACL Anthology is a digital archive of conference and journal papers
in natural language processing and computational linguistics. Its primary purpose
is to serve as a reference repository of research results, but we believe that it
can also be an object of study and a platform for research in its own right. We
describe an enriched and standardized reference corpus derived from the ACL Anthology
that can be used for research in scholarly document processing. This corpus, which
we call the ACL Anthology Reference Corpus (ACL ARC), brings together the recent
activities of a number of research groups around the world. Our goal is to make
the corpus widely available, and to encourage other researchers to use it as a standard
testbed for experiments in both bibliographic and bibliometric research.
url_pdf: http://www.lrec-conf.org/proceedings/lrec2008/pdf/445_paper.pdf
---
13 changes: 13 additions & 0 deletions content/publication/bysani-kan-2012-integrating/cite.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
@inproceedings{bysani-kan-2012-integrating,
  address   = {Jeju Island, Korea},
  author    = {Bysani, Praveen and
               Kan, Min-Yen},
  booktitle = {Proceedings of the ACL-2012 Special Workshop on Rediscovering 50 Years of Discoveries},
  editor    = {Banchs, Rafael E.},
  month     = jul,
  pages     = {83--87},
  publisher = {Association for Computational Linguistics},
  title     = {Integrating User-Generated Content in the {ACL} {Anthology}},
  url       = {https://aclanthology.org/W12-3209},
  year      = {2012}
}
15 changes: 15 additions & 0 deletions content/publication/bysani-kan-2012-integrating/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
---
title: Integrating User-Generated Content in the ACL Anthology
authors:
- Praveen Bysani
- Min-Yen Kan
date: '2012-07-01'
publishDate: '2024-07-11T07:40:56.444716Z'
publication_types:
- paper-conference
publication: '*Proceedings of the ACL-2012 Special Workshop on Rediscovering 50 Years
of Discoveries*'
links:
- name: URL
url: https://aclanthology.org/W12-3209
---
22 changes: 22 additions & 0 deletions content/publication/cao-etal-2020-expertise/cite.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
@inproceedings{cao-etal-2020-expertise,
  abstract  = {The curse of knowledge can impede communication between experts and laymen. We propose a new task of expertise style transfer and contribute a manually annotated dataset with the goal of alleviating such cognitive biases. Solving this task not only simplifies the professional language, but also improves the accuracy and expertise level of laymen descriptions using simple words. This is a challenging task, unaddressed in previous work, as it requires the models to have expert intelligence in order to modify text with a deep understanding of domain knowledge and structures. We establish the benchmark performance of five state-of-the-art models for style transfer and text simplification. The results demonstrate a significant gap between machine and human performance. We also discuss the challenges of automatic evaluation, to provide insights into future research directions. The dataset is publicly available at https://srhthu.github.io/expertise-style-transfer/.},
  address   = {Online},
  author    = {Cao, Yixin and
               Shui, Ruihao and
               Pan, Liangming and
               Kan, Min-Yen and
               Liu, Zhiyuan and
               Chua, Tat-Seng},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
  doi       = {10.18653/v1/2020.acl-main.100},
  editor    = {Jurafsky, Dan and
               Chai, Joyce and
               Schluter, Natalie and
               Tetreault, Joel},
  month     = jul,
  pages     = {1061--1071},
  publisher = {Association for Computational Linguistics},
  title     = {Expertise Style Transfer: A New Task Towards Better Communication between Experts and Laymen},
  url       = {https://aclanthology.org/2020.acl-main.100},
  year      = {2020}
}
33 changes: 33 additions & 0 deletions content/publication/cao-etal-2020-expertise/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
---
title: 'Expertise Style Transfer: A New Task Towards Better Communication between
Experts and Laymen'
authors:
- Yixin Cao
- Ruihao Shui
- Liangming Pan
- Min-Yen Kan
- Zhiyuan Liu
- Tat-Seng Chua
date: '2020-07-01'
publishDate: '2024-07-11T07:40:56.298355Z'
publication_types:
- paper-conference
publication: '*Proceedings of the 58th Annual Meeting of the Association for Computational
Linguistics*'
doi: 10.18653/v1/2020.acl-main.100
abstract: The curse of knowledge can impede communication between experts and laymen.
We propose a new task of expertise style transfer and contribute a manually annotated
dataset with the goal of alleviating such cognitive biases. Solving this task not
only simplifies the professional language, but also improves the accuracy and expertise
level of laymen descriptions using simple words. This is a challenging task, unaddressed
in previous work, as it requires the models to have expert intelligence in order
to modify text with a deep understanding of domain knowledge and structures. We
establish the benchmark performance of five state-of-the-art models for style transfer
and text simplification. The results demonstrate a significant gap between machine
and human performance. We also discuss the challenges of automatic evaluation, to
provide insights into future research directions. The dataset is publicly available
at https://srhthu.github.io/expertise-style-transfer/.
links:
- name: URL
url: https://aclanthology.org/2020.acl-main.100
---
18 changes: 18 additions & 0 deletions content/publication/chandrasekaran-kan-2018-countering/cite.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
@inproceedings{chandrasekaran-kan-2018-countering,
  abstract  = {We systematically confirm that instructors are strongly influenced by the user interface presentation of Massive Online Open Course (MOOC) discussion forums. In a large scale dataset, we conclusively show that instructor interventions exhibit strong position bias, as measured by the position where the thread appeared on the user interface at the time of intervention. We measure and remove this bias, enabling unbiased statistical modelling and evaluation. We show that our de-biased classifier improves predicting interventions over the state-of-the-art on courses with sufficient number of interventions by 8.2\% in F1 and 24.4\% in recall on average.},
  address   = {Melbourne, Australia},
  author    = {Chandrasekaran, Muthu Kumar and
               Kan, Min-Yen},
  booktitle = {Proceedings of the 5th Workshop on Natural Language Processing Techniques for Educational Applications},
  doi       = {10.18653/v1/W18-3720},
  editor    = {Tseng, Yuen-Hsien and
               Chen, Hsin-Hsi and
               Ng, Vincent and
               Komachi, Mamoru},
  month     = jul,
  pages     = {135--142},
  publisher = {Association for Computational Linguistics},
  title     = {Countering Position Bias in Instructor Interventions in {MOOC} Discussion Forums},
  url       = {https://aclanthology.org/W18-3720},
  year      = {2018}
}
24 changes: 24 additions & 0 deletions content/publication/chandrasekaran-kan-2018-countering/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
---
title: Countering Position Bias in Instructor Interventions in MOOC Discussion Forums
authors:
- Muthu Kumar Chandrasekaran
- Min-Yen Kan
date: '2018-07-01'
publishDate: '2024-07-11T07:40:56.347855Z'
publication_types:
- paper-conference
publication: '*Proceedings of the 5th Workshop on Natural Language Processing Techniques
for Educational Applications*'
doi: 10.18653/v1/W18-3720
abstract: We systematically confirm that instructors are strongly influenced by the
user interface presentation of Massive Online Open Course (MOOC) discussion forums.
In a large scale dataset, we conclusively show that instructor interventions exhibit
strong position bias, as measured by the position where the thread appeared on the
user interface at the time of intervention. We measure and remove this bias, enabling
unbiased statistical modelling and evaluation. We show that our de-biased classifier
improves predicting interventions over the state-of-the-art on courses with sufficient
number of interventions by 8.2% in F1 and 24.4% in recall on average.
links:
- name: URL
url: https://aclanthology.org/W18-3720
---
20 changes: 20 additions & 0 deletions content/publication/chen-etal-2015-interactive/cite.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
@inproceedings{chen-etal-2015-interactive,
  address   = {Beijing, China},
  author    = {Chen, Tao and
               Zheng, Naijia and
               Zhao, Yue and
               Chandrasekaran, Muthu Kumar and
               Kan, Min-Yen},
  booktitle = {Proceedings of the 2nd Workshop on Natural Language Processing Techniques for Educational Applications},
  doi       = {10.18653/v1/W15-4406},
  editor    = {Chen, Hsin-Hsi and
               Tseng, Yuen-Hsien and
               Matsumoto, Yuji and
               Wong, Lung Hsiang},
  month     = jul,
  pages     = {34--42},
  publisher = {Association for Computational Linguistics},
  title     = {Interactive Second Language Learning from News Websites},
  url       = {https://aclanthology.org/W15-4406},
  year      = {2015}
}
19 changes: 19 additions & 0 deletions content/publication/chen-etal-2015-interactive/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
---
title: Interactive Second Language Learning from News Websites
authors:
- Tao Chen
- Naijia Zheng
- Yue Zhao
- Muthu Kumar Chandrasekaran
- Min-Yen Kan
date: '2015-07-01'
publishDate: '2024-07-11T07:40:56.401303Z'
publication_types:
- paper-conference
publication: '*Proceedings of the 2nd Workshop on Natural Language Processing Techniques
for Educational Applications*'
doi: 10.18653/v1/W15-4406
links:
- name: URL
url: https://aclanthology.org/W15-4406
---
20 changes: 20 additions & 0 deletions content/publication/councill-etal-2008-parscit/cite.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
@inproceedings{councill-etal-2008-parscit,
  abstract  = {We describe ParsCit, a freely available, open-source implementation of a reference string parsing package. At the core of ParsCit is a trained conditional random field (CRF) model used to label the token sequences in the reference string. A heuristic model wraps this core with added functionality to identify reference strings from a plain text file, and to retrieve the citation contexts. The package comes with utilities to run it as a web service or as a standalone utility. We compare ParsCit on three distinct reference string datasets and show that it compares well with other previously published work.},
  address   = {Marrakech, Morocco},
  author    = {Councill, Isaac and
               Giles, C. Lee and
               Kan, Min-Yen},
  booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC'08)},
  editor    = {Calzolari, Nicoletta and
               Choukri, Khalid and
               Maegaard, Bente and
               Mariani, Joseph and
               Odijk, Jan and
               Piperidis, Stelios and
               Tapias, Daniel},
  month     = may,
  publisher = {European Language Resources Association (ELRA)},
  title     = {{ParsCit}: an Open-source {CRF} Reference String Parsing Package},
  url       = {http://www.lrec-conf.org/proceedings/lrec2008/pdf/166_paper.pdf},
  year      = {2008}
}
22 changes: 22 additions & 0 deletions content/publication/councill-etal-2008-parscit/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
---
title: 'ParsCit: an Open-source CRF Reference String Parsing Package'
authors:
- Isaac Councill
- C. Lee Giles
- Min-Yen Kan
date: '2008-05-01'
publishDate: '2024-07-11T07:40:56.561520Z'
publication_types:
- paper-conference
publication: "*Proceedings of the Sixth International Conference on Language Resources
and Evaluation (LREC'08)*"
abstract: We describe ParsCit, a freely available, open-source implementation of a
reference string parsing package. At the core of ParsCit is a trained conditional
random field (CRF) model used to label the token sequences in the reference string.
A heuristic model wraps this core with added functionality to identify reference
strings from a plain text file, and to retrieve the citation contexts. The package
comes with utilities to run it as a web service or as a standalone utility. We compare
ParsCit on three distinct reference string datasets and show that it compares well
with other previously published work.
url_pdf: http://www.lrec-conf.org/proceedings/lrec2008/pdf/166_paper.pdf
---
18 changes: 18 additions & 0 deletions content/publication/elmacioglu-etal-2007-psnus/cite.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
@inproceedings{elmacioglu-etal-2007-psnus,
  address   = {Prague, Czech Republic},
  author    = {Elmacioglu, Ergin and
               Tan, Yee Fan and
               Yan, Su and
               Kan, Min-Yen and
               Lee, Dongwon},
  booktitle = {Proceedings of the Fourth International Workshop on Semantic Evaluations (SemEval-2007)},
  editor    = {Agirre, Eneko and
               M{\`a}rquez, Llu{\'\i}s and
               Wicentowski, Richard},
  month     = jun,
  pages     = {268--271},
  publisher = {Association for Computational Linguistics},
  title     = {{PSNUS}: Web People Name Disambiguation by Simple Clustering with Rich Features},
  url       = {https://aclanthology.org/S07-1058},
  year      = {2007}
}
Loading

0 comments on commit 85156b0

Please sign in to comment.