references.bib


@article{xu_medex_2010,
	title = {{MedEx}: a medication information extraction system for clinical narratives},
	volume = {17},
	issn = {1067-5027, 1527-974X},
	shorttitle = {{MedEx}},
	url = {https://academic.oup.com/jamia/article-lookup/doi/10.1197/jamia.M3378},
	doi = {10.1197/jamia.M3378},
	language = {en},
	number = {1},
	urldate = {2020-06-06},
	journal = {Journal of the American Medical Informatics Association},
	author = {Xu, H. and Stenner, S. P. and Doan, S. and Johnson, K. B. and Waitman, L. R. and Denny, J. C.},
	month = jan,
	year = {2010},
	pages = {19--24},
	file = {Texte intégral:C\:\\Users\\33623\\Zotero\\storage\\TIMMA75N\\Xu et al. - 2010 - MedEx a medication information extraction system .pdf:application/pdf},
}

@article{piolat_version_2011,
	title = {La version française du dictionnaire pour le {LIWC} : modalités de construction et exemples d’utilisation},
	volume = {56},
	issn = {0033-2984},
	shorttitle = {La version française du dictionnaire pour le {LIWC}},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0033298411000355},
	doi = {10.1016/j.psfr.2011.07.002},
	language = {fr},
	number = {3},
	urldate = {2020-07-01},
	journal = {Psychologie Française},
	author = {Piolat, A. and Booth, R. J. and Chung, C. K. and Davids, M. and Pennebaker, J. W.},
	month = sep,
	year = {2011},
	pages = {145--159},
}

@article{weissman_construct_2019,
	title = {Construct validity of six sentiment analysis methods in the text of encounter notes of patients with critical illness},
	volume = {89},
	issn = {1532-0464},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S1532046418302284},
	doi = {10.1016/j.jbi.2018.12.001},
	language = {en},
	urldate = {2020-07-01},
	journal = {Journal of Biomedical Informatics},
	author = {Weissman, Gary E. and Ungar, Lyle H. and Harhay, Michael O. and Courtright, Katherine R. and Halpern, Scott D.},
	month = jan,
	year = {2019},
	pages = {114--121},
	file = {Texte intégral:C\:\\Users\\33623\\Zotero\\storage\\QBL57PMT\\Weissman et al. - 2019 - Construct validity of six sentiment analysis metho.pdf:application/pdf},
}

@article{muhammad_contextual_2016,
	title = {Contextual sentiment analysis for social media genres},
	volume = {108},
	issn = {0950-7051},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0950705116301149},
	doi = {10.1016/j.knosys.2016.05.032},
	language = {en},
	urldate = {2020-07-02},
	journal = {Knowledge-Based Systems},
	author = {Muhammad, Aminu and Wiratunga, Nirmalie and Lothian, Robert},
	month = sep,
	year = {2016},
	pages = {92--101},
	file = {Version soumise:C\:\\Users\\33623\\Zotero\\storage\\W8RZBLVV\\Muhammad et al. - 2016 - Contextual sentiment analysis for social media gen.pdf:application/pdf},
}

@inproceedings{denecke_using_2008,
	author = {Denecke, Kerstin},
	title = {Using {SentiWordNet} for multilingual sentiment analysis},
	booktitle = {2008 {IEEE} 24th {International} {Conference} on {Data} {Engineering} {Workshop}},
	pages = {507--512},
	publisher = {IEEE},
	address = {Cancun, Mexico},
	month = apr,
	year = {2008},
	isbn = {978-1-4244-2161-9 978-1-4244-2162-6},
	doi = {10.1109/ICDEW.2008.4498370},
	url = {http://ieeexplore.ieee.org/document/4498370/},
	urldate = {2020-07-02},
}

@misc{kiritchenko_effect_2017,
	title = {The {Effect} of {Negators}, {Modals}, and {Degree} {Adverbs} on {Sentiment} {Composition}},
	url = {http://arxiv.org/abs/1712.01794},
	abstract = {Negators, modals, and degree adverbs can significantly affect the sentiment of the words they modify. Often, their impact is modeled with simple heuristics; although, recent work has shown that such heuristics do not capture the true sentiment of multi-word phrases. We created a dataset of phrases that include various negators, modals, and degree adverbs, as well as their combinations. Both the phrases and their constituent content words were annotated with real-valued scores of sentiment association. Using phrasal terms in the created dataset, we analyze the impact of individual modifiers and the average effect of the groups of modifiers on overall sentiment. We find that the effect of modifiers varies substantially among the members of the same group. Furthermore, each individual modifier can affect sentiment words in different ways. Therefore, solutions based on statistical learning seem more promising than fixed hand-crafted rules on the task of automatic sentiment prediction.},
	urldate = {2020-07-03},
	author = {Kiritchenko, Svetlana and Mohammad, Saif M.},
	month = dec,
	year = {2017},
	eprint = {1712.01794},
	archiveprefix = {arXiv},
	primaryclass = {cs.CL},
	keywords = {Computer Science - Computation and Language},
	file = {arXiv Fulltext PDF:C\:\\Users\\33623\\Zotero\\storage\\HUGI9A2T\\Kiritchenko et Mohammad - 2017 - The Effect of Negators, Modals, and Degree Adverbs.pdf:application/pdf;arXiv.org Snapshot:C\:\\Users\\33623\\Zotero\\storage\\L2UMQSV9\\1712.html:text/html},
}

@book{boullier_opinion_2012,
	address = {Marseille},
	title = {Opinion mining et sentiment analysis : méthodes et outils},
	isbn = {978-2-8218-1887-3 978-2-8218-1227-7 978-2-8218-1226-0},
	language = {fr},
	publisher = {OpenEdition Press},
	author = {Boullier, Dominique and Lohard, Audrey},
	year = {2012},
	note = {OCLC: 1096948624},
}

@article{liu_sentiment_2012,
	author = {Liu, Bing},
	title = {Sentiment {Analysis} and {Opinion} {Mining}},
	journal = {Synthesis Lectures on Human Language Technologies},
	volume = {5},
	number = {1},
	pages = {1--167},
	month = may,
	year = {2012},
	issn = {1947-4040, 1947-4059},
	doi = {10.2200/S00416ED1V01Y201204HLT016},
	url = {http://www.morganclaypool.com/doi/abs/10.2200/S00416ED1V01Y201204HLT016},
	urldate = {2020-07-03},
	language = {en},
	file = {Version soumise:C\:\\Users\\33623\\Zotero\\storage\\8CK5SSCL\\Liu - 2012 - Sentiment Analysis and Opinion Mining.pdf:application/pdf},
}

@article{mantyla_evolution_2018,
	title = {The evolution of sentiment analysis—{A} review of research topics, venues, and top cited papers},
	volume = {27},
	issn = {1574-0137},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S1574013717300606},
	doi = {10.1016/j.cosrev.2017.10.002},
	language = {en},
	urldate = {2020-07-03},
	journal = {Computer Science Review},
	author = {Mäntylä, Mika V. and Graziotin, Daniel and Kuutila, Miikka},
	month = feb,
	year = {2018},
	pages = {16--32},
	file = {Version soumise:C\:\\Users\\33623\\Zotero\\storage\\FZ2H5BMX\\Mäntylä et al. - 2018 - The evolution of sentiment analysis—A review of re.pdf:application/pdf},
}

@inproceedings{baccianella_sentiwordnet_2010,
	address = {Valletta, Malta},
	title = {{SentiWordNet} 3.0: {An} {Enhanced} {Lexical} {Resource} for {Sentiment} {Analysis} and {Opinion} {Mining}},
	booktitle = {Proceedings of the 7th {Conference} on {Language} {Resources} and {Evaluation}},
	author = {Baccianella, Stefano and Esuli, Andrea and Sebastiani, Fabrizio},
	year = {2010},
	pages = {2200--2204},
}

@book{elliot_handbook_2008,
	editor = {Elliot, Andrew J.},
	title = {Handbook of approach and avoidance motivation},
	publisher = {Psychology Press},
	address = {New York},
	year = {2008},
	isbn = {978-0-8058-6019-1},
	keywords = {Avoidance (Psychology)},
	note = {OCLC: ocn214282414},
}

@article{ordenes_unveiling_2017,
	title = {Unveiling {What} {Is} {Written} in the {Stars}: {Analyzing} {Explicit}, {Implicit} and {Discourse} {Patterns} of {Sentiment} in {Social} {Media}},
	volume = {43},
	issn = {0093-5301, 1537-5277},
	shorttitle = {Unveiling {What} {Is} {Written} in the {Stars}},
	url = {https://academic.oup.com/jcr/article-lookup/doi/10.1093/jcr/ucw070},
	doi = {10.1093/jcr/ucw070},
	language = {en},
	number = {6},
	urldate = {2020-07-12},
	journal = {Journal of Consumer Research},
	author = {Villarroel Ordenes, Francisco and Ludwig, Stephan and de Ruyter, Ko and Grewal, Dhruv and Wetzels, Martin},
	month = apr,
	year = {2017},
	pages = {875--894},
	file = {Version acceptée:C\:\\Users\\33623\\Zotero\\storage\\DLHJ8I9U\\Ordenes et al. - 2017 - Unveiling What Is Written in the Stars Analyzing .pdf:application/pdf},
}

@article{chan_sentiment_2017,
	title = {Sentiment analysis in financial texts},
	volume = {94},
	issn = {0167-9236},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0167923616301828},
	doi = {10.1016/j.dss.2016.10.006},
	language = {en},
	urldate = {2020-07-12},
	journal = {Decision Support Systems},
	author = {Chan, Samuel W. K. and Chong, Mickey W. C.},
	month = feb,
	year = {2017},
	pages = {53--64},
}

% The citation key below is kept exactly as exported so existing citations keep
% resolving; it was generated from an affiliation that the exporter mistook for
% the first author. The real first author is Deng.
@article{grand_valley_state_university_interaction_2018,
	title = {The {Interaction} {Between} {Microblog} {Sentiment} and {Stock} {Returns}: {An} {Empirical} {Examination}},
	volume = {42},
	issn = {0276-7783, 2162-9730},
	shorttitle = {The {Interaction} {Between} {Microblog} {Sentiment} and {Stock} {Returns}},
	url = {https://misq.org/the-interaction-between-microblog-sentiment-and-stock-returns-an-empirical-examination.html},
	doi = {10.25300/MISQ/2018/14268},
	number = {3},
	urldate = {2020-07-12},
	journal = {MIS Quarterly},
	author = {Deng, Shuyuan and Huang, Zhijian (James) and Sinha, Atish P. and Zhao, Huimin},
	month = mar,
	year = {2018},
	pages = {895--918},
	file = {Version soumise:C\:\\Users\\33623\\Zotero\\storage\\7DKWR23D\\Grand Valley State University et al. - 2018 - The Interaction Between Microblog Sentiment and St.pdf:application/pdf},
}

@article{taboada_sentiment_2016,
	author = {Taboada, Maite},
	title = {Sentiment {Analysis}: {An} {Overview} from {Linguistics}},
	shorttitle = {Sentiment {Analysis}},
	journal = {Annual Review of Linguistics},
	volume = {2},
	number = {1},
	pages = {325--347},
	month = jan,
	year = {2016},
	issn = {2333-9683, 2333-9691},
	doi = {10.1146/annurev-linguistics-011415-040518},
	url = {http://www.annualreviews.org/doi/10.1146/annurev-linguistics-011415-040518},
	urldate = {2020-07-22},
	language = {en},
	file = {Version soumise:C\:\\Users\\33623\\Zotero\\storage\\8Q8SYSRU\\Taboada - 2016 - Sentiment Analysis An Overview from Linguistics.pdf:application/pdf},
}

@incollection{park_good_2011,
	address = {Berlin, Heidelberg},
	series = {Communications in {Computer} and {Information} {Science}},
	title = {Good {Friends}, {Bad} {News} - {Affect} and {Virality} in {Twitter}},
	volume = {185},
	isbn = {978-3-642-22308-2 978-3-642-22309-9},
	url = {http://link.springer.com/10.1007/978-3-642-22309-9_5},
	urldate = {2020-07-24},
	booktitle = {Future {Information} {Technology}},
	publisher = {Springer Berlin Heidelberg},
	author = {Hansen, Lars Kai and Arvidsson, Adam and Nielsen, Finn Aarup and Colleoni, Elanor and Etter, Michael},
	editor = {Park, James J. and Yang, Laurence T. and Lee, Changhoon},
	year = {2011},
	doi = {10.1007/978-3-642-22309-9_5},
	pages = {34--43},
	file = {Version soumise:C\:\\Users\\33623\\Zotero\\storage\\4HK4XNZB\\Hansen et al. - 2011 - Good Friends, Bad News - Affect and Virality in Tw.pdf:application/pdf},
}

@article{arnold_tidy_2017,
	title = {A {Tidy} {Data} {Model} for {Natural} {Language} {Processing} using {cleanNLP}},
	volume = {9},
	issn = {2073-4859},
	url = {https://journal.r-project.org/archive/2017/RJ-2017-035/index.html},
	doi = {10.32614/RJ-2017-035},
	abstract = {Recent advances in natural language processing have produced libraries that extract low-level features from a collection of raw texts. These features, known as annotations, are usually stored internally in hierarchical, tree-based data structures. This paper proposes a data model to represent annotations as a collection of normalized relational data tables optimized for exploratory data analysis and predictive modeling. The R package cleanNLP, which calls one of two state of the art NLP libraries (CoreNLP or spaCy), is presented as an implementation of this data model. It takes raw text as an input and returns a list of normalized tables. Specific annotations provided include tokenization, part of speech tagging, named entity recognition, sentiment analysis, dependency parsing, coreference resolution, and word embeddings. The package currently supports input text in English, German, French, and Spanish.},
	language = {en},
	number = {2},
	urldate = {2019-08-11},
	journal = {The R Journal},
	author = {Arnold, Taylor},
	year = {2017},
	pages = {248--267},
}

@article{van_der_maaten_laurens_visualizing_2008,
	title = {Visualizing {Data} using {t-SNE}},
	volume = {9},
	journal = {Journal of Machine Learning Research},
	author = {van der Maaten, Laurens and Hinton, Geoffrey},
	month = nov,
	year = {2008},
	pages = {2579--2605},
}

@article{shirdastian_using_2019,
	title = {Using big data analytics to study brand authenticity sentiments: {The} case of {Starbucks} on {Twitter}},
	volume = {48},
	issn = {0268-4012},
	shorttitle = {Using big data analytics to study brand authenticity sentiments},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0268401217302657},
	doi = {10.1016/j.ijinfomgt.2017.09.007},
	language = {en},
	urldate = {2019-10-08},
	journal = {International Journal of Information Management},
	author = {Shirdastian, Hamid and Laroche, Michel and Richard, Marie-Odile},
	month = oct,
	year = {2019},
	pages = {291--307},
}

@article{gunter_sentiment_2014,
	author = {Gunter, Barrie and Koteyko, Nelya and Atanasova, Dimitrinka},
	title = {Sentiment {Analysis}: {A} {Market}-{Relevant} and {Reliable} {Measure} of {Public} {Feeling}?},
	shorttitle = {Sentiment {Analysis}},
	journal = {International Journal of Market Research},
	volume = {56},
	number = {2},
	pages = {231--247},
	month = mar,
	year = {2014},
	issn = {1470-7853, 2515-2173},
	doi = {10.2501/IJMR-2014-014},
	url = {http://journals.sagepub.com/doi/10.2501/IJMR-2014-014},
	urldate = {2019-10-20},
	language = {en},
	abstract = {This paper critically examines emergent research with sentiment analysis tools to assess their current status and relevance to applied opinion and behaviour measurement. The rapid spread of online news and online chatter in blogs, micro-blogs and social media sites has created a potentially rich source of public opinion. Waves of public feeling are vented spontaneously on a wide range of issues on a minute-by-minute basis in the online world. These online discourses are continually being refreshed, and businesses and advertisers, governments and policy makers have woken up to the fact that this universe of self-perpetuating human sentiment could represent a valuable resource to guide political and business decisions. The massive size of this repository of emotional content renders manual analysis of it feasible only for tiny portions of its totality, and even then can be labour intensive. Computer scientists have however produced software tools that can apply linguistic rules to provide electronic readings of meanings and emotions. These tools are now being utilised by applied social science and market researchers to yield sentiment profiles from online discourses created within specific platforms that purport to represent reliable substitutes for more traditional, offline measures of public opinion. This paper considers what these tools have demonstrated so far and where caution in their application is still called for.},
}

@article{tausczik_psychological_2010,
	author = {Tausczik, Yla R. and Pennebaker, James W.},
	title = {The {Psychological} {Meaning} of {Words}: {LIWC} and {Computerized} {Text} {Analysis} {Methods}},
	shorttitle = {The {Psychological} {Meaning} of {Words}},
	journal = {Journal of Language and Social Psychology},
	volume = {29},
	number = {1},
	pages = {24--54},
	month = mar,
	year = {2010},
	issn = {0261-927X, 1552-6526},
	doi = {10.1177/0261927X09351676},
	url = {http://journals.sagepub.com/doi/10.1177/0261927X09351676},
	urldate = {2019-11-07},
	language = {en},
}

@inproceedings{nielsen_new_2011,
	series = {{CEUR} {Workshop} {Proceedings}},
	title = {A {New} {ANEW}: {Evaluation} of a {Word} {List} for {Sentiment} {Analysis} in {Microblogs}},
	volume = {718},
	url = {http://dblp.uni-trier.de/db/conf/msm/msm2011.html#Nielsen11},
	booktitle = {Proceedings of the {ESWC2011} {Workshop} on `{Making} {Sense} of {Microposts}': {Big} Things Come in Small Packages},
	publisher = {CEUR-WS.org},
	author = {Nielsen, Finn Årup},
	editor = {Rowe, Matthew and Stankovic, Milan and Dadzie, Aba-Sah and Hardey, Mariann},
	year = {2011},
	keywords = {dblp},
	pages = {93--98},
}

@inproceedings{ding_holistic_2008,
	author = {Ding, Xiaowen and Liu, Bing and Yu, Philip S.},
	title = {A {Holistic} {Lexicon}-based {Approach} to {Opinion} {Mining}},
	booktitle = {Proceedings of the 2008 {International} {Conference} on {Web} {Search} and {Data} {Mining}},
	series = {{WSDM} '08},
	pages = {231--240},
	publisher = {ACM},
	address = {New York, NY, USA},
	year = {2008},
	isbn = {978-1-59593-927-2},
	doi = {10.1145/1341531.1341561},
	url = {http://doi.acm.org/10.1145/1341531.1341561},
	keywords = {context dependent opinions, opinion mining, sentiment analysis},
}

@article{duval_analyse_2016,
	title = {L'analyse automatisée du ton médiatique : construction et utilisation de la version française du {Lexicoder} {Sentiment} {Dictionary}},
	volume = {49},
	issn = {0008-4239, 1744-9324},
	shorttitle = {L'analyse automatisée du ton médiatique},
	url = {https://www.cambridge.org/core/product/identifier/S000842391600055X/type/journal_article},
	doi = {10.1017/S000842391600055X},
	abstract = {Résumé Cet article introduit un nouveau dictionnaire permettant l'analyse automatisée du ton des médias francophones, que nous avons appelé Lexicoder Sentiment Dictionnaire Français ( LSDFr ) en référence au lexique anglophone de Young et Soroka (2012), Lexicoder Sentiment Dictionary ( LSD ) à partir duquel le LSDFr a été construit. Une fois construit, nous comparons le LSDFr au seul autre dictionnaire francophone existant de ce genre, Linguistic Inquiry and Word Count ( LIWC ). Nous testons ensuite la validité interne du LSDFr en le comparant avec un corpus de textes codés manuellement. Nous testons enfin la validité externe du LSDFr en mesurant jusqu'où le ton médiatique, calculé à l'aide de notre dictionnaire, prédit les intentions de vote des Québécois lors des quatre dernières campagnes électorales. En développant cet outil, notre objectif est de permettre à d'autres chercheurs d'effectuer des analyses médiatiques dans un corpus de textes comparables en français. , Abstract This article introduces a new dictionary for the automated analysis of the tone of French media. We named it the French Lexicoder Sentiment Dictionary ( LSDFr ) in reference to the English lexicon developed by Young and Soroka (2012), the Lexicoder Sentiment Dictionary ( LSD ), from which the LSDFr was built. We compare the LSDFr to the only other French sentiment lexicon, Linguistic Inquiry and Word Count ( LIWC ). First, we detail the construction of the dictionary. We then test the internal validity of the LSDFr comparing it with a corpus of manually coded texts. Finally, we test the external validity of LSDFr by measuring how the media tone, calculated using our dictionary, predicts voting intentions in the last four Quebec elections. Our goal is to enable other researchers to conduct media analyses with a comparable corpus of texts in French.},
	language = {fr},
	number = {2},
	urldate = {2020-04-10},
	journal = {Canadian Journal of Political Science},
	author = {Duval, Dominic and Pétry, François},
	month = jun,
	year = {2016},
	pages = {197--220},
}

@misc{banda_large-scale_2020,
	title = {A large-scale {COVID}-19 {Twitter} chatter dataset for open scientific research - an international collaboration},
	copyright = {Open Access},
	url = {https://zenodo.org/record/3757272},
	abstract = {Due to the relevance of the COVID-19 global pandemic, we are releasing our dataset of tweets acquired from the Twitter Stream related to COVID-19 chatter. Since our first release we have received additional data from our new collaborators, allowing this resource to grow to its current size. Dedicated data gathering started from March 11th yielding over 4 million tweets a day. We have added additional data provided by our new collaborators from January 27th to March 27th, to provide extra longitudinal coverage. The data collected from the stream captures all languages, but the higher prevalence are: English, Spanish, and French. We release all tweets and retweets on the full\_dataset.tsv file (205,409,413 unique tweets), and a cleaned version with no retweets on the full\_dataset-clean.tsv file (44,726,568 unique tweets). There are several practical reasons for us to leave the retweets, tracing important tweets and their dissemination is one of them. For NLP tasks we provide the top 1000 frequent terms in frequent\_terms.csv, the top 1000 bigrams in frequent\_bigrams.csv, and the top 1000 trigrams in frequent\_trigrams.csv. Some general statistics per day are included for both datasets in the statistics-full\_dataset.tsv and statistics-full\_dataset-clean.tsv files. More details can be found (and will be updated faster at: https://github.com/thepanacealab/covid19\_twitter) and our pre-print about the dataset (https://arxiv.org/abs/2004.03688) As always, the tweets distributed here are only tweet identifiers (with date and time added) due to the terms and conditions of Twitter to re-distribute Twitter data ONLY for research purposes. The need to be hydrated to be used.},
	language = {en},
	urldate = {2020-04-25},
	howpublished = {Zenodo},
	publisher = {Zenodo},
	author = {Banda, Juan M. and Tekumalla, Ramya and Wang, Guanyu and Yu, Jingyuan and Liu, Tuo and Ding, Yuning and Chowell, Gerardo},
	month = apr,
	year = {2020},
	doi = {10.5281/ZENODO.3757272},
	keywords = {covid-19, covid19, nlp, social media, twitter},
}

@article{pearce_no_2003,
	title = {Disaster {Management} and {Community} {Planning}, and {Public} {Participation}: {How} to {Achieve} {Sustainable} {Hazard} {Mitigation}},
	volume = {28},
	issn = {0921-030X},
	url = {http://link.springer.com/10.1023/A:1022917721797},
	doi = {10.1023/A:1022917721797},
	number = {2/3},
	urldate = {2020-08-22},
	journal = {Natural Hazards},
	author = {Pearce, Laurie},
	year = {2003},
	pages = {211--228},
}

@article{roberts_structural_2014,
	title = {Structural {Topic} {Models} for {Open}-{Ended} {Survey} {Responses}},
	volume = {58},
	issn = {0092-5853},
	shorttitle = {Structural {Topic} {Models} for {Open}-{Ended} {Survey} {Responses}},
	url = {http://doi.wiley.com/10.1111/ajps.12103},
	doi = {10.1111/ajps.12103},
	language = {en},
	number = {4},
	urldate = {2020-08-23},
	journal = {American Journal of Political Science},
	author = {Roberts, Margaret E. and Stewart, Brandon M. and Tingley, Dustin and Lucas, Christopher and Leder-Luis, Jetson and Gadarian, Shana Kushner and Albertson, Bethany and Rand, David G.},
	month = oct,
	year = {2014},
	pages = {1064--1082},
	file = {Version acceptée:C\:\\Users\\33623\\Zotero\\storage\\KXCQEYLT\\Roberts et al. - 2014 - Structural Topic Models for Open-Ended Survey Resp.pdf:application/pdf},
}

@misc{balech_first_2020,
	title = {The {First} {French} {COVID19} {Lockdown} {Twitter} {Dataset}},
	url = {http://arxiv.org/abs/2005.05075},
	abstract = {In this paper, we present a mainly French coronavirus Twitter dataset that we have been continuously collecting since lockdown restrictions have been enacted in France (in March 17, 2020). We offer our datasets and sentiment analysis annotations to the research community at https://github.com/calciu/COVID19-LockdownFr. They have been obtained using high performance computing (HPC) capabilities of our university's datacenter. We think that our contribution can facilitate analysis of online conversation dynamics reflecting people sentiments when facing severe home confinement restrictions determined by the outbreak of this world wide epidemic. We hope that our contribution will help decode shared experience and mood but also test the sensitivity of sentiment measurement instruments and incite the development of new instruments, methods and approaches.},
	urldate = {2020-08-23},
	author = {Balech, Sophie and Benavent, Christophe and Calciu, Mihai},
	month = may,
	year = {2020},
	eprint = {2005.05075},
	archiveprefix = {arXiv},
	primaryclass = {cs.SI},
	keywords = {Computer Science - Social and Information Networks, J.4},
	file = {arXiv Fulltext PDF:C\:\\Users\\33623\\Zotero\\storage\\Y3RMPSWX\\Balech et al. - 2020 - The First French COVID19 Lockdown Twitter Dataset.pdf:application/pdf;arXiv.org Snapshot:C\:\\Users\\33623\\Zotero\\storage\\PDEU5L9Q\\2005.html:text/html},
}

@article{cambria_jumping_2014,
	title = {Jumping {NLP} {Curves}: {A} {Review} of {Natural} {Language} {Processing} {Research}},
	volume = {9},
	issn = {1556-603X},
	shorttitle = {Jumping {NLP} {Curves}},
	url = {http://ieeexplore.ieee.org/document/6786458/},
	doi = {10.1109/MCI.2014.2307227},
	number = {2},
	urldate = {2020-08-23},
	journal = {IEEE Computational Intelligence Magazine},
	author = {Cambria, Erik and White, Bebo},
	month = may,
	year = {2014},
	pages = {48--57},
}

@article{anastasopoulos_computational_2017,
	author = {Anastasopoulos, Lefteris Jason and Moldogaziev, Tima T. and Scott, Tyler},
	title = {Computational {Text} {Analysis} for {Public} {Management} {Research}},
	journal = {SSRN Electronic Journal},
	year = {2017},
	issn = {1556-5068},
	doi = {10.2139/ssrn.3269520},
	url = {https://www.ssrn.com/abstract=3269520},
	urldate = {2020-08-23},
	language = {en},
}

@article{kobayashi_text_2018,
	author = {Kobayashi, Vladimer B. and Mol, Stefan T. and Berkers, Hannah A. and Kismihók, Gábor and Den Hartog, Deanne N.},
	title = {Text {Mining} in {Organizational} {Research}},
	journal = {Organizational Research Methods},
	volume = {21},
	number = {3},
	pages = {733--765},
	month = jul,
	year = {2018},
	issn = {1094-4281, 1552-7425},
	doi = {10.1177/1094428117722619},
	url = {http://journals.sagepub.com/doi/10.1177/1094428117722619},
	urldate = {2020-08-23},
	language = {en},
	file = {Texte intégral:C\:\\Users\\33623\\Zotero\\storage\\4XKYYB2Z\\Kobayashi et al. - 2018 - Text Mining in Organizational Research.pdf:application/pdf},
}

@article{kozlowski_geometry_2019,
	author = {Kozlowski, Austin C. and Taddy, Matt and Evans, James A.},
	title = {The {Geometry} of {Culture}: {Analyzing} the {Meanings} of {Class} through {Word} {Embeddings}},
	shorttitle = {The {Geometry} of {Culture}},
	journal = {American Sociological Review},
	volume = {84},
	number = {5},
	pages = {905--949},
	month = oct,
	year = {2019},
	issn = {0003-1224, 1939-8271},
	doi = {10.1177/0003122419877135},
	url = {http://journals.sagepub.com/doi/10.1177/0003122419877135},
	urldate = {2020-08-23},
	language = {en},
	abstract = {We argue word embedding models are a useful tool for the study of culture using a historical analysis of shared understandings of social class as an empirical case. Word embeddings represent semantic relations between words as relationships between vectors in a high-dimensional space, specifying a relational model of meaning consistent with contemporary theories of culture. Dimensions induced by word differences ( rich – poor) in these spaces correspond to dimensions of cultural meaning, and the projection of words onto these dimensions reflects widely shared associations, which we validate with surveys. Analyzing text from millions of books published over 100 years, we show that the markers of class continuously shifted amidst the economic transformations of the twentieth century, yet the basic cultural dimensions of class remained remarkably stable. The notable exception is education, which became tightly linked to affluence independent of its association with cultivated taste.},
	file = {Version soumise:C\:\\Users\\33623\\Zotero\\storage\\5CLHUEUR\\Kozlowski et al. - 2019 - The Geometry of Culture Analyzing the Meanings of.pdf:application/pdf},
}

@article{bourdieu_opinion_1973,
	title = {L'opinion publique n'existe pas},
	number = {318},
	journal = {Les Temps modernes},
	author = {Bourdieu, Pierre},
	month = jan,
	year = {1973},
	pages = {1292--1309},
}

@article{lock_quantitative_2015,
	title = {Quantitative content analysis as a method for business ethics research},
	volume = {24},
	issn = {0962-8770},
	url = {http://doi.wiley.com/10.1111/beer.12095},
	doi = {10.1111/beer.12095},
	language = {en},
	number = {S1},
	urldate = {2020-11-15},
	journal = {Business Ethics: A European Review},
	author = {Lock, Irina and Seele, Peter},
	month = jul,
	year = {2015},
	pages = {S24--S40},
}

@article{benoit_quanteda_2018,
	author = {Benoit, Kenneth and Watanabe, Kohei and Wang, Haiyan and Nulty, Paul and Obeng, Adam and Müller, Stefan and Matsuo, Akitaka},
	title = {quanteda: {An} {R} package for the quantitative analysis of textual data},
	shorttitle = {quanteda},
	journal = {Journal of Open Source Software},
	volume = {3},
	number = {30},
	pages = {774},
	month = oct,
	year = {2018},
	issn = {2475-9066},
	doi = {10.21105/joss.00774},
	url = {http://joss.theoj.org/papers/10.21105/joss.00774},
	urldate = {2019-01-15},
}

@article{coleman_computer_1975,
	title = {A computer readability formula designed for machine scoring},
	volume = {60},
	issn = {0021-9010},
	url = {http://content.apa.org/journals/apl/60/2/283},
	doi = {10.1037/h0076540},
	language = {en},
	number = {2},
	urldate = {2019-01-16},
	journal = {Journal of Applied Psychology},
	author = {Coleman, Meri and Liau, T. L.},
	year = {1975},
	pages = {283--284},
}

% NOTE(review): the spelling "Twelth" in booktitle is left untouched; it appears
% to match the title the publisher uses for this volume. Confirm against
% proceedings.mlr.press/v5 before correcting it.
@inproceedings{canini_online_2009,
	address = {Clearwater Beach, Florida, USA},
	series = {Proceedings of {Machine} {Learning} {Research}},
	title = {Online {Inference} of {Topics} with {Latent} {Dirichlet} {Allocation}},
	volume = {5},
	url = {http://proceedings.mlr.press/v5/canini09a.html},
	abstract = {Inference algorithms for topic models are typically designed to be run over an entire collection of documents after they have been observed. However, in many applications of these models, the collection grows over time, making it infeasible to run batch algorithms repeatedly. This problem can be addressed by using online algorithms, which update estimates of the topics as each document is observed. We introduce two related Rao-Blackwellized online inference algorithms for the latent Dirichlet allocation (LDA) model – incremental Gibbs samplers and particle filters – and compare their runtime and performance to that of existing algorithms.},
	booktitle = {Proceedings of the {Twelth} {International} {Conference} on {Artificial} {Intelligence} and {Statistics}},
	publisher = {PMLR},
	author = {Canini, Kevin and Shi, Lei and Griffiths, Thomas},
	editor = {van Dyk, David and Welling, Max},
	month = apr,
	year = {2009},
	pages = {65--72},
}

@misc{suster_investigation_2015,
	title = {An investigation into language complexity of {World}-of-{Warcraft} game-external texts},
	url = {http://arxiv.org/abs/1502.02655},
	abstract = {We present a language complexity analysis of World of Warcraft (WoW) community texts, which we compare to texts from a general corpus of web English. Results from several complexity types are presented, including lexical diversity, density, readability and syntactic complexity. The language of WoW texts is found to be comparable to the general corpus on some complexity measures, yet more specialized on other measures. Our findings can be used by educators willing to include game-related activities into school curricula.},
	urldate = {2019-01-21},
	author = {Šuster, Simon},
	month = feb,
	year = {2015},
	eprint = {1502.02655},
	archiveprefix = {arXiv},
	primaryclass = {cs.CL},
	keywords = {Computer Science - Computation and Language},
}

@inproceedings{sievert_ldavis_2014,
	title = {{LDAvis}: {A} method for visualizing and interpreting topics},
	address = {Baltimore, Maryland, USA},
	booktitle = {Proceedings of the {Workshop} on {Interactive} {Language} {Learning}, {Visualization}, and {Interfaces}},
	publisher = {Association for Computational Linguistics},
	author = {Sievert, Carson and Shirley, Kenneth},
	month = jun,
	year = {2014},
	pages = {63--70},
}

@book{sneath_numerical_1973,
	author = {Sneath, P. H. A. and Sokal, Robert R.},
	title = {Numerical taxonomy: the principles and practice of numerical classification},
	shorttitle = {Numerical taxonomy},
	series = {A {Series} of books in biology},
	year = {1973},
	publisher = {W. H. Freeman},
	address = {San Francisco},
	isbn = {978-0-7167-0697-7},
	keywords = {Numerical taxonomy},
}

@article{liu_towards_2017,
	author = {Liu, Shixia and Wang, Xiting and Liu, Mengchen and Zhu, Jun},
	title = {Towards better analysis of machine learning models: {A} visual analytics perspective},
	shorttitle = {Towards better analysis of machine learning models},
	journal = {Visual Informatics},
	volume = {1},
	number = {1},
	pages = {48--56},
	month = mar,
	year = {2017},
	issn = {2468502X},
	doi = {10.1016/j.visinf.2017.01.006},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S2468502X17300086},
	urldate = {2018-12-22},
	language = {en},
}

@misc{ribeiro_why_2016,
	title = {``{Why} {Should} {I} {Trust} {You}?'': {Explaining} the {Predictions} of {Any} {Classifier}},
	shorttitle = {``{Why} {Should} {I} {Trust} {You}?''},
	url = {http://arxiv.org/abs/1602.04938},
	abstract = {Despite widespread adoption, machine learning models remain mostly black boxes. Understanding the reasons behind predictions is, however, quite important in assessing trust, which is fundamental if one plans to take action based on a prediction, or when choosing whether to deploy a new model. Such understanding also provides insights into the model, which can be used to transform an untrustworthy model or prediction into a trustworthy one. In this work, we propose LIME, a novel explanation technique that explains the predictions of any classifier in an interpretable and faithful manner, by learning an interpretable model locally around the prediction. We also propose a method to explain models by presenting representative individual predictions and their explanations in a non-redundant way, framing the task as a submodular optimization problem. We demonstrate the flexibility of these methods by explaining different models for text (e.g. random forests) and image classification (e.g. neural networks). We show the utility of explanations via novel experiments, both simulated and with human subjects, on various scenarios that require trust: deciding if one should trust a prediction, choosing between models, improving an untrustworthy classifier, and identifying why a classifier should not be trusted.},
	urldate = {2018-12-22},
	eprint = {1602.04938},
	archiveprefix = {arXiv},
	primaryclass = {cs.LG},
	author = {Ribeiro, Marco Tulio and Singh, Sameer and Guestrin, Carlos},
	month = feb,
	year = {2016},
	keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning, Statistics - Machine Learning},
}

@article{tibshirani_regression_2011,
	title = {Regression shrinkage and selection via the lasso: a retrospective},
	volume = {73},
	issn = {13697412},
	shorttitle = {Regression shrinkage and selection via the lasso},
	url = {http://doi.wiley.com/10.1111/j.1467-9868.2011.00771.x},
	doi = {10.1111/j.1467-9868.2011.00771.x},
	language = {en},
	number = {3},
	urldate = {2018-12-22},
	journal = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
	author = {Tibshirani, Robert},
	month = jun,
	year = {2011},
	pages = {273--282},
}

@article{goldstein_peeking_2015,
	author = {Goldstein, Alex and Kapelner, Adam and Bleich, Justin and Pitkin, Emil},
	title = {Peeking {Inside} the {Black} {Box}: {Visualizing} {Statistical} {Learning} {With} {Plots} of {Individual} {Conditional} {Expectation}},
	shorttitle = {Peeking {Inside} the {Black} {Box}},
	journal = {Journal of Computational and Graphical Statistics},
	volume = {24},
	number = {1},
	pages = {44--65},
	month = jan,
	year = {2015},
	issn = {1061-8600, 1537-2715},
	doi = {10.1080/10618600.2014.907095},
	url = {http://www.tandfonline.com/doi/full/10.1080/10618600.2014.907095},
	urldate = {2018-12-22},
	language = {en},
}

@article{selya_practical_2012,
	title = {A {Practical} {Guide} to {Calculating} {Cohen}’s {$f^2$}, a {Measure} of {Local} {Effect} {Size}, from {PROC} {MIXED}},
	volume = {3},
	issn = {1664-1078},
	url = {http://journal.frontiersin.org/article/10.3389/fpsyg.2012.00111/abstract},
	doi = {10.3389/fpsyg.2012.00111},
	urldate = {2018-12-24},
	journal = {Frontiers in Psychology},
	author = {Selya, Arielle S. and Rose, Jennifer S. and Dierker, Lisa C. and Hedeker, Donald and Mermelstein, Robin J.},
	year = {2012},
	pages = {111},
}

@article{pekar_discovery_2008,
	title = {Discovery of subjective evaluations of product features in hotel reviews},
	volume = {14},
	issn = {1356-7667, 1479-1870},
	url = {http://journals.sagepub.com/doi/10.1177/1356766707087522},
	doi = {10.1177/1356766707087522},
	language = {en},
	number = {2},
	urldate = {2019-01-15},
	journal = {Journal of Vacation Marketing},
	author = {Pekar, Viktor and Ou, Shiyan},
	month = apr,
	year = {2008},
	pages = {145--155},
}

@article{puschmann_turning_2018,
	title = {Turning {Words} {Into} {Consumer} {Preferences}: {How} {Sentiment} {Analysis} {Is} {Framed} in {Research} and the {News} {Media}},
	volume = {4},
	issn = {2056-3051},
	shorttitle = {Turning {Words} {Into} {Consumer} {Preferences}},
	url = {http://journals.sagepub.com/doi/10.1177/2056305118797724},
	doi = {10.1177/2056305118797724},
	language = {en},
	number = {3},
	urldate = {2019-01-15},
	journal = {Social Media + Society},
	author = {Puschmann, Cornelius and Powell, Alison},
	month = jul,
	year = {2018},
	pages = {205630511879772},
}

@article{hug_loi_2004,
	author = {Hug, Marc},
	title = {La loi de {Menzerath} appliquée à un ensemble de textes},
	year = {2004},
	journal = {Lexicometrica},
}

@article{senter_automated_1967,
	title = {Automated {Readability} {Index}},
	author = {Senter, R. J.},
	month = nov,
	year = {1967},
}

@article{plutchik_psychoevolutionary_1982,
	title = {A psychoevolutionary theory of emotions},
	volume = {21},
	issn = {0539-0184, 1461-7412},
	url = {http://journals.sagepub.com/doi/10.1177/053901882021004003},
	doi = {10.1177/053901882021004003},
	language = {en},
	number = {4--5},
	urldate = {2019-01-18},
	journal = {Social Science Information},
	author = {Plutchik, Robert},
	month = jul,
	year = {1982},
	pages = {529--553},
}

@article{tweedie_how_1998,
	author = {Tweedie, Fiona J. and Baayen, R. Harald},
	title = {How {Variable} {May} a {Constant} be? {Measures} of {Lexical} {Richness} in {Perspective}},
	journal = {Computers and the Humanities},
	year = {1998},
	volume = {32},
	pages = {323--352},
}

@article{sigmund_panel_2017,
	author = {Sigmund, Michael and Ferstl, Robert},
	title = {Panel {Vector} {Autoregression} in {R} with the {Package} {Panelvar}},
	journal = {SSRN Electronic Journal},
	year = {2017},
	issn = {1556-5068},
	doi = {10.2139/ssrn.2896087},
	url = {https://www.ssrn.com/abstract=2896087},
	urldate = {2019-01-27},
	language = {en},
}

@article{gijsenberg_losses_2015,
	title = {Losses {Loom} {Longer} than {Gains}: {Modeling} the {Impact} of {Service} {Crises} on {Perceived} {Service} {Quality} over {Time}},
	volume = {52},
	issn = {0022-2437, 1547-7193},
	shorttitle = {Losses {Loom} {Longer} than {Gains}},
	url = {http://journals.sagepub.com/doi/10.1509/jmr.14.0140},
	doi = {10.1509/jmr.14.0140},
	language = {en},
	number = {5},
	urldate = {2019-01-27},
	journal = {Journal of Marketing Research},
	author = {Gijsenberg, Maarten J. and Van Heerde, Harald J. and Verhoef, Peter C.},
	month = oct,
	year = {2015},
	pages = {642--656},
}

@techreport{bennani_les_2019,
	author = {Bennani, Hamza and Gandré, Pauline and Monnery, Benjamin},
	title = {Les déterminants locaux de la participation numérique au {Grand} débat national: une analyse économétrique},
	type = {{EconomiX} {Working} {Papers}},
	number = {2019-7},
	institution = {University of Paris Nanterre, EconomiX},
	year = {2019},
	url = {https://EconPapers.repec.org/RePEc:drm:wpaper:2019-7},
	keywords = {electronic participation, Grand débat, local determinants},
	abstract = {This paper analyses the local determinants of the electronic participation to the "Grand débat". First, we highlight the spatial heterogeneity of the participants using their zip code. Second, we use an econometric approach to assess the local determinants of the general participation and the participation on each of the four topics of the "Grand débat". The results show that the median standard of living and the education level are the main determinants of the general participation, whereas some specific variables explain the participation of each of the four topics.},
}

@book{armstrong-warwick_data_nodate,
	title = {Data in {Your} {Language}: {The} {ECI} {Multilingual} {Corpus} 1},
	author = {Armstrong-Warwick, Susan and Thompson, Henry S. and McKelvie, David and Petitpierre, Dominique},
}

@book{isabelle_serca_les_2010,
	address = {Paris},
	series = {{Recherches} proustiennes},
	title = {Les coutures apparentes de la {Recherche} : {Proust} et la ponctuation},
	publisher = {Honoré Champion},
	author = {Serca, Isabelle},
	year = {2010},
}

@article{humphreys_automated_2018,
	author = {Humphreys, Ashlee and Wang, Rebecca Jen-Hui},
	editor = {Fischer, Eileen and Price, Linda},
	title = {Automated {Text} {Analysis} for {Consumer} {Research}},
	journal = {Journal of Consumer Research},
	volume = {44},
	number = {6},
	pages = {1274--1306},
	month = apr,
	year = {2018},
	issn = {0093-5301, 1537-5277},
	doi = {10.1093/jcr/ucx104},
	url = {https://academic.oup.com/jcr/article/44/6/1274/4283031},
	urldate = {2019-07-13},
	language = {en},
}

@article{blei_latent_2003,
	title = {Latent {Dirichlet} {Allocation}},
	volume = {3},
	issn = {1532-4435},
	url = {http://dl.acm.org/citation.cfm?id=944919.944937},
	journal = {Journal of Machine Learning Research},
	author = {Blei, David M. and Ng, Andrew Y. and Jordan, Michael I.},
	month = mar,
	year = {2003},
	pages = {993--1022},
}

@article{canut_sociolinguistique_2000,
	author = {Canut, Cécile},
	title = {De la sociolinguistique à la sociologie du langage : de l'usage des frontières},
	shorttitle = {De la sociolinguistique à la sociologie du langage},
	journal = {Langage et société},
	volume = {91},
	number = {1},
	pages = {89},
	year = {2000},
	issn = {0181-4095, 2101-0382},
	doi = {10.3917/ls.091.0089},
	url = {http://www.cairn.info/revue-langage-et-societe-2000-1-page-89.htm},
	urldate = {2019-07-14},
	language = {fr},
}

@article{abdaoui_feel_2017,
	author = {Abdaoui, Amine and Azé, Jérôme and Bringay, Sandra and Poncelet, Pascal},
	title = {{FEEL}: a {French} {Expanded} {Emotion} {Lexicon}},
	shorttitle = {{FEEL}},
	journal = {Language Resources and Evaluation},
	volume = {51},
	number = {3},
	pages = {833--855},
	month = sep,
	year = {2017},
	issn = {1574-020X, 1574-0218},
	doi = {10.1007/s10579-016-9364-5},
	url = {http://link.springer.com/10.1007/s10579-016-9364-5},
	urldate = {2019-07-14},
	language = {en},
}

@article{mohammad_crowdsourcing_2013,
	title = {Crowdsourcing a {Word}-{Emotion} {Association} {Lexicon}},
	volume = {29},
	issn = {08247935},
	url = {http://doi.wiley.com/10.1111/j.1467-8640.2012.00460.x},
	doi = {10.1111/j.1467-8640.2012.00460.x},
	language = {en},
	number = {3},
	urldate = {2019-07-14},
	journal = {Computational Intelligence},
	author = {Mohammad, Saif M. and Turney, Peter D.},
	month = aug,
	year = {2013},
	pages = {436--465},
}

@article{fruchterman_graph_1991,
	author = {Fruchterman, Thomas M. J. and Reingold, Edward M.},
	title = {Graph drawing by force-directed placement},
	journal = {Software: Practice and Experience},
	volume = {21},
	number = {11},
	pages = {1129--1164},
	month = nov,
	year = {1991},
	issn = {00380644, 1097024X},
	doi = {10.1002/spe.4380211102},
	url = {http://doi.wiley.com/10.1002/spe.4380211102},
	urldate = {2019-08-11},
	language = {en},
}

@article{arnold_tidy_2017-1,
	title = {A {Tidy} {Data} {Model} for {Natural} {Language} {Processing} using {cleanNLP}},
	volume = {9},
	issn = {2073-4859},
	url = {https://journal.r-project.org/archive/2017/RJ-2017-035/index.html},
	doi = {10.32614/RJ-2017-035},
	abstract = {Recent advances in natural language processing have produced libraries that extract low-level features from a collection of raw texts. These features, known as annotations, are usually stored internally in hierarchical, tree-based data structures. This paper proposes a data model to represent annotations as a collection of normalized relational data tables optimized for exploratory data analysis and predictive modeling. The R package cleanNLP, which calls one of two state of the art NLP libraries (CoreNLP or spaCy), is presented as an implementation of this data model. It takes raw text as an input and returns a list of normalized tables. Specific annotations provided include tokenization, part of speech tagging, named entity recognition, sentiment analysis, dependency parsing, coreference resolution, and word embeddings. The package currently supports input text in English, German, French, and Spanish.},
	language = {en},
	number = {2},
	urldate = {2019-08-11},
	journal = {The R Journal},
	author = {Arnold, Taylor},
	year = {2017},
	pages = {248},
}

@article{hornik_textcat_2013,
	title = {The {textcat} {Package} for {$n$}-{Gram} {Based} {Text} {Categorization} in {R}},
	volume = {52},
	issn = {1548-7660},
	url = {http://www.jstatsoft.org/v52/i06/},
	doi = {10.18637/jss.v052.i06},
	language = {en},
	number = {6},
	urldate = {2019-08-11},
	journal = {Journal of Statistical Software},
	author = {Hornik, Kurt and Mair, Patrick and Rauch, Johannes and Geiger, Wilhelm and Buchta, Christian and Feinerer, Ingo},
	year = {2013},
}

@article{firth_synopsis_1957,
	title = {A synopsis of linguistic theory 1930--55},
	abstract = {Reprinted in: Palmer, F. R. (ed.) (1968). Selected Papers of J. R. Firth 1952-59, pages 168-205. Longmans, London.},
	journal = {Studies in Linguistic Analysis (special volume of the Philological Society)},
	author = {Firth, J. R.},
	year = {1957},
	keywords = {classic linguistics meaning relatedness semantic},
	pages = {1--32},
}

@book{chomsky_aspects_1969,
	title = {Aspects of the {Theory} of {Syntax}},
	isbn = {978-0-262-26050-3},
	url = {https://books.google.fr/books?id=u0ksbFqagU8C},
	publisher = {MIT Press},
	author = {Chomsky, Noam},
	year = {1969},
}

@article{grishman_message_1997,
	title = {Message {Understanding} {Conference}-6: {A} {Brief} {History}},
	abstract = {We have recently completed the sixth in a series of "Message Understanding Conferences" which are designed to promote and evaluate research in information extraction. MUC-6 introduced several innovations over prior MUCs, most notably in the range of different tasks for which evaluations were conducted. We describe some of the motivations for the new format and briefly discuss some of the results of the evaluations.},
	language = {en},
	author = {Grishman, Ralph and Sundheim, Beth},
	year = {1997},
	pages = {6},
}

@article{wickham_layered_2010,
	author = {Wickham, Hadley},
	title = {A {Layered} {Grammar} of {Graphics}},
	journal = {Journal of Computational and Graphical Statistics},
	volume = {19},
	number = {1},
	pages = {3--28},
	month = jan,
	year = {2010},
	issn = {1061-8600, 1537-2715},
	doi = {10.1198/jcgs.2009.07098},
	url = {http://www.tandfonline.com/doi/abs/10.1198/jcgs.2009.07098},
	urldate = {2019-08-15},
	language = {en},
}

@manual{r_core_team_r_2020,
	address = {Vienna, Austria},
	title = {R: {A} {Language} and {Environment} for {Statistical} {Computing}},
	url = {https://www.R-project.org/},
	organization = {R Foundation for Statistical Computing},
	author = {{R Core Team}},
	year = {2020},
}

@manual{xie_bookdown_2021,
	title = {{bookdown}: {Authoring} {Books} and {Technical} {Documents} with {R} {Markdown}},
	url = {https://CRAN.R-project.org/package=bookdown},
	author = {Xie, Yihui},
	year = {2021},
}

@manual{xie_knitr_2021,
	title = {{knitr}: {A} {General}-{Purpose} {Package} for {Dynamic} {Report} {Generation} in {R}},
	url = {https://yihui.org/knitr/},
	author = {Xie, Yihui},
	year = {2021},
}

@manual{allaire_rmarkdown_2021,
	title = {{rmarkdown}: {Dynamic} {Documents} for {R}},
	url = {https://CRAN.R-project.org/package=rmarkdown},
	author = {Allaire, J. J. and Xie, Yihui and McPherson, Jonathan and Luraschi, Javier and Ushey, Kevin and Atkins, Aron and Wickham, Hadley and Cheng, Joe and Chang, Winston and Iannone, Richard},
	year = {2021},
}

@incollection{stodden_knitr_2014,
	title = {{knitr}: {A} {Comprehensive} {Tool} for {Reproducible} {Research} in {R}},
	isbn = {978-1-4665-6159-5},
	url = {http://www.crcpress.com/product/isbn/9781466561595},
	booktitle = {Implementing {Reproducible} {Computational} {Research}},
	publisher = {Chapman and Hall/CRC},
	author = {Xie, Yihui},
	editor = {Stodden, Victoria and Leisch, Friedrich and Peng, Roger D.},
	year = {2014},
}

@book{xie_r_2018,
	author = {Xie, Yihui and Allaire, J. J. and Grolemund, Garrett},
	title = {R {Markdown}: {The} {Definitive} {Guide}},
	year = {2018},
	publisher = {Chapman and Hall/CRC},
	address = {Boca Raton, Florida},
	url = {https://bookdown.org/yihui/rmarkdown},
}

@book{xie_r_2020,
	author = {Xie, Yihui and Dervieux, Christophe and Riederer, Emily},
	title = {R {Markdown} {Cookbook}},
	year = {2020},
	publisher = {Chapman and Hall/CRC},
	address = {Boca Raton, Florida},
	url = {https://bookdown.org/yihui/rmarkdown-cookbook},
}

@book{xie_dynamic_2015,
	address = {Boca Raton, Florida},
	edition = {2nd},
	title = {Dynamic {Documents} with {R} and {knitr}},
	url = {http://yihui.name/knitr/},
	publisher = {Chapman and Hall/CRC},
	author = {Xie, Yihui},
	year = {2015},
}

@article{verdelhan-bourgade_lucien_2020,
	title = {Lucien {Tesnière}, professeur de linguistique à {Montpellier} de 1937 à 1954. {L}’aventure d’une grammaire},
	volume = {51},
	number = {4562},
	journal = {Bulletin de l'Académie des sciences et lettres de Montpellier},
	author = {Verdelhan-Bourgade, M.},
	month = dec,
	year = {2020},
}

@book{fellbaum_wordnet_1998,
	editor = {Fellbaum, Christiane},
	title = {{WordNet}: an electronic lexical database},
	shorttitle = {{WordNet}},
	series = {Language, speech, and communication},
	year = {1998},
	publisher = {MIT Press},
	address = {Cambridge, Mass},
	isbn = {978-0-262-06197-1},
	keywords = {Data processing, English language, Lexicology, Semantics, WordNet},
}

@article{hornik_changes_2019,
	title = {Changes on {CRAN}},
	volume = {11},
	url = {http://journal.r-project.org/archive/2019-1/cran.pdf},
	number = {1},
	journal = {The R Journal},
	author = {Hornik, Kurt and Ligges, Uwe and Zeileis, Achim},
	year = {2019},
	pages = {438--441},
}

% NOTE(review): same work (identical DOI) as the entry keyed eichstaedt_facebook_2018-1 further down;
% both keys are kept so existing citations keep resolving. Cite only one of them per document.
@article{eichstaedt_facebook_2018,
	author = {Eichstaedt, Johannes C. and Smith, Robert J. and Merchant, Raina M. and Ungar, Lyle H. and Crutchley, Patrick and Preoţiuc-Pietro, Daniel and Asch, David A. and Schwartz, H. Andrew},
	title = {Facebook language predicts depression in medical records},
	journal = {Proceedings of the National Academy of Sciences},
	volume = {115},
	number = {44},
	pages = {11203--11208},
	month = oct,
	year = {2018},
	issn = {0027-8424, 1091-6490},
	doi = {10.1073/pnas.1802331115},
	url = {http://www.pnas.org/lookup/doi/10.1073/pnas.1802331115},
	urldate = {2021-06-27},
	language = {en},
	abstract = {Depression, the most prevalent mental illness, is underdiagnosed and undertreated, highlighting the need to extend the scope of current screening methods. Here, we use language from Facebook posts of consenting individuals to predict depression recorded in electronic medical records. We accessed the history of Facebook statuses posted by 683 patients visiting a large urban academic emergency department, 114 of whom had a diagnosis of depression in their medical records. Using only the language preceding their first documentation of a diagnosis of depression, we could identify depressed patients with fair accuracy [area under the curve (AUC) = 0.69], approximately matching the accuracy of screening surveys benchmarked against medical records. Restricting Facebook data to only the 6 months immediately preceding the first documented diagnosis of depression yielded a higher prediction accuracy (AUC = 0.72) for those users who had sufficient Facebook data. Significant prediction of future depression status was possible as far as 3 months before its first documentation. We found that language predictors of depression include emotional (sadness), interpersonal (loneliness, hostility), and cognitive (preoccupation with the self, rumination) processes. Unobtrusive depression assessment through social media of consenting individuals may become feasible as a scalable complement to existing screening and monitoring procedures.},
	file = {Texte intégral:C\:\\Users\\33623\\Zotero\\storage\\8YVA33T9\\Eichstaedt et al. - 2018 - Facebook language predicts depression in medical r.pdf:application/pdf},
}

@article{flesch_new_1948,
	title = {A new readability yardstick},
	volume = {32},
	issn = {1939-1854, 0021-9010},
	url = {http://doi.apa.org/getdoi.cfm?doi=10.1037/h0057532},
	doi = {10.1037/h0057532},
	language = {en},
	number = {3},
	urldate = {2021-07-08},
	journal = {Journal of Applied Psychology},
	author = {Flesch, Rudolph},
	year = {1948},
	pages = {221--233},
}

@article{thompson_programming_1968,
	author = {Thompson, Ken},
	title = {Programming {Techniques}: {Regular} expression search algorithm},
	shorttitle = {Programming {Techniques}},
	journal = {Communications of the ACM},
	volume = {11},
	number = {6},
	pages = {419--422},
	month = jun,
	year = {1968},
	issn = {0001-0782, 1557-7317},
	doi = {10.1145/363347.363387},
	url = {https://dl.acm.org/doi/10.1145/363347.363387},
	urldate = {2021-07-08},
	language = {en},
	abstract = {A method for locating specific character strings embedded in character text is described and an implementation of this method in the form of a compiler is discussed. The compiler accepts a regular expression as source language and produces an IBM 7094 program as object language. The object program then accepts the text to be searched as input and produces a signal every time an embedded string in the text matches the given regular expression. Examples, problems, and solutions are also presented.},
}

@article{khan_benchmark_2021,
	author = {Khan, Junaed Younus and Khondaker, Md. Tawkat Islam and Afroz, Sadia and Uddin, Gias and Iqbal, Anindya},
	title = {A benchmark study of machine learning models for online fake news detection},
	journal = {Machine Learning with Applications},
	volume = {4},
	pages = {100032},
	month = jun,
	year = {2021},
	issn = {26668270},
	doi = {10.1016/j.mlwa.2021.100032},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S266682702100013X},
	urldate = {2021-07-10},
	language = {en},
	file = {Version soumise:C\:\\Users\\33623\\Zotero\\storage\\J7B2C38D\\Khan et al. - 2021 - A benchmark study of machine learning models for o.pdf:application/pdf},
}

@article{rauter_open_2019,
	author = {Rauter, Romana and Globocnik, Dietfried and Perl-Vorbach, Elke and Baumgartner, Rupert J.},
	title = {Open innovation and its effects on economic and sustainability innovation performance},
	journal = {Journal of Innovation \& Knowledge},
	volume = {4},
	number = {4},
	pages = {226--233},
	month = oct,
	year = {2019},
	issn = {2444569X},
	doi = {10.1016/j.jik.2018.03.004},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S2444569X18300325},
	urldate = {2021-07-19},
	language = {en},
}

@incollection{jakobson_linguistics_1981,
	title = {Linguistics and {Poetics}},
	isbn = {978-90-279-3178-8},
	url = {https://www.degruyter.com/document/doi/10.1515/9783110802122.18/html},
	urldate = {2021-07-29},
	booktitle = {Poetry of {Grammar} and {Grammar} of {Poetry}},
	publisher = {De Gruyter Mouton},
	author = {Jakobson, Roman},
	month = dec,
	year = {1981},
	doi = {10.1515/9783110802122.18},
	pages = {18--51},
}

@article{torruella_lexical_2013,
	author = {Torruella, Joan and Capsada, Ramon},
	title = {Lexical {Statistics} and {Tipological} {Structures}: {A} {Measure} of {Lexical} {Richness}},
	shorttitle = {Lexical {Statistics} and {Tipological} {Structures}},
	journal = {Procedia - Social and Behavioral Sciences},
	volume = {95},
	pages = {447--454},
	month = oct,
	year = {2013},
	issn = {18770428},
	doi = {10.1016/j.sbspro.2013.10.668},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S1877042813041888},
	urldate = {2021-08-04},
	language = {en},
	file = {Texte intégral:C\:\\Users\\33623\\Zotero\\storage\\H6N79VTW\\Torruella et Capsada - 2013 - Lexical Statistics and Tipological Structures A M.pdf:application/pdf},
}

% NOTE(review): same work (identical DOI) as the entry keyed eichstaedt_facebook_2018 earlier in this file;
% both keys are kept so existing citations keep resolving. Cite only one of them per document.
@article{eichstaedt_facebook_2018-1,
	author = {Eichstaedt, Johannes C. and Smith, Robert J. and Merchant, Raina M. and Ungar, Lyle H. and Crutchley, Patrick and Preoţiuc-Pietro, Daniel and Asch, David A. and Schwartz, H. Andrew},
	title = {Facebook language predicts depression in medical records},
	journal = {Proceedings of the National Academy of Sciences},
	volume = {115},
	number = {44},
	pages = {11203--11208},
	month = oct,
	year = {2018},
	issn = {0027-8424, 1091-6490},
	doi = {10.1073/pnas.1802331115},
	url = {http://www.pnas.org/lookup/doi/10.1073/pnas.1802331115},
	urldate = {2021-08-15},
	language = {en},
	abstract = {Depression, the most prevalent mental illness, is underdiagnosed and undertreated, highlighting the need to extend the scope of current screening methods. Here, we use language from Facebook posts of consenting individuals to predict depression recorded in electronic medical records. We accessed the history of Facebook statuses posted by 683 patients visiting a large urban academic emergency department, 114 of whom had a diagnosis of depression in their medical records. Using only the language preceding their first documentation of a diagnosis of depression, we could identify depressed patients with fair accuracy [area under the curve (AUC) = 0.69], approximately matching the accuracy of screening surveys benchmarked against medical records. Restricting Facebook data to only the 6 months immediately preceding the first documented diagnosis of depression yielded a higher prediction accuracy (AUC = 0.72) for those users who had sufficient Facebook data. Significant prediction of future depression status was possible as far as 3 months before its first documentation. We found that language predictors of depression include emotional (sadness), interpersonal (loneliness, hostility), and cognitive (preoccupation with the self, rumination) processes. Unobtrusive depression assessment through social media of consenting individuals may become feasible as a scalable complement to existing screening and monitoring procedures.},
	file = {Texte intégral:C\:\\Users\\33623\\Zotero\\storage\\5YE3KZ9C\\Eichstaedt et al. - 2018 - Facebook language predicts depression in medical r.pdf:application/pdf},
}

@article{fergadiotis_psychometric_2015,
	title = {Psychometric {Evaluation} of {Lexical} {Diversity} {Indices}: {Assessing} {Length} {Effects}},
	volume = {58},
	issn = {1092-4388, 1558-9102},
	shorttitle = {Psychometric {Evaluation} of {Lexical} {Diversity} {Indices}},
	url = {http://pubs.asha.org/doi/10.1044/2015_JSLHR-L-14-0280},
	doi = {10.1044/2015_JSLHR-L-14-0280},
	abstract = {Purpose: Several novel techniques have been developed recently to assess the breadth of a speaker's vocabulary exhibited in a language sample. The specific aim of this study was to increase our understanding of the validity of the scores generated by different lexical diversity (LD) estimation techniques. Four techniques were explored: D, Maas, measure of textual lexical diversity, and moving-average type–token ratio. Method: Four LD indices were estimated for language samples on 4 discourse tasks (procedures, eventcasts, story retell, and recounts) from 442 adults who are neurologically intact. The resulting data were analyzed using structural equation modeling. Results: The scores for measure of textual lexical diversity and moving-average type–token ratio were stronger indicators of the LD of the language samples. The results for the other 2 techniques were consistent with the presence of method factors representing construct-irrelevant sources. Conclusion: These findings offer a deeper understanding of the relative validity of the 4 estimation techniques and should assist clinicians and researchers in the selection of LD measures of language samples that minimize construct-irrelevant sources.},
	language = {en},
	number = {3},
	urldate = {2021-08-20},
	journal = {Journal of Speech, Language, and Hearing Research},
	author = {Fergadiotis, Gerasimos and Wright, Heather Harris and Green, Samuel B.},
	month = jun,
	year = {2015},
	pages = {840--852},
	file = {Texte intégral:C\:\\Users\\33623\\Zotero\\storage\\LWV852S5\\Fergadiotis et al. - 2015 - Psychometric Evaluation of Lexical Diversity Indic.pdf:application/pdf},
}

@article{forse_cibois_nodate,
	title = {Cibois {Philippe}, {L}'analyse factorielle},
	author = {Forsé, Michel},
	pages = {4},
}

@article{lemoine_application_1981,
	author = {Lemoine, B. and Sartolou, A.},
	title = {Application de l'analyse factorielle à l'étude de la croissance en hauteur des arbres : exemple du {Pin} maritime},
	journal = {Annales des Sciences Forestières},
	volume = {38},
	number = {1},
	pages = {31--54},
	year = {1981},
	doi = {10.1051/forest:19810102},
	url = {http://www.afs-journal.org/10.1051/forest:19810102},
}

@article{hungerford_sheepskin_1987,
	author = {Hungerford, Thomas and Solon, Gary},
	title = {Sheepskin {Effects} in the {Returns} to {Education}},
	journal = {The Review of Economics and Statistics},
	volume = {69},
	number = {1},
	pages = {175},
	month = feb,
	year = {1987},
	doi = {10.2307/1937919},
	url = {https://www.jstor.org/stable/1937919?origin=crossref},
}

@article{malbos_analyse_nodate,
	title = {Analyse matricielle et algèbre linéaire appliquée},
	author = {Malbos, Philippe},
	pages = {245},
}

@article{newhouse_rocky_nodate,
	title = {Rocky {Mountain} {Spotted} {Fever} in {Georgia}, 1961--75: {Analysis} of {Social} and {Environmental} {Factors} {Affecting} {Occurrence}},
	author = {Newhouse, Verne F. and Choi, Keewhan and Holman, Robert C. and Thacker, Stephen B. and D'Angelo, Lawrence J. and Smith, J. David},
	pages = {10},
}

@article{vallet_analyse_1985,
	author = {Vallet, Michel},
	title = {Analyse des données},
	pages = {8},
	year = {1985},
}

@article{blanchard_les_nodate,
	author = {Blanchard, Philippe and Patou, Charles},
	title = {Les usages de l’analyse factorielle},
	pages = {28},
}

@article{thurstone_v_1949,
	author = {Thurstone, L. L.},
	title = {V. - {L}'analyse factorielle, méthode scientifique},
	journal = {L'année psychologique},
	volume = {50},
	number = {1},
	pages = {61--75},
	year = {1949},
	doi = {10.3406/psy.1949.8425},
	url = {https://www.persee.fr/doc/psy_0003-5033_1949_hos_50_1_8425},
}

@article{benzcri_analyse_2006,
	title = {L'analyse de données : {Histoire}, {Bilan}, {Projets} et {Perspectives}},
	author = {Benzécri, Jean-Paul},
	year = {2006},
	pages = {5},
}

@article{benzecri_analyse_2006,
	title = {L'analyse de données : {Histoire}, {Bilan}, {Projets} et {Perspectives}},
	author = {Benzécri, Jean-Paul},
	year = {2006},
	pages = {5},
}

@techreport{fodor_survey_2002,
	address = {Livermore, CA},
	title = {A {Survey} of {Dimension} {Reduction} {Techniques}},
	url = {http://www.osti.gov/servlets/purl/15002155-mumfPN/native/},
	number = {UCRL-ID-148494},
	institution = {Lawrence Livermore National Laboratory},
	author = {Fodor, Imola K.},
	month = may,
	year = {2002},
	doi = {10.2172/15002155},
}

@article{blei_latent_nodate,
	title = {Latent {Dirichlet} {Allocation}},
	volume = {3},
	url = {https://www.jmlr.org/papers/v3/blei03a.html},
	journal = {Journal of Machine Learning Research},
	author = {Blei, David M. and Ng, Andrew Y. and Jordan, Michael I.},
	year = {2003},
	pages = {993--1022},
}

@article{roberts_model_2016,
	author = {Roberts, Margaret E. and Stewart, Brandon M. and Airoldi, Edoardo M.},
	title = {A {Model} of {Text} for {Experimentation} in the {Social} {Sciences}},
	journal = {Journal of the American Statistical Association},
	volume = {111},
	number = {515},
	pages = {988--1003},
	month = jul,
	year = {2016},
	doi = {10.1080/01621459.2016.1141684},
	url = {https://www.tandfonline.com/doi/full/10.1080/01621459.2016.1141684},
}

% Same work as benzecri_analyse_2006 and benzcri_analyse_2006; the key is kept so existing citations still resolve.
@article{benzecri_analyse_2006-1,
	title = {L'analyse de données : {Histoire}, {Bilan}, {Projets} et {Perspectives}},
	language = {fr},
	author = {Benzécri, Jean-Paul},
	year = {2006},
	pages = {5},
}

@misc{balech_nlp_2019,
	title = {{NLP} text mining {V4.0} - une introduction - cours programme doctoral},
	url = {http://rgdoi.net/10.13140/RG.2.2.34248.06405},
	doi = {10.13140/RG.2.2.34248.06405},
	abstract = {The purpose of this chapter is to introduce natural language processing techniques and textual analysis, such as the developments of data mining and linguistics define it, automating it by taking advantage of the distributional properties of language. Largely automated, natural language processing techniques sequence a series of operations from the constitution of the corpus to its annotation, resulting in representation and qualification models. These methods are now widely available through the r and python language libraries. They make it possible to exploit the large corpus that digitisation makes it possible to build: consumer comments, news bases, activity reports, interview reports. The purpose of this text is essentially technical, however without giving any operating method. It indicates generic methods that can be used via r and their context of use. This is a short manual of modern textual analysis. For business research.},
	language = {en},
	urldate = {2021-08-14},
	author = {Balech, Sophie and Benavent, Christophe},
	year = {2019},
}

@article{beaudouin_retour_2016,
	author = {Beaudouin, Valérie},
	title = {Retour aux origines de la statistique textuelle: {Benzécri} et l'école française d'analyse des données},
	pages = {21},
	year = {2016},
	language = {fr},
	abstract = {In this article, we have attempted to trace the history of the statistical analysis of textual data, focusing on the influence of Benzécri’s work and school, and to make explicit their theoretical positions, clearly opposed to AI and to Chomskyan linguistics. After a presentation of the intellectual project, as an inductive approach to language based on the exploration of corpora, we present the principles of correspondence analysis, which is the main method developed in the Data Analysis School, used for corpus analysis but also for many other types of datasets. Then, we will focus on textual data analysis. Based on the fact that software programmes have played a major role in the use of these statistical techniques, we shall examine a selection of these, display their specificities and their underlying theoretical bases.},
}

@phdthesis{quijano_modeles_2008,
	title = {Modèles d'auto-organisation pour l'émergence de formes urbaines à partir de comportements individuels à {Bogota}},
	language = {fr},
	school = {Université Pierre et Marie Curie - Paris VI},
	author = {Gil-Quijano, Javier},
	year = {2008},
	pagetotal = {295},
}

@article{chen_nonnegative_1984,
	author = {Chen, Ji-Cheng},
	title = {The nonnegative rank factorizations of nonnegative matrices},
	journal = {Linear Algebra and its Applications},
	volume = {62},
	pages = {207--217},
	month = nov,
	year = {1984},
	issn = {00243795},
	doi = {10.1016/0024-3795(84)90096-X},
	url = {https://linkinghub.elsevier.com/retrieve/pii/002437958490096X},
	urldate = {2021-08-03},
	language = {en},
}

@phdthesis{limem_methodes_nodate,
	title = {Méthodes informées de factorisation matricielle non-négative. {Application} à l'identification de sources de particules industrielles},
	language = {fr},
	school = {Université du Littoral Côte d'Opale},
	author = {Limem, Abdelhakim},
	year = {2014},
	pagetotal = {232},
}

@misc{gillis_why_2014,
	title = {The {Why} and {How} of {Nonnegative} {Matrix} {Factorization}},
	url = {http://arxiv.org/abs/1401.5226},
	abstract = {Nonnegative matrix factorization (NMF) has become a widely used tool for the analysis of high-dimensional data as it automatically extracts sparse and meaningful features from a set of nonnegative data vectors. We first illustrate this property of NMF on three applications, in image processing, text mining and hyperspectral imaging –this is the why. Then we address the problem of solving NMF, which is NP-hard in general. We review some standard NMF algorithms, and also present a recent subclass of NMF problems, referred to as near-separable NMF, that can be solved efficiently (that is, in polynomial time), even in the presence of noise –this is the how. Finally, we briefly describe some problems in mathematics and computer science closely related to NMF via the nonnegative rank.},
	language = {en},
	urldate = {2021-08-03},
	eprint = {1401.5226},
	archiveprefix = {arXiv},
	author = {Gillis, Nicolas},
	month = mar,
	year = {2014},
	keywords = {Computer Science - Information Retrieval, Computer Science - Machine Learning, Statistics - Machine Learning, Mathematics - Optimization and Control},
}

@article{cazalet_nonnegative_nodate,
	title = {Nonnegative {Matrix} {Factorization} and {Financial} {Applications}},
	abstract = {Nonnegative matrix factorization (NMF) is a recent tool to analyse multivariate data. It can be compared to other decomposition methods like principal component analysis (PCA) or independent component analysis (ICA). However, NMF differs from them because it requires and imposes the nonnegativity of matrices. In this paper, we use this special feature in order to identify patterns in stock market data. Indeed, we may use NMF to estimate common factors from the dynamics of stock prices. In this perspective, we compare NMF and clustering algorithms to identify endogenous equity sectors.},
	language = {en},
	author = {Cazalet, Zélia and Roncalli, Thierry},
	pages = {31},
}

@inproceedings{shu_beyond_2019,
	venue = {Melbourne, VIC, Australia},
	title = {Beyond {News} {Contents}: {The} {Role} of {Social} {Context} for {Fake} {News} {Detection}},
	isbn = {978-1-4503-5940-5},
	shorttitle = {Beyond {News} {Contents}},
	url = {https://dl.acm.org/doi/10.1145/3289600.3290994},
	doi = {10.1145/3289600.3290994},
	abstract = {Social media is becoming popular for news consumption due to its fast dissemination, easy access, and low cost. However, it also enables the wide propagation of fake news, i.e., news with intentionally false information. Detecting fake news is an important task, which not only ensures users receive authentic information but also helps maintain a trustworthy news ecosystem. The majority of existing detection algorithms focus on finding clues from news contents, which are generally not effective because fake news is often intentionally written to mislead users by mimicking true news. Therefore, we need to explore auxiliary information to improve detection. The social context during news dissemination process on social media forms the inherent tri-relationship, the relationship among publishers, news pieces, and users, which has potential to improve fake news detection. For example, partisan-biased publishers are more likely to publish fake news, and low-credible users are more likely to share fake news. In this paper, we study the novel problem of exploiting social context for fake news detection. We propose a tri-relationship embedding framework TriFN, which models publisher-news relations and user-news interactions simultaneously for fake news classification. We conduct experiments on two real-world datasets, which demonstrate that the proposed approach significantly outperforms other baseline methods for fake news detection.},
	language = {en},
	urldate = {2021-08-03},
	booktitle = {Proceedings of the {Twelfth} {ACM} {International} {Conference} on {Web} {Search} and {Data} {Mining}},
	publisher = {ACM},
	author = {Shu, Kai and Wang, Suhang and Liu, Huan},
	month = jan,
	year = {2019},
	pages = {312--320},
}

@article{evangelopoulos_latent_2012,
	author = {Evangelopoulos, Nicholas and Zhang, Xiaoni and Prybutok, Victor R},
	title = {Latent {Semantic} {Analysis}: five methodological recommendations},
	shorttitle = {Latent {Semantic} {Analysis}},
	journal = {European Journal of Information Systems},
	volume = {21},
	number = {1},
	pages = {70--86},
	month = jan,
	year = {2012},
	issn = {0960-085X, 1476-9344},
	doi = {10.1057/ejis.2010.61},
	url = {https://www.tandfonline.com/doi/full/10.1057/ejis.2010.61},
	urldate = {2021-08-09},
	language = {en},
	abstract = {The recent influx in generation, storage and availability of textual data presents researchers with the challenge of developing suitable methods for their analysis. Latent Semantic Analysis (LSA), a member of a family of methodological approaches that offers an opportunity to address this gap by describing the semantic content in textual data as a set of vectors, was pioneered by researchers in psychology, information retrieval, and bibliometrics. LSA involves a matrix operation called singular value decomposition, an extension of principal component analysis. LSA generates latent semantic dimensions that are either interpreted, if the researcher’s primary interest lies with the understanding of the thematic structure in the textual data, or used for purposes of clustering, categorisation and predictive modelling, if the interest lies with the conversion of raw text into numerical data, as a precursor to subsequent analysis. This paper reviews five methodological issues that need to be addressed by the researcher who will embark on LSA. We examine the dilemmas, present the choices, and discuss the considerations under which good methodological decisions are made. We illustrate these issues with the help of four small studies, involving the analysis of abstracts for papers published in the European Journal of Information Systems.},
}

@article{song_genetic_2009,
	title = {Genetic algorithm for text clustering based on latent semantic indexing},
	volume = {57},
	issn = {08981221},
	url = {https://linkinghub.elsevier.com/retrieve/pii/S0898122108005300},
	doi = {10.1016/j.camwa.2008.10.010},
	abstract = {In this paper, we develop a genetic algorithm method based on a latent semantic model (GAL) for text clustering. The main difficulty in the application of genetic algorithms (GAs) for document clustering is thousands or even tens of thousands of dimensions in feature space which is typical for textual data. Because the most straightforward and popular approach represents texts with the vector space model (VSM), that is, each unique term in the vocabulary represents one dimension. Latent semantic indexing (LSI) is a successful technology in information retrieval which attempts to explore the latent semantics implied by a query or a document through representing them in a dimension-reduced space. Meanwhile, LSI takes into account the effects of synonymy and polysemy, which constructs a semantic structure in textual data. GA belongs to search techniques that can efficiently evolve the optimal solution in the reduced space. We propose a variable string length genetic algorithm which has been exploited for automatically evolving the proper number of clusters as well as providing near optimal data set clustering. GA can be used in conjunction with the reduced latent semantic structure and improve clustering efficiency and accuracy. The superiority of GAL approach over conventional GA applied in VSM model is demonstrated by providing good Reuter document clustering results.},
	language = {en},
	number = {11--12},
	urldate = {2021-08-09},
	journal = {Computers \& Mathematics with Applications},
	author = {Song, Wei and Park, Soon Cheol},
	month = jun,
	year = {2009},
	pages = {1901--1907},
}

@incollection{buntine_variational_2002,
	address = {Berlin, Heidelberg},
	series = {Lecture Notes in Computer Science},
	title = {Variational {Extensions} to {EM} and {Multinomial} {PCA}},
	volume = {2430},
	isbn = {978-3-540-44036-9 978-3-540-36755-0},
	url = {http://link.springer.com/10.1007/3-540-36755-1_3},
	abstract = {Several authors in recent years have proposed discrete analogues to principle component analysis intended to handle discrete or positive only data, for instance suited to analyzing sets of documents. Methods include non-negative matrix factorization, probabilistic latent semantic analysis, and latent Dirichlet allocation. This paper begins with a review of the basic theory of the variational extension to the expectation-maximization algorithm, and then presents discrete component finding algorithms in that light. Experiments are conducted on both bigram word data and document bag-of-word to expose some of the subtleties of this new class of algorithms.},
	language = {en},
	urldate = {2021-08-09},
	booktitle = {Machine {Learning}: {ECML} 2002},
	publisher = {Springer Berlin Heidelberg},
	author = {Buntine, Wray},
	editor = {Elomaa, Tapio and Mannila, Heikki and Toivonen, Hannu},
	year = {2002},
	doi = {10.1007/3-540-36755-1_3},
	pages = {23--34},
}

@article{pochon_recherche_2020,
	author = {Pochon, Luc-Olivier and Favre, Alain},
	title = {A la recherche d'une classification de problèmes de mathématiques scolaires: une chronique},
	pages = {60},
	year = {2020},
	language = {fr},
}

@misc{hassani_text_2020,
	title = {Text {Mining} using {Nonnegative} {Matrix} {Factorization} and {Latent} {Semantic} {Analysis}},
	url = {http://arxiv.org/abs/1911.04705},
	abstract = {Text clustering is arguably one of the most important topics in modern data mining. Nevertheless, text data require tokenization which usually yields a very large and highly sparse term-document matrix, which is usually difficult to process using conventional machine learning algorithms. Methods such as Latent Semantic Analysis have helped mitigate this issue, but are nevertheless not completely stable in practice. As a result, we propose a new feature agglomeration method based on Nonnegative Matrix Factorization, which is employed to separate the terms into groups, and then each group’s term vectors are agglomerated into a new feature vector. Together, these feature vectors create a new feature space much more suitable for clustering. In addition, we propose a new deterministic initialization for spherical K-Means, which proves very useful for this specific type of data. In order to evaluate the proposed method, we compare it to some of the latest research done in this field, as well as some of the most practiced methods. In our experiments, we conclude that the proposed method either significantly improves clustering performance, or maintains the performance of other methods, while improving stability in results.},
	language = {en},
	urldate = {2021-08-14},
	eprint = {1911.04705},
	archiveprefix = {arXiv},
	author = {Hassani, Ali and Iranmanesh, Amir and Mansouri, Najme},
	month = feb,
	year = {2020},
	keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
}

@misc{vavasis_complexity_2007,
	title = {On the complexity of nonnegative matrix factorization},
	url = {http://arxiv.org/abs/0708.4149},
	abstract = {Nonnegative matrix factorization (NMF) has become a prominent technique for the analysis of image databases, text databases and other information retrieval and clustering applications. In this report, we define an exact version of NMF. Then we establish several results about exact NMF: (1) that it is equivalent to a problem in polyhedral combinatorics; (2) that it is NP-hard; and (3) that a polynomial-time local search heuristic exists.},
	language = {en},
	urldate = {2021-08-14},
	eprint = {0708.4149},
	archiveprefix = {arXiv},
	author = {Vavasis, Stephen A.},
	month = sep,
	year = {2007},
	keywords = {Computer Science - Information Retrieval, G.1.3, H.3.3, Mathematics - Numerical Analysis},
}

@inproceedings{lee_algorithms_nodate,
	title = {Algorithms for {Non-negative} {Matrix} {Factorization}},
	abstract = {Non-negative matrix factorization (NMF) has previously been shown to be a useful decomposition for multivariate data. Two different multiplicative algorithms for NMF are analyzed. They differ only slightly in the multiplicative factor used in the update rules. One algorithm can be shown to minimize the conventional least squares error while the other minimizes the generalized Kullback-Leibler divergence. The monotonic convergence of both algorithms can be proven using an auxiliary function analogous to that used for proving convergence of the Expectation-Maximization algorithm. The algorithms can also be interpreted as diagonally rescaled gradient descent, where the rescaling factor is optimally chosen to ensure convergence.},
	language = {en},
	booktitle = {Advances in {Neural} {Information} {Processing} {Systems} 13},
	publisher = {MIT Press},
	author = {Lee, Daniel D. and Seung, H. Sebastian},
	editor = {Leen, Todd K. and Dietterich, Thomas G. and Tresp, Volker},
	year = {2001},
	pages = {556--562},
}

@inproceedings{li_documents_2014,
	venue = {Lanzhou, China},
	title = {Documents clustering based on max-correntropy nonnegative matrix factorization},
	isbn = {978-1-4799-4215-2 978-1-4799-4216-9 978-1-4799-4217-6},
	url = {http://ieeexplore.ieee.org/document/7009720/},
	doi = {10.1109/ICMLC.2014.7009720},
	abstract = {Nonnegative matrix factorization (NMF) has been successfully applied to many areas for classification and clustering. Commonly-used NMF algorithms mainly target on minimizing the l2 distance or Kullback-Leibler (KL) divergence, which may not be suitable for nonlinear case. In this paper, we propose a new decomposition method by maximizing the correntropy between the original and the product of two low-rank matrices for document clustering. This method also allows us to learn the new basis vectors of the semantic feature space from the data. To our knowledge, we haven’t seen any work has been done by maximizing correntropy in NMF to cluster high dimensional document data. Our experiment results show the supremacy of our proposed method over other variants of NMF algorithm on Reuters21578 and TDT2 databasets.},
	language = {en},
	urldate = {2021-08-14},
	booktitle = {2014 {International} {Conference} on {Machine} {Learning} and {Cybernetics}},
	publisher = {IEEE},
	author = {Li, Le and Yang, Jianjun and Xu, Yang and Qin, Zhen and Zhang, Honggang},
	month = jul,
	year = {2014},
	pages = {850--855},
}

@article{votte_algorithmes_nodate,
	title = {Algorithmes de factorisation en matrices non-négatives fondée sur la {$\beta$}-divergence},
	abstract = {This paper describes algorithms for nonnegative matrix factorization (NMF) with the β-divergence (β-NMF). The β-divergence is a family of cost functions parametrized by a single shape parameter β that takes the Euclidean distance, the Kullback-Leibler divergence and the Itakura-Saito divergence as special cases (β = 2, 1, 0 respectively). The proposed algorithms are based on a surrogate auxiliary function (an upper bound of the objective function constructed locally). We first describe a majorization-minimization (MM) algorithm that leads to multiplicative updates. Then we introduce the concept of majorization-equalization (ME) algorithm which produces updates that move along constant level sets of the auxiliary function and lead to larger steps than MM. Simulations illustrate the faster convergence of the ME approach.},
	language = {fr},
	author = {Févotte, Cédric and Idier, Jérôme},
	pages = {4},
}

@misc{shitov_nonnegative_2017,
	title = {The nonnegative rank of a matrix: {Hard} problems, easy solutions},
	shorttitle = {The nonnegative rank of a matrix},
	url = {http://arxiv.org/abs/1605.04000},
	abstract = {Using elementary linear algebra, we develop a technique that leads to solutions of two widely known problems on nonnegative matrices. First, we give a short proof of the result by Vavasis stating that the nonnegative rank of a matrix is NP-hard to compute. This proof is essentially contained in the paper by Jiang and Ravikumar, who discussed this topic in different terms fifteen years before the work of Vavasis. Secondly, we present a solution of the Cohen–Rothblum problem on rational nonnegative factorizations, which was posed in 1993 and remained open.},
	language = {en},
	urldate = {2021-08-14},
	eprint = {1605.04000},
	archiveprefix = {arXiv},
	author = {Shitov, Yaroslav},
	month = dec,
	year = {2017},
	keywords = {Computer Science - Computational Complexity, Mathematics - Combinatorics},
}

@article{zurada_nonnegative_2013,
	title = {Nonnegative {Matrix} {Factorization} and {Its} {Application} to {Pattern} {Analysis} and {Text} {Mining}},
	abstract = {Nonnegative Matrix Factorization (NMF) is one of the most promising techniques to reduce the dimensionality of the data. This presentation compares the method with other popular matrix decomposition approaches for various pattern analysis tasks. Among others, NMF has been also widely applied for clustering and latent feature extraction. Several types of the objective functions have been used for NMF in the literature. Instead of minimizing the common Euclidean Distance (EucD) error, we review an alternative method that maximizes the correntropy similarity measure to produce the factorization. Correntropy is an entropy-based criterion defined as a nonlinear similarity measure. Following the discussion of maximization of the correntropy function, we use it to cluster document data set and compare the clustering performance with the EucD-based NMF. Our approach was applied and illustrated for the clustering of documents in the 20-Newsgroups data set. The comparison is illustrated with 20-Newsgroups data set. The results show that our approach produces per average better clustering compared with other methods which use EucD as an objective function.},
	language = {en},
	author = {Zurada, Jacek M and Ensari, Tolga and Hosseini Asl, Ehsan and Chorowski, Jan},
	year = {2013},
	pages = {6},
}

@article{gaujoux_generating_nodate,
	title = {Generating heatmaps for {Nonnegative} {Matrix} {Factorization}},
	abstract = {This vignette describes how to produce different informative heatmaps from NMF objects, such as returned by the function nmf in the NMF package (Gaujoux et al. 2010). The main drawing engine is implemented by the function aheatmap, which is a highly enhanced modification of the function pheatmap from the pheatmap package, and provides convenient and quick ways of producing high quality and customizable annotated heatmaps. Currently this function is part of the package NMF, but may eventually compose a separate package on its own.},
	language = {en},
	author = {Gaujoux, Renaud},
	pages = {12},
}

@article{gaujoux_flexible_2010,
	author = {Gaujoux, Renaud and Seoighe, Cathal},
	title = {A flexible {R} package for nonnegative matrix factorization},
	journal = {BMC Bioinformatics},
	volume = {11},
	number = {1},
	pages = {367},
	month = dec,
	year = {2010},
	issn = {1471-2105},
	doi = {10.1186/1471-2105-11-367},
	url = {https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-11-367},
	urldate = {2021-08-15},
	language = {en},
}

@misc{vaswani_attention_2017,
	title = {Attention {Is} {All} {You} {Need}},
	url = {http://arxiv.org/abs/1706.03762},
	abstract = {The dominant sequence transduction models are based on complex recurrent or convolutional neural networks in an encoder-decoder configuration. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. Our model achieves 28.4 BLEU on the WMT 2014 English-to-German translation task, improving over the existing best results, including ensembles by over 2 BLEU. On the WMT 2014 English-to-French translation task, our model establishes a new single-model state-of-the-art BLEU score of 41.8 after training for 3.5 days on eight GPUs, a small fraction of the training costs of the best models from the literature. We show that the Transformer generalizes well to other tasks by applying it successfully to English constituency parsing both with large and limited training data.},
	urldate = {2022-06-20},
	publisher = {arXiv},
	eprint = {1706.03762},
	archiveprefix = {arXiv},
	primaryclass = {cs.CL},
	author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and Kaiser, Lukasz and Polosukhin, Illia},
	month = dec,
	year = {2017},
	keywords = {Computer Science - Computation and Language, Computer Science - Machine Learning},
	file = {arXiv Fulltext PDF:C\:\\Users\\33623\\Zotero\\storage\\GWCRVHKN\\Vaswani et al. - 2017 - Attention Is All You Need.pdf:application/pdf;arXiv.org Snapshot:C\:\\Users\\33623\\Zotero\\storage\\9ZSHJ9WD\\1706.html:text/html},
}