From c3747bc598892602366419d7c3affefca396155c Mon Sep 17 00:00:00 2001 From: tswsxk Date: Thu, 26 Sep 2019 14:27:33 +0800 Subject: [PATCH 1/9] =?UTF-8?q?junyi=E6=95=B0=E6=8D=AE=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- EduData/DataSet/junyi/KnowledgeTracing.py | 82 +++++++++++++++------ EduData/DataSet/junyi/junyi.py | 9 +-- EduData/Task/KnowledgeTracing/statistics.py | 27 +++++++ 3 files changed, 89 insertions(+), 29 deletions(-) create mode 100644 EduData/Task/KnowledgeTracing/statistics.py diff --git a/EduData/DataSet/junyi/KnowledgeTracing.py b/EduData/DataSet/junyi/KnowledgeTracing.py index 4c80458..f1ecb07 100644 --- a/EduData/DataSet/junyi/KnowledgeTracing.py +++ b/EduData/DataSet/junyi/KnowledgeTracing.py @@ -5,12 +5,11 @@ import json from longling import wf_open +from longling.lib.candylib import as_list from tqdm import tqdm -from EduData.Tools import train_valid_test - -def extract_students_log(source, target, ku_dict): +def _read(source, ku_dict): """require big memory to run this function""" outcome = { @@ -35,39 +34,74 @@ def extract_students_log(source, target, ku_dict): students[student][session] = [] students[student][session].append([int(timestamp), exercise, correct]) + return students + +def _write(students, target): with wf_open(target) as wf: - for student_id, sessions in tqdm(students.items(), "sorting"): + for student_id, sessions in tqdm(students.items(), "writing"): for session_id, exercises in sessions.items(): exercises.sort(key=lambda x: x[0]) exercise_response = [(exercise[1], exercise[2]) for exercise in exercises] print(json.dumps(exercise_response), file=wf) -def select_n_most_frequent_students(source, target, n=1000): +def extract_students_log(source, target, ku_dict): + students = _read(source, ku_dict) + _write(students, target) + + +def _frequency(students): + frequency = {} + for student_id, sessions in tqdm(students.items(), "calculating frequency"): + 
frequency[student_id] = sum([len(session) for session in sessions]) + return sorted(frequency.items(), key=lambda x: x[1], reverse=True) - pass + +def get_n_most_frequent_students(students, n, frequency=None): + frequency = _frequency(students) if frequency is None else frequency + _students = {} + for _id, _ in frequency[:n]: + _students[_id] = students[_id] + return _students + + +def select_n_most_frequent_students(source, target_prefix, ku_dict, n): + n_list = as_list(n) + students = _read(source, ku_dict) + frequency = _frequency(students) + for _n in n_list: + _write(get_n_most_frequent_students(students, _n, frequency), target_prefix + "%s" % _n) if __name__ == '__main__': - root = "../../" - student_log_raw_file = root + "raw_data/junyi/junyi_ProblemLog_for_PSLC.txt" - student_log_file = root + "data/junyi/student_log_kt.json" + root = "../../../" + student_log_raw_file = root + "data/junyi/junyi_ProblemLog_for_PSLC.txt" + # student_log_file = root + "data/junyi/student_log_kt.json" ku_dict_file = root + "data/junyi/graph_vertex.json" + + select_n_most_frequent_students( + student_log_raw_file, + root + "data/junyi/student_log_kt_", + ku_dict_file, + [100, 200, 300] + ) + # [500, 1000, 2000] + # extract_students_log(student_log_raw_file, student_log_file, ku_dict_file) - student_log_file_small = student_log_file + ".small" - - with open(student_log_file) as f, wf_open(student_log_file_small) as wf: - for i, line in tqdm(enumerate(f)): - if i > 50000: - break - print(line, end="", file=wf) - - print(train_valid_test( - student_log_file_small, - valid_ratio=0., - test_ratio=0.2, - root_dir=root + "data/junyi/", - silent=False, - )) + # student_log_file_small = student_log_file + ".small" + # + # with open(student_log_file) as f, wf_open(student_log_file_small) as wf: + # for i, line in tqdm(enumerate(f)): + # if i > 50000: + # break + # print(line, end="", file=wf) + # + # print(train_valid_test( + # student_log_file_small, + # valid_ratio=0., + # 
test_ratio=0.2, + # root_dir=root + "data/junyi/", + # silent=False, + # )) diff --git a/EduData/DataSet/junyi/junyi.py b/EduData/DataSet/junyi/junyi.py index 33a083a..9200de0 100644 --- a/EduData/DataSet/junyi/junyi.py +++ b/EduData/DataSet/junyi/junyi.py @@ -87,18 +87,17 @@ def extract_similarity(source, target, ku_dict): if __name__ == '__main__': - root = "../../" - raw_file = root + "raw_data/junyi/junyi_Exercise_table.csv" + root = "../../../" + raw_file = root + "data/junyi/junyi_Exercise_table.csv" ku_dict_file = root + "data/junyi/graph_vertex.json" prerequisite_file = root + "data/junyi/prerequisite.json" similarity_raw_files = [ - root + "raw_data/junyi/relationship_annotation_{}.csv".format(name) for name in ["testing", "training"] + root + "data/junyi/relationship_annotation_{}.csv".format(name) for name in ["testing", "training"] ] similarity_raw_file = root + "raw_data/junyi/relationship_annotation.csv" similarity_file = root + "data/junyi/similarity.json" # merge_relationship_annotation(similarity_raw_files, similarity_raw_file) - - # build_ku_dict(raw_file, ku_dict_file) + build_ku_dict(raw_file, ku_dict_file) extract_prerequisite(raw_file, prerequisite_file, ku_dict_file) # extract_similarity(similarity_raw_file, similarity_file, ku_dict_file) diff --git a/EduData/Task/KnowledgeTracing/statistics.py b/EduData/Task/KnowledgeTracing/statistics.py new file mode 100644 index 0000000..e716fb0 --- /dev/null +++ b/EduData/Task/KnowledgeTracing/statistics.py @@ -0,0 +1,27 @@ +# coding: utf-8 +# 2019/8/24 @ tongshiwei + +__all__ = ["analysis_records"] + +from tqdm import tqdm +import json + + +def analysis_records(source): + ku_set = set() + records_num = 0 + seq_count = 0 + correct_num = 0 + with open(source) as f: + for line in tqdm(f, "doing statistics"): + seq_count += 1 + responses = json.loads(line) + records_num += len(responses) + correct_num += len([r[1] for r in responses if int(r[1]) == 1]) + ku_set.update(set([_id for _id, _ in responses])) + + 
print("in %s" % source) + print("knowledge units number: %s" % len(ku_set)) + print("records number: %s" % records_num) + print("correct records number: %s" % correct_num) + print("the number of sequence: %s" % seq_count) From 87b78be0f27342df49173fd2e19c6ede7f43d1fb Mon Sep 17 00:00:00 2001 From: tswsxk Date: Thu, 26 Sep 2019 14:27:47 +0800 Subject: [PATCH 2/9] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E5=88=86=E6=9E=90=E5=B7=A5=E5=85=B7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- EduData/Tools/train_valid_test.py | 10 +++++----- EduData/main.py | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/EduData/Tools/train_valid_test.py b/EduData/Tools/train_valid_test.py index 11f6bfd..7e7c082 100644 --- a/EduData/Tools/train_valid_test.py +++ b/EduData/Tools/train_valid_test.py @@ -7,6 +7,7 @@ from longling.ML.toolkit.dataset.splitter import train_valid_test import random import math +from tqdm import tqdm def KFold(filename, train_prefix, valid_prefix, n_splits=5, shuffle=False): @@ -16,19 +17,18 @@ def KFold(filename, train_prefix, valid_prefix, n_splits=5, shuffle=False): if shuffle is True: random.shuffle(indices) - proportion = sample_num / n_splits - step = math.floor(proportion * sample_num) + step = math.ceil(sample_num / n_splits) indices_buckets = [ (i, i + step) for i in range(0, sample_num, step) ] train_wfs = [ - io.open(train_prefix + index, "w", encoding="utf-8") for index in range(n_splits) + io.open(train_prefix + str(index), "w", encoding="utf-8") for index in range(n_splits) ] valid_wfs = [ - io.open(valid_prefix + index, "w", encoding="utf-8") for index in range(n_splits) + io.open(valid_prefix + str(index), "w", encoding="utf-8") for index in range(n_splits) ] with open(filename) as f: - for line_no, line in enumerate(f): + for line_no, line in tqdm(enumerate(f), "splitting dataset"): for idx, (start, end) in enumerate(indices_buckets): if start <= line_no < end: 
print(line, end="", file=valid_wfs[idx]) diff --git a/EduData/main.py b/EduData/main.py index dadcf83..7693e22 100644 --- a/EduData/main.py +++ b/EduData/main.py @@ -5,6 +5,8 @@ from EduData.DataSet.download_data.download_data import get_data as download, list_resources as ls from EduData.Task.KnowledgeTracing.format import tl2json, json2tl +from EduData.Task.KnowledgeTracing.statistics import analysis_records as kt_stat +from EduData.Tools.train_valid_test import train_valid_test, KFold as kfold if __name__ == '__main__': fire.Fire() From 471f97ece996a69dd4eae54fcb2f98e1b1736240 Mon Sep 17 00:00:00 2001 From: tswsxk Date: Thu, 26 Sep 2019 14:30:50 +0800 Subject: [PATCH 3/9] =?UTF-8?q?=E5=91=BD=E4=BB=A4=E8=A1=8C=E6=8E=A5?= =?UTF-8?q?=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- EduData/main.py | 7 ++++++- README.md | 8 +++++++- setup.py | 2 +- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/EduData/main.py b/EduData/main.py index 7693e22..902925a 100644 --- a/EduData/main.py +++ b/EduData/main.py @@ -8,5 +8,10 @@ from EduData.Task.KnowledgeTracing.statistics import analysis_records as kt_stat from EduData.Tools.train_valid_test import train_valid_test, KFold as kfold -if __name__ == '__main__': + +def cli(): fire.Fire() + + +if __name__ == '__main__': + cli() diff --git a/README.md b/README.md index 70b544c..e1545a1 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ Convenient interface for downloading and preprocessing dataset in education ## Installation ```bash -python setup.py install +pip install -e . 
``` ## DataSet @@ -13,3 +13,9 @@ python setup.py install ```bash ``` + +## Tools + +```bash +edudata kt_stat $Dataset +``` diff --git a/setup.py b/setup.py index 406d04e..b202fdc 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ ], # And any other dependencies foo needs entry_points={ "console_scripts": [ - "edudata = EduData.main:__main__", + "edudata = EduData.main:cli", ], }, ) From 5b960781d542dd6f901ae44edcb1d8062039c0a5 Mon Sep 17 00:00:00 2001 From: tswsxk Date: Thu, 26 Sep 2019 14:38:19 +0800 Subject: [PATCH 4/9] fix bug --- EduData/Tools/train_valid_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/EduData/Tools/train_valid_test.py b/EduData/Tools/train_valid_test.py index 7e7c082..34c6386 100644 --- a/EduData/Tools/train_valid_test.py +++ b/EduData/Tools/train_valid_test.py @@ -1,10 +1,10 @@ # coding: utf-8 # create by tongshiwei on 2019-7-5 -__all__ = ["train_valid_test"] +__all__ = ["train_valid_test", "KFold"] import io -from longling.ML.toolkit.dataset.splitter import train_valid_test +from longling.ML.toolkit.dataset import train_valid_test import random import math from tqdm import tqdm From 4b7c455e04dcd6f5bcb31d12ecaf77df47a54122 Mon Sep 17 00:00:00 2001 From: tswsxk Date: Thu, 26 Sep 2019 15:01:04 +0800 Subject: [PATCH 5/9] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=8F=90=E7=A4=BA?= =?UTF-8?q?=E4=BF=A1=E6=81=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- EduData/DataSet/junyi/KnowledgeTracing.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/EduData/DataSet/junyi/KnowledgeTracing.py b/EduData/DataSet/junyi/KnowledgeTracing.py index f1ecb07..65b5746 100644 --- a/EduData/DataSet/junyi/KnowledgeTracing.py +++ b/EduData/DataSet/junyi/KnowledgeTracing.py @@ -39,7 +39,7 @@ def _read(source, ku_dict): def _write(students, target): with wf_open(target) as wf: - for student_id, sessions in tqdm(students.items(), "writing"): + for student_id, 
sessions in tqdm(students.items(), "writing -> %s" % target): for session_id, exercises in sessions.items(): exercises.sort(key=lambda x: x[0]) exercise_response = [(exercise[1], exercise[2]) for exercise in exercises] @@ -58,10 +58,11 @@ def _frequency(students): return sorted(frequency.items(), key=lambda x: x[1], reverse=True) -def get_n_most_frequent_students(students, n, frequency=None): +def get_n_most_frequent_students(students, n=None, frequency=None): frequency = _frequency(students) if frequency is None else frequency + __frequency = frequency if n is None else frequency[:n] _students = {} - for _id, _ in frequency[:n]: + for _id, _ in __frequency: _students[_id] = students[_id] return _students @@ -84,8 +85,15 @@ def select_n_most_frequent_students(source, target_prefix, ku_dict, n): student_log_raw_file, root + "data/junyi/student_log_kt_", ku_dict_file, - [100, 200, 300] + [None] ) + + # select_n_most_frequent_students( + # student_log_raw_file, + # root + "data/junyi/student_log_kt_", + # ku_dict_file, + # [100, 200, 300] + # ) # [500, 1000, 2000] # extract_students_log(student_log_raw_file, student_log_file, ku_dict_file) From 00ae2b43205e0ccb4be6c005e1208d4eb37e280a Mon Sep 17 00:00:00 2001 From: tswsxk Date: Wed, 13 Nov 2019 11:05:41 +0800 Subject: [PATCH 6/9] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E9=9A=BE=E5=BA=A6?= =?UTF-8?q?=E5=85=B3=E7=B3=BB=E8=A7=A3=E7=A6=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- EduData/DataSet/junyi/junyi.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/EduData/DataSet/junyi/junyi.py b/EduData/DataSet/junyi/junyi.py index 9200de0..58be4e4 100644 --- a/EduData/DataSet/junyi/junyi.py +++ b/EduData/DataSet/junyi/junyi.py @@ -86,6 +86,24 @@ def extract_similarity(source, target, ku_dict): json.dump(similarity, wf, indent=2) +def extract_difficulty(source, target, ku_dict): + """ + In target: (A, B, v) means A is similar with B in v 
degree. + If v is small, A and B should be considered as not similar. + """ + difficulty = [] + with codecs.open(source, encoding="utf-8") as f, open(ku_dict) as kf, wf_open(target) as wf: + f.readline() + ku_dict = json.load(kf) + for line in csv.reader(f): + difficulty.append((ku_dict[line[0]], ku_dict[line[1]], float(line[4]))) + + logger.info("edges: %s" % len(difficulty)) + + logger.info(pandas.Series([sim[-1] for sim in difficulty]).describe()) + json.dump(difficulty, wf, indent=2) + + if __name__ == '__main__': root = "../../../" raw_file = root + "data/junyi/junyi_Exercise_table.csv" @@ -96,8 +114,10 @@ def extract_similarity(source, target, ku_dict): ] similarity_raw_file = root + "raw_data/junyi/relationship_annotation.csv" similarity_file = root + "data/junyi/similarity.json" + difficulty_file = root + "data/junyi/difficulty.json" # merge_relationship_annotation(similarity_raw_files, similarity_raw_file) - build_ku_dict(raw_file, ku_dict_file) - extract_prerequisite(raw_file, prerequisite_file, ku_dict_file) - # extract_similarity(similarity_raw_file, similarity_file, ku_dict_file) + # build_ku_dict(raw_file, ku_dict_file) + # extract_prerequisite(raw_file, prerequisite_file, ku_dict_file) + extract_similarity(similarity_raw_file, similarity_file, ku_dict_file) + # extract_difficulty(similarity_raw_file, difficulty_file, ku_dict_file) From a9047618a852c594d6ee8e90c720a70cf68c1c7a Mon Sep 17 00:00:00 2001 From: tswsxk Date: Wed, 13 Nov 2019 14:41:00 +0800 Subject: [PATCH 7/9] =?UTF-8?q?=E5=AE=8C=E5=96=84=E8=AF=B4=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 109 +++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 100 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index e1545a1..a41cfe5 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,112 @@ # EduData -Convenient interface for downloading and preprocessing dataset in education +Convenient interface 
for downloading and preprocessing dataset in education. -# Tutorial +The dataset includes: -## Installation -```bash +* [KDD Cup 2010](https://pslcdatashop.web.cmu.edu/KDDCup/downloads.jsp) + +* [ASSISTments](https://sites.google.com/site/assistmentsdata/) + +* [OLI Engineering Statics 2011](https://pslcdatashop.web.cmu.edu/DatasetInfo?datasetId=507) + +* [JunyiAcademy Math Practicing Log](https://pslcdatashop.web.cmu.edu/DatasetInfo?datasetId=1198) + +* [slepemapy.cz](https://www.fi.muni.cz/adaptivelearning/?a=data) + +* [synthetic](https://github.com/chrispiech/DeepKnowledgeTracing/tree/master/data/synthetic) + +Your can also visit our datashop [BaseData](http://base.ustc.edu.cn/data/) to get those mentioned-above (most of them) dataset. + +## Tutorial + +### Installation +Git and install by `pip` + +```shell pip install -e . ``` -## DataSet +### CLI +```shell +edudata $subcommand $parameters1 $parameters2 +``` + +To see the `help` information: +```shell +edudata -- --help +edudata $subcommand --help +``` + +The cli tools is constructed based on [fire](https://github.com/google/python-fire). +Refer to the [documentation](https://github.com/google/python-fire/blob/master/docs/using-cli.md) for detailed usage. + +#### Download Dataset +Before downloading dataset, first check the available dataset: +```shell +edudata ls +``` + +Download the dataset by specifying the name of dataset: +```shell +edudata download assistment-2009-2010-skill +``` + +#### Task Specified Tools + +##### Knowledge Tracing + +###### Format converter +In Knowledge Tracing task, there is a popular format (we named it ``) to represent the interaction sequence records: +```text +15 +1,1,1,1,7,7,9,10,10,10,10,11,11,45,54 +0,1,1,1,1,1,0,0,1,1,1,1,1,0,0 +``` +which can be found in [Deep Knowledge Tracing](https://github.com/chrispiech/DeepKnowledgeTracing/tree/master/data/assistments). 
+ +In order to deal with the issue that some special symbols are hard to be stored in the mentioned-above format, +we offer another one format, named `json sequence` to represent the interaction sequence records: +```json +[[419, 1], [419, 1], [419, 1], [665, 0], [665, 0]] +``` + +Each item in the sequence represent one interaction. The first element of the item is the exercise +(some works call it knowledge unit or knowledge item) id +and the second one indicates whether the learner correctly answer the exercise, 0 for wrongly while 1 for correctly +One line, one `json` record, which is corresponded to a learner's interaction sequence. + +We provide tools for converting two format: +```shell +# convert tl sequence to json sequence +edudata tl2json $src $tar +# convert json sequence to tl sequence +edudata json2tl $src $tar +``` + +###### Dataset Preprocess +The cli tools to quickly convert the "raw" data of the dataset into "mature" data for knowledge tracing task. +The "mature" data is in `json sequence` format +and can be modeled by [XKT](https://github.com/bigdata-ustc/XKT) and TKT(TBA) + +TBA + +###### Analysis Dataset +This tool only supports the `json sequence` format. To check the following statical indexes of the dataset: -```bash +* knowledge units number +* correct records number +* the number of sequence +```shell +edudata kt_stat $filename ``` -## Tools +#### Evaluation +In order to better verify the effectiveness of model, +the dataset is usually divided into `train/valid/test` or using `kfold` method. -```bash -edudata kt_stat $Dataset +```shell +edudata longling train_valid_test $filename1 $filename2 -- --train_ratio 0.8 --valid_ratio 0.1 --test_ratio 0.1 +longling kfold $filename1 $filename2 -- --n_splits 5 ``` +Refer to [longling](https://longling.readthedocs.io/zh/latest/#dataset) for more tools and detailed information. 
From 859a85e25269c27dbc77c2a690aac0967d159b6f Mon Sep 17 00:00:00 2001 From: tswsxk Date: Wed, 13 Nov 2019 15:01:32 +0800 Subject: [PATCH 8/9] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E8=AF=B4=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index a41cfe5..5d9451d 100644 --- a/README.md +++ b/README.md @@ -56,11 +56,11 @@ edudata download assistment-2009-2010-skill ##### Knowledge Tracing ###### Format converter -In Knowledge Tracing task, there is a popular format (we named it ``) to represent the interaction sequence records: +In Knowledge Tracing task, there is a popular format (we named it `triple line (tl)` format) to represent the interaction sequence records: ```text -15 -1,1,1,1,7,7,9,10,10,10,10,11,11,45,54 -0,1,1,1,1,1,0,0,1,1,1,1,1,0,0 +5 +419,419,419,665,665 +1,1,1,0,0 ``` which can be found in [Deep Knowledge Tracing](https://github.com/chrispiech/DeepKnowledgeTracing/tree/master/data/assistments). From 9bf7eaec93c7c2a48b7dd93256f73513b3bd7b11 Mon Sep 17 00:00:00 2001 From: tswsxk Date: Wed, 13 Nov 2019 15:06:34 +0800 Subject: [PATCH 9/9] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E8=AF=B4=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 5d9451d..b7117cc 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,11 @@ In Knowledge Tracing task, there is a popular format (we named it `triple line ( 1,1,1,0,0 ``` which can be found in [Deep Knowledge Tracing](https://github.com/chrispiech/DeepKnowledgeTracing/tree/master/data/assistments). +In this format, three lines are composed of an interaction sequence. 
+The first line indicates the length of the interaction sequence,
+and the second line represents the exercise ids, followed by the third line,
+where each element stands for a correct answer (i.e., 1) or a wrong answer (i.e., 0).
+
 In order to deal with the issue that some special symbols are hard to be stored in the mentioned-above format,
 we offer another one format, named `json sequence` to represent the interaction sequence records: