From c3747bc598892602366419d7c3affefca396155c Mon Sep 17 00:00:00 2001 From: tswsxk Date: Thu, 26 Sep 2019 14:27:33 +0800 Subject: [PATCH 1/9] =?UTF-8?q?junyi=E6=95=B0=E6=8D=AE=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- EduData/DataSet/junyi/KnowledgeTracing.py | 82 +++++++++++++++------ EduData/DataSet/junyi/junyi.py | 9 +-- EduData/Task/KnowledgeTracing/statistics.py | 27 +++++++ 3 files changed, 89 insertions(+), 29 deletions(-) create mode 100644 EduData/Task/KnowledgeTracing/statistics.py diff --git a/EduData/DataSet/junyi/KnowledgeTracing.py b/EduData/DataSet/junyi/KnowledgeTracing.py index 4c80458..f1ecb07 100644 --- a/EduData/DataSet/junyi/KnowledgeTracing.py +++ b/EduData/DataSet/junyi/KnowledgeTracing.py @@ -5,12 +5,11 @@ import json from longling import wf_open +from longling.lib.candylib import as_list from tqdm import tqdm -from EduData.Tools import train_valid_test - -def extract_students_log(source, target, ku_dict): +def _read(source, ku_dict): """require big memory to run this function""" outcome = { @@ -35,39 +34,74 @@ def extract_students_log(source, target, ku_dict): students[student][session] = [] students[student][session].append([int(timestamp), exercise, correct]) + return students + +def _write(students, target): with wf_open(target) as wf: - for student_id, sessions in tqdm(students.items(), "sorting"): + for student_id, sessions in tqdm(students.items(), "writing"): for session_id, exercises in sessions.items(): exercises.sort(key=lambda x: x[0]) exercise_response = [(exercise[1], exercise[2]) for exercise in exercises] print(json.dumps(exercise_response), file=wf) -def select_n_most_frequent_students(source, target, n=1000): +def extract_students_log(source, target, ku_dict): + students = _read(source, ku_dict) + _write(students, target) + + +def _frequency(students): + frequency = {} + for student_id, sessions in tqdm(students.items(), "calculating frequency"): + 
frequency[student_id] = sum([len(session) for session in sessions]) + return sorted(frequency.items(), key=lambda x: x[1], reverse=True) - pass + +def get_n_most_frequent_students(students, n, frequency=None): + frequency = _frequency(students) if frequency is None else frequency + _students = {} + for _id, _ in frequency[:n]: + _students[_id] = students[_id] + return _students + + +def select_n_most_frequent_students(source, target_prefix, ku_dict, n): + n_list = as_list(n) + students = _read(source, ku_dict) + frequency = _frequency(students) + for _n in n_list: + _write(get_n_most_frequent_students(students, _n, frequency), target_prefix + "%s" % _n) if __name__ == '__main__': - root = "../../" - student_log_raw_file = root + "raw_data/junyi/junyi_ProblemLog_for_PSLC.txt" - student_log_file = root + "data/junyi/student_log_kt.json" + root = "../../../" + student_log_raw_file = root + "data/junyi/junyi_ProblemLog_for_PSLC.txt" + # student_log_file = root + "data/junyi/student_log_kt.json" ku_dict_file = root + "data/junyi/graph_vertex.json" + + select_n_most_frequent_students( + student_log_raw_file, + root + "data/junyi/student_log_kt_", + ku_dict_file, + [100, 200, 300] + ) + # [500, 1000, 2000] + # extract_students_log(student_log_raw_file, student_log_file, ku_dict_file) - student_log_file_small = student_log_file + ".small" - - with open(student_log_file) as f, wf_open(student_log_file_small) as wf: - for i, line in tqdm(enumerate(f)): - if i > 50000: - break - print(line, end="", file=wf) - - print(train_valid_test( - student_log_file_small, - valid_ratio=0., - test_ratio=0.2, - root_dir=root + "data/junyi/", - silent=False, - )) + # student_log_file_small = student_log_file + ".small" + # + # with open(student_log_file) as f, wf_open(student_log_file_small) as wf: + # for i, line in tqdm(enumerate(f)): + # if i > 50000: + # break + # print(line, end="", file=wf) + # + # print(train_valid_test( + # student_log_file_small, + # valid_ratio=0., + # 
test_ratio=0.2, + # root_dir=root + "data/junyi/", + # silent=False, + # )) diff --git a/EduData/DataSet/junyi/junyi.py b/EduData/DataSet/junyi/junyi.py index 33a083a..9200de0 100644 --- a/EduData/DataSet/junyi/junyi.py +++ b/EduData/DataSet/junyi/junyi.py @@ -87,18 +87,17 @@ def extract_similarity(source, target, ku_dict): if __name__ == '__main__': - root = "../../" - raw_file = root + "raw_data/junyi/junyi_Exercise_table.csv" + root = "../../../" + raw_file = root + "data/junyi/junyi_Exercise_table.csv" ku_dict_file = root + "data/junyi/graph_vertex.json" prerequisite_file = root + "data/junyi/prerequisite.json" similarity_raw_files = [ - root + "raw_data/junyi/relationship_annotation_{}.csv".format(name) for name in ["testing", "training"] + root + "data/junyi/relationship_annotation_{}.csv".format(name) for name in ["testing", "training"] ] similarity_raw_file = root + "raw_data/junyi/relationship_annotation.csv" similarity_file = root + "data/junyi/similarity.json" # merge_relationship_annotation(similarity_raw_files, similarity_raw_file) - - # build_ku_dict(raw_file, ku_dict_file) + build_ku_dict(raw_file, ku_dict_file) extract_prerequisite(raw_file, prerequisite_file, ku_dict_file) # extract_similarity(similarity_raw_file, similarity_file, ku_dict_file) diff --git a/EduData/Task/KnowledgeTracing/statistics.py b/EduData/Task/KnowledgeTracing/statistics.py new file mode 100644 index 0000000..e716fb0 --- /dev/null +++ b/EduData/Task/KnowledgeTracing/statistics.py @@ -0,0 +1,27 @@ +# coding: utf-8 +# 2019/8/24 @ tongshiwei + +__all__ = ["analysis_records"] + +from tqdm import tqdm +import json + + +def analysis_records(source): + ku_set = set() + records_num = 0 + seq_count = 0 + correct_num = 0 + with open(source) as f: + for line in tqdm(f, "doing statistics"): + seq_count += 1 + responses = json.loads(line) + records_num += len(responses) + correct_num += len([r[1] for r in responses if int(r[1]) == 1]) + ku_set.update(set([_id for _id, _ in responses])) + + 
print("in %s" % source) + print("knowledge units number: %s" % len(ku_set)) + print("records number: %s" % records_num) + print("correct records number: %s" % correct_num) + print("the number of sequence: %s" % seq_count) From 87b78be0f27342df49173fd2e19c6ede7f43d1fb Mon Sep 17 00:00:00 2001 From: tswsxk Date: Thu, 26 Sep 2019 14:27:47 +0800 Subject: [PATCH 2/9] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E5=88=86=E6=9E=90=E5=B7=A5=E5=85=B7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- EduData/Tools/train_valid_test.py | 10 +++++----- EduData/main.py | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/EduData/Tools/train_valid_test.py b/EduData/Tools/train_valid_test.py index 11f6bfd..7e7c082 100644 --- a/EduData/Tools/train_valid_test.py +++ b/EduData/Tools/train_valid_test.py @@ -7,6 +7,7 @@ from longling.ML.toolkit.dataset.splitter import train_valid_test import random import math +from tqdm import tqdm def KFold(filename, train_prefix, valid_prefix, n_splits=5, shuffle=False): @@ -16,19 +17,18 @@ def KFold(filename, train_prefix, valid_prefix, n_splits=5, shuffle=False): if shuffle is True: random.shuffle(indices) - proportion = sample_num / n_splits - step = math.floor(proportion * sample_num) + step = math.ceil(sample_num / n_splits) indices_buckets = [ (i, i + step) for i in range(0, sample_num, step) ] train_wfs = [ - io.open(train_prefix + index, "w", encoding="utf-8") for index in range(n_splits) + io.open(train_prefix + str(index), "w", encoding="utf-8") for index in range(n_splits) ] valid_wfs = [ - io.open(valid_prefix + index, "w", encoding="utf-8") for index in range(n_splits) + io.open(valid_prefix + str(index), "w", encoding="utf-8") for index in range(n_splits) ] with open(filename) as f: - for line_no, line in enumerate(f): + for line_no, line in tqdm(enumerate(f), "splitting dataset"): for idx, (start, end) in enumerate(indices_buckets): if start <= line_no < end: 
print(line, end="", file=valid_wfs[idx]) diff --git a/EduData/main.py b/EduData/main.py index dadcf83..7693e22 100644 --- a/EduData/main.py +++ b/EduData/main.py @@ -5,6 +5,8 @@ from EduData.DataSet.download_data.download_data import get_data as download, list_resources as ls from EduData.Task.KnowledgeTracing.format import tl2json, json2tl +from EduData.Task.KnowledgeTracing.statistics import analysis_records as kt_stat +from EduData.Tools.train_valid_test import train_valid_test, KFold as kfold if __name__ == '__main__': fire.Fire() From 471f97ece996a69dd4eae54fcb2f98e1b1736240 Mon Sep 17 00:00:00 2001 From: tswsxk Date: Thu, 26 Sep 2019 14:30:50 +0800 Subject: [PATCH 3/9] =?UTF-8?q?=E5=91=BD=E4=BB=A4=E8=A1=8C=E6=8E=A5?= =?UTF-8?q?=E5=8F=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- EduData/main.py | 7 ++++++- README.md | 8 +++++++- setup.py | 2 +- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/EduData/main.py b/EduData/main.py index 7693e22..902925a 100644 --- a/EduData/main.py +++ b/EduData/main.py @@ -8,5 +8,10 @@ from EduData.Task.KnowledgeTracing.statistics import analysis_records as kt_stat from EduData.Tools.train_valid_test import train_valid_test, KFold as kfold -if __name__ == '__main__': + +def cli(): fire.Fire() + + +if __name__ == '__main__': + cli() diff --git a/README.md b/README.md index 70b544c..e1545a1 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ Convenient interface for downloading and preprocessing dataset in education ## Installation ```bash -python setup.py install +pip install -e . 
``` ## DataSet @@ -13,3 +13,9 @@ python setup.py install ```bash ``` + +## Tools + +```bash +edudata kt_stat $Dataset +``` diff --git a/setup.py b/setup.py index 406d04e..b202fdc 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ ], # And any other dependencies foo needs entry_points={ "console_scripts": [ - "edudata = EduData.main:__main__", + "edudata = EduData.main:cli", ], }, ) From 5b960781d542dd6f901ae44edcb1d8062039c0a5 Mon Sep 17 00:00:00 2001 From: tswsxk Date: Thu, 26 Sep 2019 14:38:19 +0800 Subject: [PATCH 4/9] fix bug --- EduData/Tools/train_valid_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/EduData/Tools/train_valid_test.py b/EduData/Tools/train_valid_test.py index 7e7c082..34c6386 100644 --- a/EduData/Tools/train_valid_test.py +++ b/EduData/Tools/train_valid_test.py @@ -1,10 +1,10 @@ # coding: utf-8 # create by tongshiwei on 2019-7-5 -__all__ = ["train_valid_test"] +__all__ = ["train_valid_test", "KFold"] import io -from longling.ML.toolkit.dataset.splitter import train_valid_test +from longling.ML.toolkit.dataset import train_valid_test import random import math from tqdm import tqdm From 4b7c455e04dcd6f5bcb31d12ecaf77df47a54122 Mon Sep 17 00:00:00 2001 From: tswsxk Date: Thu, 26 Sep 2019 15:01:04 +0800 Subject: [PATCH 5/9] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=8F=90=E7=A4=BA?= =?UTF-8?q?=E4=BF=A1=E6=81=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- EduData/DataSet/junyi/KnowledgeTracing.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/EduData/DataSet/junyi/KnowledgeTracing.py b/EduData/DataSet/junyi/KnowledgeTracing.py index f1ecb07..65b5746 100644 --- a/EduData/DataSet/junyi/KnowledgeTracing.py +++ b/EduData/DataSet/junyi/KnowledgeTracing.py @@ -39,7 +39,7 @@ def _read(source, ku_dict): def _write(students, target): with wf_open(target) as wf: - for student_id, sessions in tqdm(students.items(), "writing"): + for student_id, 
sessions in tqdm(students.items(), "writing -> %s" % target): for session_id, exercises in sessions.items(): exercises.sort(key=lambda x: x[0]) exercise_response = [(exercise[1], exercise[2]) for exercise in exercises] @@ -58,10 +58,11 @@ def _frequency(students): return sorted(frequency.items(), key=lambda x: x[1], reverse=True) -def get_n_most_frequent_students(students, n, frequency=None): +def get_n_most_frequent_students(students, n=None, frequency=None): frequency = _frequency(students) if frequency is None else frequency + __frequency = frequency if n is None else frequency[:n] _students = {} - for _id, _ in frequency[:n]: + for _id, _ in __frequency: _students[_id] = students[_id] return _students @@ -84,8 +85,15 @@ def select_n_most_frequent_students(source, target_prefix, ku_dict, n): student_log_raw_file, root + "data/junyi/student_log_kt_", ku_dict_file, - [100, 200, 300] + [None] ) + + # select_n_most_frequent_students( + # student_log_raw_file, + # root + "data/junyi/student_log_kt_", + # ku_dict_file, + # [100, 200, 300] + # ) # [500, 1000, 2000] # extract_students_log(student_log_raw_file, student_log_file, ku_dict_file) From 00ae2b43205e0ccb4be6c005e1208d4eb37e280a Mon Sep 17 00:00:00 2001 From: tswsxk Date: Wed, 13 Nov 2019 11:05:41 +0800 Subject: [PATCH 6/9] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E9=9A=BE=E5=BA=A6?= =?UTF-8?q?=E5=85=B3=E7=B3=BB=E8=A7=A3=E7=A6=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- EduData/DataSet/junyi/junyi.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/EduData/DataSet/junyi/junyi.py b/EduData/DataSet/junyi/junyi.py index 9200de0..58be4e4 100644 --- a/EduData/DataSet/junyi/junyi.py +++ b/EduData/DataSet/junyi/junyi.py @@ -86,6 +86,24 @@ def extract_similarity(source, target, ku_dict): json.dump(similarity, wf, indent=2) +def extract_difficulty(source, target, ku_dict): + """ + In target: (A, B, v) means A is similar with B in v 
degree. + If v is small, A and B should be considered as not similar. + """ + difficulty = [] + with codecs.open(source, encoding="utf-8") as f, open(ku_dict) as kf, wf_open(target) as wf: + f.readline() + ku_dict = json.load(kf) + for line in csv.reader(f): + difficulty.append((ku_dict[line[0]], ku_dict[line[1]], float(line[4]))) + + logger.info("edges: %s" % len(difficulty)) + + logger.info(pandas.Series([sim[-1] for sim in difficulty]).describe()) + json.dump(difficulty, wf, indent=2) + + if __name__ == '__main__': root = "../../../" raw_file = root + "data/junyi/junyi_Exercise_table.csv" @@ -96,8 +114,10 @@ def extract_similarity(source, target, ku_dict): ] similarity_raw_file = root + "raw_data/junyi/relationship_annotation.csv" similarity_file = root + "data/junyi/similarity.json" + difficulty_file = root + "data/junyi/difficulty.json" # merge_relationship_annotation(similarity_raw_files, similarity_raw_file) - build_ku_dict(raw_file, ku_dict_file) - extract_prerequisite(raw_file, prerequisite_file, ku_dict_file) - # extract_similarity(similarity_raw_file, similarity_file, ku_dict_file) + # build_ku_dict(raw_file, ku_dict_file) + # extract_prerequisite(raw_file, prerequisite_file, ku_dict_file) + extract_similarity(similarity_raw_file, similarity_file, ku_dict_file) + # extract_difficulty(similarity_raw_file, difficulty_file, ku_dict_file) From a9047618a852c594d6ee8e90c720a70cf68c1c7a Mon Sep 17 00:00:00 2001 From: tswsxk Date: Wed, 13 Nov 2019 14:41:00 +0800 Subject: [PATCH 7/9] =?UTF-8?q?=E5=AE=8C=E5=96=84=E8=AF=B4=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 109 +++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 100 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index e1545a1..a41cfe5 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,112 @@ # EduData -Convenient interface for downloading and preprocessing dataset in education +Convenient interface 
for downloading and preprocessing dataset in education. -# Tutorial +The dataset includes: -## Installation -```bash +* [KDD Cup 2010](https://pslcdatashop.web.cmu.edu/KDDCup/downloads.jsp) + +* [ASSISTments](https://sites.google.com/site/assistmentsdata/) + +* [OLI Engineering Statics 2011](https://pslcdatashop.web.cmu.edu/DatasetInfo?datasetId=507) + +* [JunyiAcademy Math Practicing Log](https://pslcdatashop.web.cmu.edu/DatasetInfo?datasetId=1198) + +* [slepemapy.cz](https://www.fi.muni.cz/adaptivelearning/?a=data) + +* [synthetic](https://github.com/chrispiech/DeepKnowledgeTracing/tree/master/data/synthetic) + +Your can also visit our datashop [BaseData](http://base.ustc.edu.cn/data/) to get those mentioned-above (most of them) dataset. + +## Tutorial + +### Installation +Git and install by `pip` + +```shell pip install -e . ``` -## DataSet +### CLI +```shell +edudata $subcommand $parameters1 $parameters2 +``` + +To see the `help` information: +```shell +edudata -- --help +edudata $subcommand --help +``` + +The cli tools is constructed based on [fire](https://github.com/google/python-fire). +Refer to the [documentation](https://github.com/google/python-fire/blob/master/docs/using-cli.md) for detailed usage. + +#### Download Dataset +Before downloading dataset, first check the available dataset: +```shell +edudata ls +``` + +Download the dataset by specifying the name of dataset: +```shell +edudata download assistment-2009-2010-skill +``` + +#### Task Specified Tools + +##### Knowledge Tracing + +###### Format converter +In Knowledge Tracing task, there is a popular format (we named it ``) to represent the interaction sequence records: +```text +15 +1,1,1,1,7,7,9,10,10,10,10,11,11,45,54 +0,1,1,1,1,1,0,0,1,1,1,1,1,0,0 +``` +which can be found in [Deep Knowledge Tracing](https://github.com/chrispiech/DeepKnowledgeTracing/tree/master/data/assistments). 
+ +In order to deal with the issue that some special symbols are hard to be stored in the mentioned-above format, +we offer another one format, named `json sequence` to represent the interaction sequence records: +```json +[[419, 1], [419, 1], [419, 1], [665, 0], [665, 0]] +``` + +Each item in the sequence represent one interaction. The first element of the item is the exercise +(some works call it knowledge unit or knowledge item) id +and the second one indicates whether the learner correctly answer the exercise, 0 for wrongly while 1 for correctly +One line, one `json` record, which is corresponded to a learner's interaction sequence. + +We provide tools for converting two format: +```shell +# convert tl sequence to json sequence +edudata tl2json $src $tar +# convert json sequence to tl sequence +edudata json2tl $src $tar +``` + +###### Dataset Preprocess +The cli tools to quickly convert the "raw" data of the dataset into "mature" data for knowledge tracing task. +The "mature" data is in `json sequence` format +and can be modeled by [XKT](https://github.com/bigdata-ustc/XKT) and TKT(TBA) + +TBA + +###### Analysis Dataset +This tool only supports the `json sequence` format. To check the following statical indexes of the dataset: -```bash +* knowledge units number +* correct records number +* the number of sequence +```shell +edudata kt_stat $filename ``` -## Tools +#### Evaluation +In order to better verify the effectiveness of model, +the dataset is usually divided into `train/valid/test` or using `kfold` method. -```bash -edudata kt_stat $Dataset +```shell +edudata longling train_valid_test $filename1 $filename2 -- --train_ratio 0.8 --valid_ratio 0.1 --test_ratio 0.1 +longling kfold $filename1 $filename2 -- --n_splits 5 ``` +Refer to [longling](https://longling.readthedocs.io/zh/latest/#dataset) for more tools and detailed information. 
From 859a85e25269c27dbc77c2a690aac0967d159b6f Mon Sep 17 00:00:00 2001 From: tswsxk Date: Wed, 13 Nov 2019 15:01:32 +0800 Subject: [PATCH 8/9] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E8=AF=B4=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index a41cfe5..5d9451d 100644 --- a/README.md +++ b/README.md @@ -56,11 +56,11 @@ edudata download assistment-2009-2010-skill ##### Knowledge Tracing ###### Format converter -In Knowledge Tracing task, there is a popular format (we named it ``) to represent the interaction sequence records: +In Knowledge Tracing task, there is a popular format (we named it `triple line (tl)` format) to represent the interaction sequence records: ```text -15 -1,1,1,1,7,7,9,10,10,10,10,11,11,45,54 -0,1,1,1,1,1,0,0,1,1,1,1,1,0,0 +5 +419,419,419,665,665 +1,1,1,0,0 ``` which can be found in [Deep Knowledge Tracing](https://github.com/chrispiech/DeepKnowledgeTracing/tree/master/data/assistments). From 9bf7eaec93c7c2a48b7dd93256f73513b3bd7b11 Mon Sep 17 00:00:00 2001 From: tswsxk Date: Wed, 13 Nov 2019 15:06:34 +0800 Subject: [PATCH 9/9] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E8=AF=B4=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 5d9451d..b7117cc 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,11 @@ In Knowledge Tracing task, there is a popular format (we named it `triple line ( 1,1,1,0,0 ``` which can be found in [Deep Knowledge Tracing](https://github.com/chrispiech/DeepKnowledgeTracing/tree/master/data/assistments). +In this format, three lines are composed of an interaction sequence. 
+The first line indicates the length of the interaction sequence,
+and the second line represents the exercise ids, followed by the third line,
+where each element stands for a correct answer (i.e., 1) or a wrong answer (i.e., 0).
+
 In order to deal with the issue that some special symbols are hard to be stored in the mentioned-above format,
 we offer another one format, named `json sequence` to represent the interaction sequence records: