Merge pull request #15 from tswsxk/master
v0.0.5: support graph construction and analysis
tswsxk authored Dec 13, 2019
2 parents aef157f + 8258a97 commit 8c67dbf
Showing 8 changed files with 146 additions and 7 deletions.
83 changes: 83 additions & 0 deletions EduData/Task/KnowledgeTracing/graph.py
@@ -0,0 +1,83 @@
# coding: utf-8
# 2019/12/12 @ tongshiwei

import json
from longling import wf_open
from tqdm import tqdm
import numpy as np


def dense_graph(ku_num, tar):
    with wf_open(tar) as wf:
        for i in range(ku_num):
            for j in range(ku_num):
                if i != j:
                    print(json.dumps([i, j]), file=wf)


def _count_to_probability(count_graph):
    _transition_graph = np.asarray(count_graph)

    # row-normalize: each row of transition counts becomes a probability distribution
    _transition_graph = (_transition_graph.T / _transition_graph.sum(axis=-1)).T

    return _transition_graph.tolist()


def _output_graph(graph, tar):
    ku_num = len(graph)

    with wf_open(tar) as wf:
        for i in range(ku_num):
            for j in range(ku_num):
                if i != j and graph[i][j] > 0:
                    print(json.dumps([i, j, graph[i][j]]), file=wf)


def correct_transition_graph(ku_num, *src, tar):
    count_graph = [[0] * ku_num for _ in range(ku_num)]

    for filename in src:
        with open(filename) as f:
            for line in tqdm(f, "constructing transition graph"):
                if not line.strip():  # pragma: no cover
                    continue
                seq = json.loads(line)
                pre_c = None
                for eid, r in seq:
                    if pre_c is not None:
                        if eid != pre_c and r == 1:
                            # count a transition only when both the previous
                            # and the current exercise are answered correctly
                            count_graph[pre_c][eid] += 1
                        elif r == 1:
                            # count_graph[pre_c][eid] += 1
                            pass
                    if r == 1:
                        pre_c = eid
                    else:
                        # an incorrect answer breaks the chain of correct transitions
                        pre_c = None

    _transition_graph = _count_to_probability(count_graph)

    _output_graph(_transition_graph, tar)


def transition_graph(ku_num, *src, tar):
    count_graph = [[0] * ku_num for _ in range(ku_num)]

    for filename in src:
        with open(filename) as f:
            for line in tqdm(f, "constructing transition graph"):
                if not line.strip():  # pragma: no cover
                    continue
                seq = json.loads(line)
                pre = None
                for eid, _ in seq:
                    if pre is not None:
                        if eid != pre:
                            count_graph[pre][eid] += 1
                        else:
                            # count_graph[pre][eid] += 1
                            pass
                    pre = eid

    _transition_graph = _count_to_probability(count_graph)
    _output_graph(_transition_graph, tar)
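
For orientation, here is a minimal usage sketch of the three new builders (not part of this commit; the file names and toy data are invented, and it assumes this version of EduData and its dependencies are installed). Each input line is a JSON list of [exercise_id, response] pairs, i.e. the sequence format produced by tl2json:

import json

from EduData.Task.KnowledgeTracing.graph import dense_graph, transition_graph, correct_transition_graph

# two toy response sequences over 3 knowledge units
toy_sequences = [
    [[0, 1], [1, 1], [2, 1], [0, 1]],
    [[1, 1], [0, 1], [2, 1], [1, 1]],
]
with open("toy_seq.json", "w") as f:
    for seq in toy_sequences:
        print(json.dumps(seq), file=f)

dense_graph(3, "toy_dense_graph.json")  # every directed edge [i, j] with i != j
transition_graph(3, "toy_seq.json", tar="toy_trans.json")  # row-normalized transition probabilities
correct_transition_graph(3, "toy_seq.json", tar="toy_ctrans.json")  # transitions between consecutive correct answers only
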
22 changes: 22 additions & 0 deletions EduData/Task/KnowledgeTracing/statistics.py
@@ -25,3 +25,25 @@ def analysis_records(source):
    print("records number: %s" % records_num)
    print("correct records number: %s" % correct_num)
    print("the number of sequence: %s" % seq_count)


def analysis_edges(src, threshold=None):
    edge_num = 0

    with open(src) as f:
        for line in f:
            if not line.strip():  # pragma: no cover
                continue
            data = json.loads(line)
            if len(data) == 2:
                # unweighted edge [i, j]
                edge_num += 1
            elif len(data) >= 3:
                # weighted edge [i, j, w]: count it only if it passes the threshold
                if threshold is None:
                    edge_num += 1
                elif data[2] >= threshold:
                    edge_num += 1
            else:  # pragma: no cover
                raise ValueError("each edge in src should have at least two elements")

    print("in %s" % src)
    print("%s edges" % edge_num)
9 changes: 8 additions & 1 deletion EduData/main.py
@@ -5,9 +5,10 @@

from EduData.DataSet.download_data.download_data import get_data, list_resources
from EduData.Task.KnowledgeTracing.format import tl2json, json2tl
from EduData.Task.KnowledgeTracing.statistics import analysis_records
from EduData.Task.KnowledgeTracing.statistics import analysis_records, analysis_edges
from longling.ML.toolkit.dataset import train_valid_test, kfold
from EduData.DataSet.junyi import extract_relations, build_json_sequence
from EduData.Task.KnowledgeTracing.graph import dense_graph, transition_graph, correct_transition_graph


def cli(): # pragma: no cover
@@ -18,6 +19,7 @@ def cli(): # pragma: no cover
            "tl2json": tl2json,
            "json2tl": json2tl,
            "kt_stat": analysis_records,
            "edge_stat": analysis_edges,
            "train_valid_test": train_valid_test,
            "kfold": kfold,
            "dataset": {
@@ -27,6 +29,11 @@
                        "build_json_sequence": build_json_sequence,
                    }
                }
            },
            "graph": {
                "dense": dense_graph,
                "trans": transition_graph,
                "ctrans": correct_transition_graph,
            }
        }
    )
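
Since fire.Fire dispatches on this nested dict, the new builders become subcommands of graph. A hedged sketch of how they could be reached (the console-script name edudata is assumed, as are the file names; 835 is the knowledge-unit count used in the tests):

# hypothetical command-line invocations wired by the dict above:
#   edudata graph dense 835 dense_graph.json
#   edudata graph trans 835 student_log_kt.json --tar transition_graph.json
#   edudata graph ctrans 835 student_log_kt.json --tar correct_transition_graph.json
# the same dispatch can be reproduced standalone with fire:
import fire

from EduData.Task.KnowledgeTracing.graph import dense_graph, transition_graph, correct_transition_graph

if __name__ == "__main__":
    fire.Fire({
        "graph": {
            "dense": dense_graph,
            "trans": transition_graph,
            "ctrans": correct_transition_graph,
        }
    })
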
6 changes: 6 additions & 0 deletions docs/graph.md
@@ -0,0 +1,6 @@
# Constructing Knowledge graph

## Reference
[1] Piech C, Bassen J, Huang J, et al. Deep knowledge tracing[C]//Advances in neural information processing systems. 2015: 505-513.
[2] Nakagawa H, Iwasawa Y, Matsuo Y. Graph-based Knowledge Tracing: Modeling Student Proficiency Using Graph Neural Network[C]//IEEE/WIC/ACM International Conference on Web Intelligence. ACM, 2019: 156-163.

1 change: 1 addition & 0 deletions setup.py
@@ -31,6 +31,7 @@
        'pandas',
        'fire',
        'lxml',
        'numpy'
    ], # And any other dependencies foo needs
    entry_points={
        "console_scripts": [
4 changes: 4 additions & 0 deletions tests/test_graph.py
@@ -0,0 +1,4 @@
# coding: utf-8
# 2019/12/13 @ tongshiwei

# redirect to test_junyi.py
24 changes: 18 additions & 6 deletions tests/test_junyi.py
@@ -4,33 +4,45 @@
from longling import path_append
from EduData.DataSet.junyi import extract_relations, build_json_sequence
from EduData.Task.KnowledgeTracing.format import tl2json, json2tl
from EduData.Task.KnowledgeTracing.statistics import analysis_records
from EduData.Task.KnowledgeTracing.statistics import analysis_records, analysis_edges
from EduData.Task.KnowledgeTracing.graph import dense_graph, transition_graph, correct_transition_graph


def test_junyi(shared_data_dir):
    src_root = path_append(shared_data_dir, "junyi", to_str=True)
    extract_relations(src_root, path_append(src_root, "data"))
    assert True


def test_junyi_kt(shared_data_dir):
    src_root = path_append(shared_data_dir, "junyi", to_str=True)
    ku_dict_path = path_append(shared_data_dir, "junyi", "data", "graph_vertex.json")
    build_json_sequence(src_root, path_append(src_root, "data", to_str=True), ku_dict_path)
    assert True


def test_json2tl(shared_data_dir):
    src = path_append(shared_data_dir, "junyi", "data", "student_log_kt_1000", to_str=True)
    tl_tar = path_append(shared_data_dir, "junyi", "data", "student_log_kt_1000.tl", to_str=True)
    json_tar = path_append(shared_data_dir, "junyi", "data", "student_log_kt_1000.json", to_str=True)
    json2tl(src, tl_tar)
    tl2json(tl_tar, json_tar, to_int=True)
    tl2json(tl_tar, json_tar, to_int=False)
    assert True
    tl2json(tl_tar, json_tar, to_int=True)


def test_graph(shared_data_dir):
    json_src = path_append(shared_data_dir, "junyi", "data", "student_log_kt_1000.json", to_str=True)

    dense_graph(835, path_append(shared_data_dir, "dense_graph", to_str=True))
    transition_graph(835, json_src, tar=path_append(shared_data_dir, "transition_graph", to_str=True))
    correct_transition_graph(835, json_src, tar=path_append(shared_data_dir, "correct_transition_graph", to_str=True))


def test_analysis(shared_data_dir):
    src = path_append(shared_data_dir, "junyi", "data", "student_log_kt_1000", to_str=True)
    analysis_records(src)
    assert True

    graph_src = path_append(shared_data_dir, "dense_graph", to_str=True)
    analysis_edges(graph_src)

    graph_src = path_append(shared_data_dir, "transition_graph", to_str=True)
    analysis_edges(graph_src, threshold=0.5)
    analysis_edges(graph_src, threshold=None)
4 changes: 4 additions & 0 deletions tests/test_statics.py
@@ -0,0 +1,4 @@
# coding: utf-8
# 2019/12/13 @ tongshiwei

# redirect to test_junyi.py
