Skip to content

Commit a11444f

Browse files
committed
add new
1 parent 10b40d3 commit a11444f

File tree

1 file changed

+28
-0
lines changed

1 file changed

+28
-0
lines changed

sample_tanaka_corpus.py

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# -*- coding: utf-8 -*-
2+
3+
import random
4+
5+
6+
def sample_parallel_corpus(src_j, src_e, dst_j, dst_e, sample_size=10000,
                           encoding='utf-8'):
    """Write a random, aligned subset of a line-parallel corpus.

    Line ``i`` of ``src_j`` and line ``i`` of ``src_e`` are treated as a
    pair. ``sample_size`` distinct line numbers are drawn with
    ``random.sample`` and the matching lines are written, stripped of
    surrounding whitespace, to ``dst_j`` and ``dst_e`` in the same order,
    so the two output files stay aligned.

    Args:
        src_j: Path of the first input file.
        src_e: Path of the second input file.
        dst_j: Path of the output file for lines taken from ``src_j``.
        dst_e: Path of the output file for lines taken from ``src_e``.
        sample_size: Number of line pairs to draw (without replacement).
        encoding: Text encoding used for all four files.

    Returns:
        The list of sampled 0-based line indices, in output order.

    Raises:
        ValueError: If the inputs have different line counts, or if
            ``sample_size`` exceeds the number of available lines.
    """
    # Context managers guarantee the handles are closed even on error.
    with open(src_j, 'r', encoding=encoding) as f_j, \
            open(src_e, 'r', encoding=encoding) as f_e:
        js = f_j.readlines()
        es = f_e.readlines()

    size_j = len(js)
    size_e = len(es)

    # A length mismatch means the files are not line-aligned; sampling by
    # a shared index would pair unrelated sentences or run off the end.
    if size_j != size_e:
        raise ValueError(
            'corpus files are not aligned: %r has %d lines, %r has %d lines'
            % (src_j, size_j, src_e, size_e))
    if sample_size > size_e:
        raise ValueError(
            'cannot sample %d lines from a corpus of %d lines'
            % (sample_size, size_e))

    index = random.sample(range(size_e), sample_size)

    # Outputs are opened only after validation so a bad input never
    # truncates previously generated files.
    with open(dst_j, 'w', encoding=encoding) as f_j_w, \
            open(dst_e, 'w', encoding=encoding) as f_e_w:
        for i in index:
            print(js[i].strip(), file=f_j_w)
            print(es[i].strip(), file=f_e_w)

    return index


def main():
    """Sample 10000 sentence pairs from the Tanaka corpus files."""
    sample_parallel_corpus(
        'tanaka_corpus_j.txt',
        'tanaka_corpus_e.txt',
        'tanaka_corpus_j_10000.txt',
        'tanaka_corpus_e_10000.txt',
        sample_size=10000,
    )


if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)