forked from shuyang790/QA
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathq_analysis.cpp
119 lines (96 loc) · 3.1 KB
/
q_analysis.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
/*************************************************************************
> File Name: word_segmentation.cpp
> Author:
> Created Time: Mon 12/14 21:51:19 2015
************************************************************************/
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cassert>
#include <string>
#include <vector>
#include <iostream>
#include <unistd.h>
#include <pthread.h>
#include "segment_dll.h"
#include "postag_dll.h"
#include "ner_dll.h"
#include "parser_dll.h"
using namespace std;
/* Size, in bytes, of the buffer handed to fgets() when reading the question file. */
#define STR_LEN 65536
/** Removes every trailing '\n' and '\r' character from `line`. */
static void strip_line_endings(std::string &line) {
    while (!line.empty() &&
            (line[line.size() - 1] == '\n' || line[line.size() - 1] == '\r'))
        line.erase(line.size() - 1);
}

/**
 * Reads every line of the file `path` into `questions`, one entry per line,
 * with trailing CR/LF characters removed.
 *
 * A line that does not fit into one STR_LEN-sized fgets() buffer is
 * reassembled from consecutive chunks, so it still yields exactly one entry
 * and the indices of the following lines are not shifted.
 *
 * @return false if the file could not be opened (errno is left as set by
 *         fopen), true otherwise.
 */
static bool read_questions(const char *path, std::vector<std::string> &questions) {
    FILE *fin = fopen(path, "r");
    if (fin == NULL)
        return false;

    std::vector<char> chunk(STR_LEN);
    const std::size_t capacity = static_cast<std::size_t>(STR_LEN) - 1;
    std::string line;
    bool partial = false;   /* true while `line` holds an unfinished long line */

    while (fgets(&chunk[0], STR_LEN, fin) != NULL) {
        const std::size_t got = strlen(&chunk[0]);
        line.append(&chunk[0], got);

        /* A completely filled buffer that does not end in '\n' means fgets
         * stopped only because it ran out of room: keep accumulating. */
        if (got == capacity && chunk[got - 1] != '\n') {
            partial = true;
            continue;
        }

        strip_line_endings(line);
        questions.push_back(line);
        line.clear();
        partial = false;
    }

    /* The file ended right after a buffer-filling chunk. */
    if (partial) {
        strip_line_endings(line);
        questions.push_back(line);
    }

    fclose(fin);
    return true;
}

/** Closes `fp` unless it is NULL (fclose(NULL) is undefined behaviour). */
static void close_if_open(FILE *fp) {
    if (fp != NULL)
        fclose(fp);
}

/**
 * Runs word segmentation, POS tagging, named-entity recognition and
 * dependency parsing over a range of questions and writes the results.
 *
 * The questions are the lines of `src_filename` (zero-based). Questions with
 * index in [startno, endno) that exist in the file are processed; a negative
 * `startno` processes nothing. For every processed question exactly one line
 * is written to each output file, so the outputs stay line-aligned:
 *   - segment_filename: "word/postag\t" for every word;
 *   - ner_filename:     "word/netag\t" for every word whose NE tag is not "O";
 *   - parser_filename:  an empty line (parser output is not printed yet).
 * Empty questions produce an empty line in all three files.
 *
 * The output files are opened in append mode when `startno` is non-zero and
 * are truncated otherwise. Progress ("\r#\t<index>") is written to stderr.
 *
 * If the input file, any output file or any model cannot be opened/loaded,
 * a message is written to stderr and nothing is processed.
 * NOTE(review): model-load failure is detected by a NULL handle — confirm
 * against the LTP API documentation.
 *
 * @return always a null pointer.
 */
void * segment(char *src_filename, char *segment_filename,
        char *ner_filename, char *parser_filename,
        int startno, int endno){
    /* Read the input first: it is cheap and lets us bail out before the
     * expensive model loading when the file is missing. */
    std::vector<std::string> questions;
    if (!read_questions(src_filename, questions)) {
        perror(src_filename);
        return NULL;
    }

    void *segmentor = segmentor_create_segmentor("./ltp_data/cws.model");
    void *postagger = postagger_create_postagger("./ltp_data/pos.model");
    void *recognizer = ner_create_recognizer("./ltp_data/ner.model");
    void *parser = parser_create_parser("./ltp_data/parser.model");

    bool ok = segmentor != NULL && postagger != NULL &&
            recognizer != NULL && parser != NULL;
    if (!ok)
        fprintf(stderr, "segment: failed to load one or more models from ./ltp_data\n");

    FILE *fseg = NULL;
    FILE *fner = NULL;
    FILE *fparser = NULL;
    if (ok) {
        /* Continue earlier output when resuming, start fresh otherwise. */
        const char *mode = startno ? "a" : "w";
        fseg = fopen(segment_filename, mode);
        fner = fopen(ner_filename, mode);
        fparser = fopen(parser_filename, mode);
        ok = fseg != NULL && fner != NULL && fparser != NULL;
        if (!ok)
            fprintf(stderr, "segment: cannot open one or more output files\n");
    }

    if (ok) {
        /* Clamp the exclusive upper bound to the number of questions read. */
        int last = endno < 0 ? 0 : endno;
        if (static_cast<std::size_t>(last) > questions.size())
            last = static_cast<int>(questions.size());

        for (int i = startno; i >= 0 && i < last; i++) {
            if (!questions[i].empty()) {
                std::vector<std::string> words;
                std::vector<std::string> postags;
                std::vector<std::string> netags;
                std::vector<int> heads;
                std::vector<std::string> deprels;

                const int len = segmentor_segment(segmentor, questions[i], words);
                postagger_postag(postagger, words, postags);
                ner_recognize(recognizer, words, postags, netags);
                parser_parse(parser, words, postags, heads, deprels);

                /* Never index past what each stage actually produced. */
                const std::size_t count = len > 0 ? static_cast<std::size_t>(len) : 0;
                if (words.size() < count || postags.size() < count ||
                        netags.size() < count) {
                    fprintf(stderr,
                            "\nsegment: incomplete analysis for question %d, skipped\n", i);
                } else {
                    for (std::size_t j = 0; j < count; j++) {
                        fprintf(fseg, "%s/%s\t", words[j].c_str(), postags[j].c_str());
                        if (netags[j] != "O")
                            fprintf(fner, "%s/%s\t", words[j].c_str(), netags[j].c_str());
                        /* TODO: parser info print */
                    }
                }
            }

            fprintf(stderr, "\r#\t%d", i);
            fprintf(fseg, "\n");
            fprintf(fner, "\n");
            fprintf(fparser, "\n");
        }
    }

    close_if_open(fseg);
    close_if_open(fner);
    close_if_open(fparser);

    if (parser != NULL)
        parser_release_parser(parser);
    if (recognizer != NULL)
        ner_release_recognizer(recognizer);
    if (postagger != NULL)
        postagger_release_postagger(postagger);
    if (segmentor != NULL)
        segmentor_release_segmentor(segmentor);
    return NULL;
}
/**
 * Program entry point.
 *
 * No analysis is run at present: both segment() invocations listed below are
 * commented out, so the program exits immediately with status 0. The command
 * line arguments are not used.
 */
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    // Sample question set:
    // segment("./questions/provided/q_facts_sample.txt",
    //         "./questions/q_facts_sample_segged.txt",
    //         "./questions/q_facts_sample_segged_ner.txt",
    //         "./questions/q_facts_sample_segged_psr.txt",
    //         000, 9000);

    // Full question set:
    // segment("./questions/provided/q_facts.txt",
    //         "./questions/q_facts_segged.txt",
    //         "./questions/q_facts_segged_ner.txt",
    //         "./questions/q_facts_segged_psr.txt",
    //         000, 9000);

    return 0;
}