# orgchart_entry.py
# Forked from bundesAPI/strukturen-ml.
import re
import spacy
from models import Organisation, Person
class OrgChartEntryParser:
    """Turn the text of an org-chart entry into Organisation/Person records.

    A spaCy pipeline labels spans of the text; ``parse_to_entities`` then
    groups those labelled spans into ``Organisation`` objects with their
    dial codes and attached ``Person`` objects.
    """

    # Entity label -> Organisation attribute filled by that label.
    _ORG_FIELDS = {"NAME": "name", "SHORT_NAME": "shortName"}
    # Entity label -> Person attribute filled by that label.
    _PERSON_FIELDS = {"PERSON": "name", "POSITION": "position"}

    def __init__(self):
        """Load the spaCy pipeline from the relative path ``model-last/``."""
        self.model = spacy.load("model-last/")

    def parse(self, text):
        """Run the loaded pipeline on *text*.

        Returns:
            A 2-tuple ``(doc, doc_json)``: the object produced by calling the
            pipeline on *text*, and the result of that object's ``to_json()``.
        """
        doc = self.model(text)
        return doc, doc.to_json()

    def clean_str(self, str_):
        """Return *str_* on a single line with normalised spacing.

        Newlines become spaces, runs of spaces collapse to one space, and
        leading/trailing whitespace is stripped.
        """
        single_line = str_.replace("\n", " ")
        return re.sub(r" +", " ", single_line).strip()

    def parse_to_entities(self, nlp):
        """Group the labelled entities of *nlp* into organisations.

        Entities are consumed in the order given by ``nlp.ents``:

        * ``NAME`` / ``SHORT_NAME`` fill the matching field of the current
          organisation. If that field is already set, the current
          organisation is finished (any pending person is attached to it
          first) and a new one is started.
        * ``DIAL_CODE`` is appended to the current organisation's dial codes.
        * ``PERSON`` / ``POSITION`` fill the matching field of the pending
          person. If that field is already set, the pending person is
          attached to the current organisation and a new person is started.
        * Any other label is ignored.

        Args:
            nlp: A parsed document exposing ``ents``; each entity must
                provide ``label_`` and ``text``.

        Returns:
            A list of ``Organisation`` objects. The list always contains at
            least one element, because the organisation being built when the
            entities run out is appended unconditionally (it is empty when
            no relevant entities were found).
        """
        entities = []
        org = Organisation()
        # Person being assembled; not yet attached to any organisation.
        person = None

        for entity in nlp.ents:
            label = entity.label_
            value = self.clean_str(entity.text)

            if label in self._ORG_FIELDS:
                field = self._ORG_FIELDS[label]
                if getattr(org, field):
                    # The field is taken, so this entity opens a new
                    # organisation; close the current one first.
                    if person is not None:
                        org.people.append(person)
                        person = None
                    entities.append(org)
                    org = Organisation()
                setattr(org, field, value)
            elif label == "DIAL_CODE":
                org.dialCodes.append(value)
            elif label in self._PERSON_FIELDS:
                field = self._PERSON_FIELDS[label]
                if person is not None and getattr(person, field):
                    # The field is taken, so this entity belongs to the
                    # next person; attach the finished one.
                    org.people.append(person)
                    person = None
                if person is None:
                    person = Person(**{field: value})
                else:
                    setattr(person, field, value)

        # Flush whatever is still pending once the entities are exhausted.
        if person is not None:
            org.people.append(person)
        entities.append(org)
        return entities