From 9945988e447ea3322c3e792574b39d1e5f6bdc84 Mon Sep 17 00:00:00 2001 From: yqkcn <410728991@qq.com> Date: Mon, 30 Sep 2024 16:29:15 +0800 Subject: [PATCH] format mind_map_extractor code (#2686) ### What problem does this PR solve? format mind_map_extractor code ### Type of change - [x] Refactoring --- graphrag/mind_map_extractor.py | 40 +++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/graphrag/mind_map_extractor.py b/graphrag/mind_map_extractor.py index e33650ba5..2bedf9639 100644 --- a/graphrag/mind_map_extractor.py +++ b/graphrag/mind_map_extractor.py @@ -65,17 +65,20 @@ def _be_children(self, obj: dict, keyset: set): if isinstance(obj, str): obj = [obj] if isinstance(obj, list): - for i in obj: keyset.add(i) - return [{"id": re.sub(r"\*+", "", i), "children": []} for i in obj if re.sub(r"\*+", "", i)] + keyset.update(obj) + obj = [re.sub(r"\*+", "", i) for i in obj] + return [{"id": i, "children": []} for i in obj if i] arr = [] for k, v in obj.items(): k = self._key(k) - if not k or k in keyset: continue - keyset.add(k) - arr.append({ - "id": k, - "children": self._be_children(v, keyset) - }) + if k and k not in keyset: + keyset.add(k) + arr.append( + { + "id": k, + "children": self._be_children(v, keyset) + } + ) return arr def __call__( @@ -110,15 +113,22 @@ def __call__( return MindMapResult(output={"id": "root", "children": []}) merge_json = reduce(self._merge, res) - if len(merge_json.keys()) > 1: - keyset = set( - [re.sub(r"\*+", "", k) for k, v in merge_json.items() if isinstance(v, dict) and re.sub(r"\*+", "", k)]) - merge_json = {"id": "root", - "children": [{"id": self._key(k), "children": self._be_children(v, keyset)} for k, v in - merge_json.items() if isinstance(v, dict) and self._key(k)]} + if len(merge_json) > 1: + keys = [re.sub(r"\*+", "", k) for k, v in merge_json.items() if isinstance(v, dict)] + keyset = set(i for i in keys if i) + merge_json = { + "id": "root", + "children": [ + { + "id": self._key(k), + "children": self._be_children(v, keyset) + } + for k, v in merge_json.items() if isinstance(v, dict) and self._key(k) + ] + } else: k = self._key(list(merge_json.keys())[0]) - merge_json = {"id": k, "children": self._be_children(list(merge_json.items())[0][1], set([k]))} + merge_json = {"id": k, "children": self._be_children(list(merge_json.items())[0][1], {k})} except Exception as e: logging.exception("error mind graph")