Skip to content

Commit

Permalink
feat(ChatKnowledge): Support Financial Report Analysis (#1702)
Browse files Browse the repository at this point in the history
Co-authored-by: hzh97 <[email protected]>
Co-authored-by: Fangyin Cheng <[email protected]>
Co-authored-by: licunxing <[email protected]>
  • Loading branch information
4 people committed Jul 26, 2024
1 parent 22e0680 commit 167d972
Show file tree
Hide file tree
Showing 160 changed files with 1,343 additions and 795 deletions.
7 changes: 6 additions & 1 deletion .env.template
Original file line number Diff line number Diff line change
Expand Up @@ -292,4 +292,9 @@ DBGPT_LOG_LEVEL=INFO
# OTEL_EXPORTER_OTLP_TRACES_CERTIFICATE=
# OTEL_EXPORTER_OTLP_TRACES_HEADERS=
# OTEL_EXPORTER_OTLP_TRACES_TIMEOUT=
# OTEL_EXPORTER_OTLP_TRACES_COMPRESSION=
# OTEL_EXPORTER_OTLP_TRACES_COMPRESSION=

#*******************************************************************#
#** FINANCIAL CHAT Config **#
#*******************************************************************#
# FIN_REPORT_MODEL=/app/models/bge-large-zh
1 change: 1 addition & 0 deletions assets/schema/dbgpt.sql
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ CREATE TABLE IF NOT EXISTS `knowledge_space`
`id` int NOT NULL AUTO_INCREMENT COMMENT 'auto increment id',
`name` varchar(100) NOT NULL COMMENT 'knowledge space name',
`vector_type` varchar(50) NOT NULL COMMENT 'vector type',
`domain_type` varchar(50) NOT NULL COMMENT 'domain type',
`desc` varchar(500) NOT NULL COMMENT 'description',
`owner` varchar(100) DEFAULT NULL COMMENT 'owner',
`context` TEXT DEFAULT NULL COMMENT 'context argument',
Expand Down
3 changes: 3 additions & 0 deletions assets/schema/upgrade/v0_5_10/upgrade_to_v0.5.10.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Upgrade to v0.5.10: add the nullable `domain_type` column to
-- knowledge_space, placed directly after `vector_type`.
USE dbgpt;
ALTER TABLE knowledge_space
    ADD COLUMN `domain_type` VARCHAR(50) NULL COMMENT 'space domain type' AFTER `vector_type`;
396 changes: 396 additions & 0 deletions assets/schema/upgrade/v0_5_10/v0.5.9.sql

Large diffs are not rendered by default.

Binary file modified assets/wechat.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
28 changes: 15 additions & 13 deletions dbgpt/_private/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,18 +166,18 @@ def __init__(self) -> None:
self.execute_local_commands = (
os.getenv("EXECUTE_LOCAL_COMMANDS", "False").lower() == "true"
)
### message stor file
# message store file
self.message_dir = os.getenv("MESSAGE_HISTORY_DIR", "../../message")

### Native SQL Execution Capability Control Configuration
# Native SQL Execution Capability Control Configuration
self.NATIVE_SQL_CAN_RUN_DDL = (
os.getenv("NATIVE_SQL_CAN_RUN_DDL", "True").lower() == "true"
)
self.NATIVE_SQL_CAN_RUN_WRITE = (
os.getenv("NATIVE_SQL_CAN_RUN_WRITE", "True").lower() == "true"
)

### dbgpt meta info database connection configuration
# dbgpt meta info database connection configuration
self.LOCAL_DB_HOST = os.getenv("LOCAL_DB_HOST")
self.LOCAL_DB_PATH = os.getenv("LOCAL_DB_PATH", "data/default_sqlite.db")
self.LOCAL_DB_TYPE = os.getenv("LOCAL_DB_TYPE", "sqlite")
Expand All @@ -193,13 +193,13 @@ def __init__(self) -> None:

self.CHAT_HISTORY_STORE_TYPE = os.getenv("CHAT_HISTORY_STORE_TYPE", "db")

### LLM Model Service Configuration
# LLM Model Service Configuration
self.LLM_MODEL = os.getenv("LLM_MODEL", "glm-4-9b-chat")
self.LLM_MODEL_PATH = os.getenv("LLM_MODEL_PATH")

### Proxy llm backend, this configuration is only valid when "LLM_MODEL=proxyllm"
### When we use the rest API provided by deployment frameworks like fastchat as a proxyllm, "PROXYLLM_BACKEND" is the model they actually deploy.
### We need to use "PROXYLLM_BACKEND" to load the prompt of the corresponding scene.
# Proxy llm backend, this configuration is only valid when "LLM_MODEL=proxyllm"
# When we use the rest API provided by deployment frameworks like fastchat as a proxyllm, "PROXYLLM_BACKEND" is the model they actually deploy.
# We need to use "PROXYLLM_BACKEND" to load the prompt of the corresponding scene.
self.PROXYLLM_BACKEND = None
if self.LLM_MODEL == "proxyllm":
self.PROXYLLM_BACKEND = os.getenv("PROXYLLM_BACKEND")
Expand All @@ -211,7 +211,7 @@ def __init__(self) -> None:
"MODEL_SERVER", "http://127.0.0.1" + ":" + str(self.MODEL_PORT)
)

### Vector Store Configuration
# Vector Store Configuration
self.VECTOR_STORE_TYPE = os.getenv("VECTOR_STORE_TYPE", "Chroma")
self.MILVUS_URL = os.getenv("MILVUS_URL", "127.0.0.1")
self.MILVUS_PORT = os.getenv("MILVUS_PORT", "19530")
Expand All @@ -223,7 +223,7 @@ def __init__(self) -> None:
self.ELASTICSEARCH_USERNAME = os.getenv("ELASTICSEARCH_USERNAME", None)
self.ELASTICSEARCH_PASSWORD = os.getenv("ELASTICSEARCH_PASSWORD", None)

## OceanBase Configuration
# OceanBase Configuration
self.OB_HOST = os.getenv("OB_HOST", "127.0.0.1")
self.OB_PORT = int(os.getenv("OB_PORT", "2881"))
self.OB_USER = os.getenv("OB_USER", "root")
Expand All @@ -245,7 +245,7 @@ def __init__(self) -> None:
os.environ["load_8bit"] = str(self.IS_LOAD_8BIT)
os.environ["load_4bit"] = str(self.IS_LOAD_4BIT)

### EMBEDDING Configuration
# EMBEDDING Configuration
self.EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "text2vec")
# Rerank model configuration
self.RERANK_MODEL = os.getenv("RERANK_MODEL")
Expand Down Expand Up @@ -276,17 +276,17 @@ def __init__(self) -> None:
os.getenv("KNOWLEDGE_CHAT_SHOW_RELATIONS", "False").lower() == "true"
)

### SUMMARY_CONFIG Configuration
# SUMMARY_CONFIG Configuration
self.SUMMARY_CONFIG = os.getenv("SUMMARY_CONFIG", "FAST")

self.MAX_GPU_MEMORY = os.getenv("MAX_GPU_MEMORY", None)

### Log level
# Log level
self.DBGPT_LOG_LEVEL = os.getenv("DBGPT_LOG_LEVEL", "INFO")

self.SYSTEM_APP: Optional["SystemApp"] = None

### Temporary configuration
# Temporary configuration
self.USE_FASTCHAT: bool = os.getenv("USE_FASTCHAT", "True").lower() == "true"

self.MODEL_CACHE_ENABLE: bool = (
Expand All @@ -312,6 +312,8 @@ def __init__(self) -> None:
self.DBGPT_APP_SCENE_NON_STREAMING_PARALLELISM_BASE = int(
os.getenv("DBGPT_APP_SCENE_NON_STREAMING_PARALLELISM_BASE", 1)
)
# experimental financial report model configuration
self.FIN_REPORT_MODEL = os.getenv("FIN_REPORT_MODEL", None)

@property
def local_db_manager(self) -> "ConnectorManager":
Expand Down
2 changes: 1 addition & 1 deletion dbgpt/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
version = "0.5.9"
version = "0.5.10"
88 changes: 63 additions & 25 deletions dbgpt/app/knowledge/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,27 @@
from dbgpt.app.knowledge.service import KnowledgeService
from dbgpt.app.openapi.api_v1.api_v1 import no_stream_generator, stream_generator
from dbgpt.app.openapi.api_view_model import Result
from dbgpt.configs import TAG_KEY_KNOWLEDGE_FACTORY_DOMAIN_TYPE
from dbgpt.configs.model_config import (
EMBEDDING_MODEL_CONFIG,
KNOWLEDGE_UPLOAD_ROOT_PATH,
)
from dbgpt.core.awel.dag.dag_manager import DAGManager
from dbgpt.rag import ChunkParameters
from dbgpt.rag.embedding.embedding_factory import EmbeddingFactory
from dbgpt.rag.knowledge.base import ChunkStrategy
from dbgpt.rag.knowledge.factory import KnowledgeFactory
from dbgpt.rag.retriever.embedding import EmbeddingRetriever
from dbgpt.serve.rag.api.schemas import KnowledgeSyncRequest
from dbgpt.serve.rag.api.schemas import (
KnowledgeConfigResponse,
KnowledgeDomainType,
KnowledgeStorageType,
KnowledgeSyncRequest,
)
from dbgpt.serve.rag.connector import VectorStoreConnector
from dbgpt.serve.rag.service.service import Service
from dbgpt.storage.vector_store.base import VectorStoreConfig
from dbgpt.util.i18n_utils import _
from dbgpt.util.tracer import SpanType, root_tracer

logger = logging.getLogger(__name__)
Expand All @@ -52,6 +60,11 @@ def get_rag_service() -> Service:
return Service.get_instance(CFG.SYSTEM_APP)


def get_dag_manager() -> DAGManager:
    """Return the DAGManager obtained via ``DAGManager.get_instance``.

    The lookup is keyed on the system app stored in the global config
    (``CFG.SYSTEM_APP``).
    """
    system_app = CFG.SYSTEM_APP
    return DAGManager.get_instance(system_app)


@router.post("/knowledge/space/add")
def space_add(request: KnowledgeSpaceRequest):
print(f"/space/add params: {request}")
Expand Down Expand Up @@ -147,6 +160,55 @@ def chunk_strategies():
return Result.failed(code="E000X", msg=f"chunk strategies error {e}")


@router.get("/knowledge/space/config", response_model=Result[KnowledgeConfigResponse])
async def space_config() -> Result[KnowledgeConfigResponse]:
    """Return the storage types (and their domain types) offered for spaces.

    Three storage types are reported: ``VectorStore``, ``KnowledgeGraph`` and
    ``FullText``. Each one carries the built-in ``Normal`` domain type; the
    vector store additionally gets one domain type per value found under the
    ``TAG_KEY_KNOWLEDGE_FACTORY_DOMAIN_TYPE`` tag key in the DAG manager.

    Returns:
        A successful ``Result`` wrapping a ``KnowledgeConfigResponse``, or a
        failed ``Result`` (code ``E000X``) if anything raises along the way.
    """
    try:
        manager: DAGManager = get_dag_manager()

        # Vector storage: the built-in domain first, then one per tagged DAG group.
        vector_domains = [KnowledgeDomainType(name="Normal", desc="Normal")]
        tagged_dags = manager.get_dags_by_tag_key(TAG_KEY_KNOWLEDGE_FACTORY_DOMAIN_TYPE)
        vector_domains.extend(
            KnowledgeDomainType(
                # The first DAG of each group supplies the description; the
                # tag value itself is the fallback when it has none.
                name=tag_value,
                desc=group[0].description or tag_value,
            )
            for tag_value, group in tagged_dags.items()
        )

        storages: List[KnowledgeStorageType] = [
            KnowledgeStorageType(
                name="VectorStore",
                desc=_("Vector Store"),
                domain_types=vector_domains,
            ),
            # Graph and full-text storage only expose the built-in domain.
            KnowledgeStorageType(
                name="KnowledgeGraph",
                desc=_("Knowledge Graph"),
                domain_types=[KnowledgeDomainType(name="Normal", desc="Normal")],
            ),
            KnowledgeStorageType(
                name="FullText",
                desc=_("Full Text"),
                domain_types=[KnowledgeDomainType(name="Normal", desc="Normal")],
            ),
        ]
        return Result.succ(KnowledgeConfigResponse(storage=storages))
    except Exception as e:
        return Result.failed(code="E000X", msg=f"space config error {e}")


@router.post("/knowledge/{space_name}/document/list")
def document_list(space_name: str, query_request: DocumentQueryRequest):
print(f"/document/list params: {space_name}, {query_request}")
Expand Down Expand Up @@ -350,27 +412,3 @@ async def document_summary(request: DocumentSummaryRequest):
)
except Exception as e:
return Result.failed(code="E000X", msg=f"document summary error {e}")


@router.post("/knowledge/entity/extract")
async def entity_extract(request: EntityExtractRequest):
    """Run the entity-extraction chat scene on ``request.text``.

    The request is logged, then forwarded (non-streaming) to the
    ``ChatScene.ExtractEntity`` scene using the model named in
    ``request.model_name``. Any exception, including a failure of the lazy
    imports below, is turned into a failed ``Result`` with code ``E000X``.
    """
    logger.info(f"Received params: {request}")
    try:
        # Imported lazily inside the try so import errors are reported as a
        # failed Result rather than raised.
        import uuid

        from dbgpt.app.scene import ChatScene
        from dbgpt.util.chat_util import llm_chat_response_nostream

        scene_params = {
            "chat_session_id": uuid.uuid1(),
            "current_user_input": request.text,
            "select_param": "entity",
            "model_name": request.model_name,
        }
        scene_name = ChatScene.ExtractEntity.value()
        extraction = await llm_chat_response_nostream(
            scene_name, chat_param=scene_params
        )
        return Result.succ(extraction)
    except Exception as e:
        return Result.failed(code="E000X", msg=f"entity extract error {e}")
9 changes: 9 additions & 0 deletions dbgpt/app/knowledge/request/request.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from enum import Enum
from typing import List, Optional

from dbgpt._private.pydantic import BaseModel, ConfigDict
Expand All @@ -19,12 +20,20 @@ class KnowledgeSpaceRequest(BaseModel):
name: str = None
"""vector_type: vector type"""
vector_type: str = None
"""domain_type: domain type"""
domain_type: str = "normal"
"""desc: description"""
desc: str = None
"""owner: owner"""
owner: str = None


class BusinessFieldType(Enum):
    """Enumeration of business field types.

    Only a single member is defined: ``NORMAL``, whose value is ``"Normal"``.
    """

    NORMAL = "Normal"


class KnowledgeDocumentRequest(BaseModel):
"""doc_name: doc path"""

Expand Down
2 changes: 2 additions & 0 deletions dbgpt/app/knowledge/request/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ class SpaceQueryResponse(BaseModel):
name: str = None
"""vector_type: vector type"""
vector_type: str = None
"""domain_type: domain type"""
domain_type: str = None
"""desc: description"""
desc: str = None
"""context: context"""
Expand Down
6 changes: 6 additions & 0 deletions dbgpt/app/knowledge/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
SpaceQueryResponse,
)
from dbgpt.component import ComponentType
from dbgpt.configs import DOMAIN_TYPE_FINANCIAL_REPORT
from dbgpt.configs.model_config import EMBEDDING_MODEL_CONFIG
from dbgpt.core import LLMClient
from dbgpt.model import DefaultLLMClient
Expand Down Expand Up @@ -133,6 +134,7 @@ def get_knowledge_space(self, request: KnowledgeSpaceRequest):
res.id = space.id
res.name = space.name
res.vector_type = space.vector_type
res.domain_type = space.domain_type
res.desc = space.desc
res.owner = space.owner
res.gmt_created = space.gmt_created
Expand Down Expand Up @@ -299,6 +301,10 @@ def delete_space(self, space_name: str):
llm_client=self.llm_client,
model_name=None,
)
if space.domain_type == DOMAIN_TYPE_FINANCIAL_REPORT:
conn_manager = CFG.local_db_manager
conn_manager.delete_db(f"{space.name}_fin_report")

vector_store_connector = VectorStoreConnector(
vector_store_type=space.vector_type, vector_store_config=config
)
Expand Down
Loading

0 comments on commit 167d972

Please sign in to comment.