Skip to content

Commit

Permalink
Merge pull request #800 from Abirdcfly/MultiQueryRetriever
Browse files Browse the repository at this point in the history
feat: add multiQueryRetriever
  • Loading branch information
bjwswang authored Mar 18, 2024
2 parents 60cf39a + a0d35cb commit 9cfb0e2
Show file tree
Hide file tree
Showing 31 changed files with 1,168 additions and 40 deletions.
21 changes: 13 additions & 8 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -284,22 +284,27 @@ bff-sdk-generator:
config_rule_line_num = $(shell grep -n "rules:" config/rbac/role.yaml | cut -d: -f1)
chart_rule_line_num = $(shell grep -n "rules:" deploy/charts/arcadia/templates/rbac.yaml | cut -d: -f1)
prepare-push: manifests generate fmt vet gql-gen
@echo "go mod tidy..."
@go mod tidy
@echo "install golangci-lint"
@go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest
@echo "run golangci-lint with auto-fix"
@echo "check or install golangci-lint..."
# Look the tool up on PATH with `command -v`; `test -s golangci-lint` only checked
# for a non-empty file named golangci-lint in the current directory.
@command -v golangci-lint >/dev/null 2>&1 || go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest
@echo "run golangci-lint with auto-fix..."
@golangci-lint run --fix -v ./...
@echo "copy crds to charts"
@echo "copy crds to charts..."
@cp config/crd/bases/* deploy/charts/arcadia/crds
@echo "copy role to charts"
@echo "copy role to charts..."
@sed -n '$(config_rule_line_num),$$p' config/rbac/role.yaml > tmp_role.yaml
@sed -i '' '$(chart_rule_line_num),$$d' deploy/charts/arcadia/templates/rbac.yaml
@sed -i.bak '$(chart_rule_line_num),$$d' deploy/charts/arcadia/templates/rbac.yaml
@rm -f deploy/charts/arcadia/templates/rbac.yaml.bak
@cat tmp_role.yaml >> deploy/charts/arcadia/templates/rbac.yaml
@rm -f tmp_role.yaml
@echo "install swag"
@go install github.com/swaggo/swag/cmd/swag@latest
@echo "check or install swag..."
# Look the tool up on PATH with `command -v`; `test -s swag` only checked
# for a non-empty file named swag in the current directory.
@command -v swag >/dev/null 2>&1 || go install github.com/swaggo/swag/cmd/swag@latest
@echo "swag init..."
@swag init -g apiserver/main.go -o apiserver/docs .
@echo "swag fmt..."
@swag fmt
@echo "done"

PYTHON_INDEX_URL ?=https://pypi.mirrors.ustc.edu.cn/simple/
.PHONY: prepare-push-pypi
Expand Down
77 changes: 77 additions & 0 deletions api/app-node/retriever/v1alpha1/multiqueryretriever_types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
/*
Copyright 2024 KubeAGI.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package v1alpha1

import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

node "github.com/kubeagi/arcadia/api/app-node"
"github.com/kubeagi/arcadia/api/base/v1alpha1"
)

// MultiQueryRetrieverSpec defines the desired state of MultiQueryRetriever
type MultiQueryRetrieverSpec struct {
// CommonSpec is inlined from the base v1alpha1 API package.
// NOTE(review): presumably the source of the creator/description/displayName
// properties in the generated CRD; confirm against the base package.
v1alpha1.CommonSpec `json:",inline"`
// CommonRetrieverConfig is inlined; it is declared elsewhere in this package.
// NOTE(review): presumably the source of the numDocuments/scoreThreshold
// properties in the generated CRD; confirm against its declaration.
CommonRetrieverConfig `json:",inline"`
}

// MultiQueryRetrieverStatus defines the observed state of MultiQueryRetriever
type MultiQueryRetrieverStatus struct {
// ObservedGeneration is the last observed generation.
// +optional
ObservedGeneration int64 `json:"observedGeneration,omitempty"`

// ConditionedStatus is the current status.
// It is inlined, so its fields are serialized directly under status
// (the generated CRD exposes them as the conditions list).
v1alpha1.ConditionedStatus `json:",inline"`
}

//+kubebuilder:object:root=true
//+kubebuilder:subresource:status

// MultiQueryRetriever is the Schema for the MultiQueryRetriever API
type MultiQueryRetriever struct {
// TypeMeta is inlined; it supplies apiVersion and kind on the serialized object.
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`

Spec MultiQueryRetrieverSpec `json:"spec,omitempty"`
Status MultiQueryRetrieverStatus `json:"status,omitempty"`
}

//+kubebuilder:object:root=true

// MultiQueryRetrieverList contains a list of MultiQueryRetriever
type MultiQueryRetrieverList struct {
// TypeMeta is inlined; it supplies apiVersion and kind on the serialized list.
metav1.TypeMeta `json:",inline"`
// ListMeta is the list-level metadata, serialized under "metadata".
metav1.ListMeta `json:"metadata,omitempty"`
// Items holds the MultiQueryRetriever objects in this list.
Items []MultiQueryRetriever `json:"items"`
}

// init adds MultiQueryRetriever and its list type to this package's
// SchemeBuilder.
func init() {
	SchemeBuilder.Register(
		new(MultiQueryRetriever),
		new(MultiQueryRetrieverList),
	)
}

// Compile-time assertion that *MultiQueryRetriever implements node.Node.
var _ node.Node = (*MultiQueryRetriever)(nil)

// SetRef computes the reference annotations for this node via
// node.SetRefAnnotations and stores them on the object.
//
// The two reference lists passed are node.RetrieverRef.Len(1) and
// node.RetrievalQAChainRef.Len(1).
// NOTE(review): which list is "input" and which is "output", and what Len(1)
// constrains, is defined by the node package; confirm there.
func (c *MultiQueryRetriever) SetRef() {
	existing := c.GetAnnotations()
	retrieverRefs := []node.Ref{node.RetrieverRef.Len(1)}
	chainRefs := []node.Ref{node.RetrievalQAChainRef.Len(1)}

	merged := node.SetRefAnnotations(existing, retrieverRefs, chainRefs)

	// With no annotation map yet, adopt the computed one so the copy below
	// never writes into a nil map.
	if existing == nil {
		c.SetAnnotations(merged)
	}
	for key, value := range merged {
		c.Annotations[key] = value
	}
}
92 changes: 92 additions & 0 deletions api/app-node/retriever/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion apiserver/pkg/application/application.go
Original file line number Diff line number Diff line change
Expand Up @@ -686,7 +686,7 @@ func redefineNodes(knowledgebase *string, name string, llmName string, tools []*
NodeConfig: v1alpha1.NodeConfig{
Name: "chain-node",
DisplayName: "RetrievalQA chain",
Description: "chain是langchain的核心概念RetrievalQAChain用于从retriver中提取信息,供llm调用",
Description: "chain是langchain的核心概念RetrievalQAChain用于从retriever中提取信息,供llm调用",
Ref: &v1alpha1.TypedObjectReference{
APIGroup: pointer.String("chain.arcadia.kubeagi.k8s.com.cn"),
Kind: "RetrievalQAChain",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.9.2
creationTimestamp: null
name: multiqueryretrievers.retriever.arcadia.kubeagi.k8s.com.cn
spec:
group: retriever.arcadia.kubeagi.k8s.com.cn
names:
kind: MultiQueryRetriever
listKind: MultiQueryRetrieverList
plural: multiqueryretrievers
singular: multiqueryretriever
scope: Namespaced
versions:
- name: v1alpha1
schema:
openAPIV3Schema:
description: MultiQueryRetriever is the Schema for the MultiQueryRetriever
API
properties:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
kind:
description: 'Kind is a string value representing the REST resource this
object represents. Servers may infer this from the endpoint the client
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
metadata:
type: object
spec:
description: MultiQueryRetrieverSpec defines the desired state of MultiQueryRetriever
properties:
creator:
description: Creator defines datasource creator (AUTO-FILLED by webhook)
type: string
description:
description: Description defines datasource description
type: string
displayName:
description: DisplayName defines datasource display name
type: string
numDocuments:
default: 5
description: NumDocuments is the max number of documents to return.
maximum: 50
minimum: 1
type: integer
scoreThreshold:
default: 0.3
description: ScoreThreshold is the cosine distance float score threshold.
Lower score represents more similarity.
maximum: 1
minimum: 0
type: number
type: object
status:
description: MultiQueryRetrieverStatus defines the observed state of MultiQueryRetriever
properties:
conditions:
description: Conditions of the resource.
items:
description: A Condition that may apply to a resource.
properties:
lastSuccessfulTime:
description: LastSuccessfulTime is repository Last Successful
Update Time
format: date-time
type: string
lastTransitionTime:
description: LastTransitionTime is the last time this condition
transitioned from one status to another.
format: date-time
type: string
message:
description: A Message containing details about this condition's
last transition from one status to another, if any.
type: string
reason:
description: A Reason for this condition's last transition from
one status to another.
type: string
status:
description: Status of this condition; is it currently True,
False, or Unknown
type: string
type:
description: Type of this condition. At most one of each condition
type may apply to a resource at any point in time.
type: string
required:
- lastTransitionTime
- reason
- status
- type
type: object
type: array
observedGeneration:
description: ObservedGeneration is the last observed generation.
format: int64
type: integer
type: object
type: object
served: true
storage: true
subresources:
status: {}
26 changes: 26 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -653,6 +653,32 @@ rules:
- get
- patch
- update
- apiGroups:
- retriever.arcadia.kubeagi.k8s.com.cn
resources:
- multiqueryretrievers/finalizers
verbs:
- update
- apiGroups:
- retriever.arcadia.kubeagi.k8s.com.cn
resources:
- multiqueryretrievers
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- retriever.arcadia.kubeagi.k8s.com.cn
resources:
- multiqueryretrievers/status
verbs:
- get
- patch
- update
- apiGroups:
- retriever.arcadia.kubeagi.k8s.com.cn
resources:
Expand Down
2 changes: 1 addition & 1 deletion config/samples/app_llmchain_chat_with_bot.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,4 @@ spec:
description: "llm chain"
memory:
conversionWindowSize: 2
model: chatglm_turbo # notice: default model chatglm_lite gets poor results in most cases, openai's gpt-3.5-turbo is also good enough
model: glm-4 # notice: default model chatglm_lite gets poor results in most cases, openai's gpt-3.5-turbo is also good enough
2 changes: 1 addition & 1 deletion config/samples/app_llmchain_chat_with_bot_tool.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ spec:
description: "llm chain"
memory:
conversionWindowSize: 2
model: chatglm_turbo # notice: default model chatglm_lite gets poor results in most cases, openai's gpt-3.5-turbo is also good enough
model: glm-4 # notice: default model chatglm_lite gets poor results in most cases, openai's gpt-3.5-turbo is also good enough
---
apiVersion: arcadia.kubeagi.k8s.com.cn/v1alpha1
kind: Agent
Expand Down
1 change: 1 addition & 0 deletions config/samples/app_retrievalqachain_knowledgebase.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ spec:
description: "用于搜索QA的Chain"
memory:
maxTokenLimit: 20480
model: glm-4
---
apiVersion: retriever.arcadia.kubeagi.k8s.com.cn/v1alpha1
kind: KnowledgeBaseRetriever
Expand Down
Loading

0 comments on commit 9cfb0e2

Please sign in to comment.