From 3a9b478e6d78c67bcc0b2c860670c0ee0f452a33 Mon Sep 17 00:00:00 2001 From: kunlongli <16629885+cnlkl@users.noreply.github.com> Date: Wed, 31 Jul 2024 17:44:02 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=94=AF=E6=8C=81=E6=9F=A5=E8=AF=A2?= =?UTF-8?q?=E7=9B=B8=E4=BC=BC=E8=B7=AF=E5=BE=84=20#1949?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: 支持查询相似路径 #1949 * feat: 支持查询相似路径 #1949 * feat: 支持查询相似路径 #1949 * feat: 支持查询相似路径 #1949 * feat: 支持查询相似路径 #1949 * feat: 支持查询相似路径 #1949 * feat: 支持查询相似路径 #1949 * feat: 支持查询相似路径 #1949 * feat: 支持查询相似路径 #1949 * feat: 支持查询相似路径 #1949 * feat: 支持根据历史访问记录计算下次加载时间 #1949 * feat: 移除流水线仓库路径的pid与bid #1949 * feat: 移除流水线仓库路径的bid #1949 * feat: 判断是否有相似路径被访问后根据配置生成预加载计划 #1949 * feat: 判断是否有相似路径被访问后根据配置生成预加载计划 #1949 * feat: 增量生成向量化数据 #1949 * feat: 判断是否有相似路径被访问后根据配置生成预加载计划 #1949 * feat: 判断是否有相似路径被访问后根据配置生成预加载计划 #1949 * feat: 判断是否有相似路径被访问后根据配置生成预加载计划 #1949 * feat: 支持创建预加载策略时不设置cron,增加相似路径预加载计划生成日志 #1949 * feat: 支持创建INTELLIGENT类型的预加载策略 #1949 * feat: 批量向量化并耗时日志 #1949 * feat: 批量向量化并耗时日志 #1949 * feat: 修复milvus配置失败 #1949 * feat: 修复milvus配置失败 #1949 * feat: 支持配置模型维度 #1949 * feat: 支持模拟预加载 #1949 --- src/backend/build.gradle.kts | 1 + .../buildSrc/src/main/kotlin/Versions.kt | 3 +- .../cache/config/ArtifactPreloadProperties.kt | 16 + .../ArtifactPreloadStrategyCreateRequest.kt | 2 +- .../cache/pojo/PreloadStrategyType.kt | 2 +- .../impl/ArtifactPreloadPlanServiceImpl.kt | 7 +- .../impl/DefaultPreloadPlanExecutor.kt | 35 +- src/backend/job/biz-job/build.gradle.kts | 11 + .../preload/ArtifactAccessLogEmbeddingJob.kt | 224 +++++++++++++ .../ArtifactAccessRecordCleanupJob.kt | 2 +- .../ArtifactPreloadPlanExecuteJob.kt | 2 +- .../ArtifactPreloadStrategyGenerateJob.kt | 2 +- .../ArtifactSimilarityPreloadPlanGenerator.kt | 129 ++++++++ .../batch/task/cache/preload/PreloadConfig.kt | 51 +++ .../task/cache/preload/ai/AiProperties.kt | 64 ++++ .../batch/task/cache/preload/ai/Document.kt | 51 +++ .../task/cache/preload/ai/EmbeddingModel.kt | 60 ++++ .../cache/preload/ai/HttpEmbeddingModel.kt | 91 ++++++ .../preload/ai/MilvusClientProperties.kt | 46 +++ .../cache/preload/ai/MilvusVectorStore.kt | 298 ++++++++++++++++++ .../preload/ai/MilvusVectorStoreProperties.kt | 40 +++ .../task/cache/preload/ai/SearchRequest.kt | 40 +++ .../cache/preload/ai/SpringAiConfiguration.kt | 76 +++++ .../task/cache/preload/ai/VectorStore.kt | 56 ++++ ...ArtifactAccessLogEmbeddingJobProperties.kt | 40 +++ .../user/UserArtifactPreloadController.kt | 3 +- 26 files changed, 1327 insertions(+), 25 deletions(-) create mode 100644 src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ArtifactAccessLogEmbeddingJob.kt rename src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/{ => preload}/ArtifactAccessRecordCleanupJob.kt (97%) rename src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/{ => preload}/ArtifactPreloadPlanExecuteJob.kt (97%) rename src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/{ => preload}/ArtifactPreloadStrategyGenerateJob.kt (97%) create mode 100644 src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ArtifactSimilarityPreloadPlanGenerator.kt create mode 100644 src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/PreloadConfig.kt create mode 100644 src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/AiProperties.kt create mode 100644 src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/Document.kt create mode 100644 src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/EmbeddingModel.kt create mode 100644 src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/HttpEmbeddingModel.kt create mode 100644 src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/MilvusClientProperties.kt create mode 100644 src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/MilvusVectorStore.kt create mode 100644 src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/MilvusVectorStoreProperties.kt create mode 100644 src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/SearchRequest.kt create mode 100644 src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/SpringAiConfiguration.kt create mode 100644 src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/VectorStore.kt create mode 100644 src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/config/properties/ArtifactAccessLogEmbeddingJobProperties.kt diff --git a/src/backend/build.gradle.kts b/src/backend/build.gradle.kts index f4f74cf69a..fd65d4e066 100644 --- a/src/backend/build.gradle.kts +++ b/src/backend/build.gradle.kts @@ -64,6 +64,7 @@ allprojects { dependency("commons-io:commons-io:${Versions.CommonsIO}") dependency("com.squareup.okhttp3:okhttp:${Versions.OKhttp}") dependency("com.google.guava:guava:${Versions.Guava}") + dependency("com.google.protobuf:protobuf-java:${Versions.ProtobufJava}") dependency("com.google.protobuf:protobuf-java-util:${Versions.ProtobufJava}") dependency("com.tencent.polaris:polaris-discovery-factory:${Versions.Polaris}") dependency("org.apache.commons:commons-text:${Versions.CommonsText}") diff --git a/src/backend/buildSrc/src/main/kotlin/Versions.kt b/src/backend/buildSrc/src/main/kotlin/Versions.kt index 5fbb7de387..e60a75bdd8 100644 --- a/src/backend/buildSrc/src/main/kotlin/Versions.kt +++ b/src/backend/buildSrc/src/main/kotlin/Versions.kt @@ -37,7 +37,7 @@ object Versions { const val Redline = "1.2.10" const val SkyWalkingApmToolkit = "8.10.0" const val Gson = "2.9.0" - const val ProtobufJava = "3.19.4" + const val ProtobufJava = "3.24.0" const val Guava = "31.1-jre" const val Shedlock = "4.12.0" const val JGit = "5.11.0.202103091610-r" @@ -70,4 +70,5 @@ object Versions { const val JavaCpp = "1.5.9" const val Notice = "1.0.0" const val SpringCloudFunction = "3.2.11" + const val Milvus = "2.4.1" } diff --git a/src/backend/common/common-artifact/artifact-cache/src/main/kotlin/com/tencent/bkrepo/common/artifact/cache/config/ArtifactPreloadProperties.kt b/src/backend/common/common-artifact/artifact-cache/src/main/kotlin/com/tencent/bkrepo/common/artifact/cache/config/ArtifactPreloadProperties.kt index 373736d215..814ca613a7 100644 --- a/src/backend/common/common-artifact/artifact-cache/src/main/kotlin/com/tencent/bkrepo/common/artifact/cache/config/ArtifactPreloadProperties.kt +++ b/src/backend/common/common-artifact/artifact-cache/src/main/kotlin/com/tencent/bkrepo/common/artifact/cache/config/ArtifactPreloadProperties.kt @@ -69,4 +69,20 @@ data class ArtifactPreloadProperties( * 允许同时预加载的制品个数 */ var preloadConcurrency: Int = 8, + /** + * 预加载策略未配置时间时使用的预加载时间,取值范围[0,24],可配置多个,将选择离当前时间最近的一个作为预加载时间 + */ + var preloadHourOfDay: Set = emptySet(), + /** + * 减去随机时间,避免同时加载过多文件 + */ + var maxRandomSeconds: Long = 600L, + /** + * 根据sha256查询到的node数量超过该值时将不生成预加载计划,避免预加载计划创建时间过久 + */ + var maxNodes: Int = 10, + /** + * 是否仅模拟预加载,为true时不执行加载计划,仅输出一条日志 + */ + var mock: Boolean = false, ) diff --git a/src/backend/common/common-artifact/artifact-cache/src/main/kotlin/com/tencent/bkrepo/common/artifact/cache/pojo/ArtifactPreloadStrategyCreateRequest.kt b/src/backend/common/common-artifact/artifact-cache/src/main/kotlin/com/tencent/bkrepo/common/artifact/cache/pojo/ArtifactPreloadStrategyCreateRequest.kt index d5e51d8aeb..ff08aae5e6 100644 --- a/src/backend/common/common-artifact/artifact-cache/src/main/kotlin/com/tencent/bkrepo/common/artifact/cache/pojo/ArtifactPreloadStrategyCreateRequest.kt +++ b/src/backend/common/common-artifact/artifact-cache/src/main/kotlin/com/tencent/bkrepo/common/artifact/cache/pojo/ArtifactPreloadStrategyCreateRequest.kt @@ -44,7 +44,7 @@ data class ArtifactPreloadStrategyCreateRequest( @ApiModelProperty("限制只对最近一段时间内创建的制品执行预加载") val recentSeconds: Long, @ApiModelProperty("预加载执行时间") - val preloadCron: String, + val preloadCron: String? = null, @ApiModelProperty("策略类型") val type: String = PreloadStrategyType.CUSTOM.name, @ApiModelProperty("操作人") diff --git a/src/backend/common/common-artifact/artifact-cache/src/main/kotlin/com/tencent/bkrepo/common/artifact/cache/pojo/PreloadStrategyType.kt b/src/backend/common/common-artifact/artifact-cache/src/main/kotlin/com/tencent/bkrepo/common/artifact/cache/pojo/PreloadStrategyType.kt index 6688800b2d..ef9ab68579 100644 --- a/src/backend/common/common-artifact/artifact-cache/src/main/kotlin/com/tencent/bkrepo/common/artifact/cache/pojo/PreloadStrategyType.kt +++ b/src/backend/common/common-artifact/artifact-cache/src/main/kotlin/com/tencent/bkrepo/common/artifact/cache/pojo/PreloadStrategyType.kt @@ -42,7 +42,7 @@ enum class PreloadStrategyType { CUSTOM_GENERATED, /** - * 智能预加载策略,通过BkBase中的模型预测预加载时间 + * 智能预加载策略 */ INTELLIGENT } diff --git a/src/backend/common/common-artifact/artifact-cache/src/main/kotlin/com/tencent/bkrepo/common/artifact/cache/service/impl/ArtifactPreloadPlanServiceImpl.kt b/src/backend/common/common-artifact/artifact-cache/src/main/kotlin/com/tencent/bkrepo/common/artifact/cache/service/impl/ArtifactPreloadPlanServiceImpl.kt index 5afc6559e2..6a2788f76d 100644 --- a/src/backend/common/common-artifact/artifact-cache/src/main/kotlin/com/tencent/bkrepo/common/artifact/cache/service/impl/ArtifactPreloadPlanServiceImpl.kt +++ b/src/backend/common/common-artifact/artifact-cache/src/main/kotlin/com/tencent/bkrepo/common/artifact/cache/service/impl/ArtifactPreloadPlanServiceImpl.kt @@ -74,12 +74,12 @@ class ArtifactPreloadPlanServiceImpl( if (!properties.enabled) { return } - val option = NodeListOption(pageSize = MAX_PAGE_SIZE, includeFolder = false) + val option = NodeListOption(pageSize = properties.maxNodes, includeFolder = false) val res = nodeClient.listPageNodeBySha256(sha256, option) val nodes = res.data?.records ?: return - if (nodes.size >= MAX_PAGE_SIZE) { + if (nodes.size >= properties.maxNodes) { // 限制查询出来的最大node数量,避免预加载计划创建时间过久 - logger.warn("nodes of sha256[$sha256] exceed max page size[$MAX_PAGE_SIZE]") + logger.warn("nodes of sha256[$sha256] exceed max page size[${properties.maxNodes}]") return } // node属于同一项目仓库的概率较大,缓存避免频繁查询策略 @@ -179,6 +179,5 @@ class ArtifactPreloadPlanServiceImpl( companion object { private val logger = LoggerFactory.getLogger(ArtifactPreloadPlanServiceImpl::class.java) private const val REPO_ID_DELIMITERS = "/" - private const val MAX_PAGE_SIZE = 1000 } } diff --git a/src/backend/common/common-artifact/artifact-cache/src/main/kotlin/com/tencent/bkrepo/common/artifact/cache/service/impl/DefaultPreloadPlanExecutor.kt b/src/backend/common/common-artifact/artifact-cache/src/main/kotlin/com/tencent/bkrepo/common/artifact/cache/service/impl/DefaultPreloadPlanExecutor.kt index 92393b1e49..f01d697bac 100644 --- a/src/backend/common/common-artifact/artifact-cache/src/main/kotlin/com/tencent/bkrepo/common/artifact/cache/service/impl/DefaultPreloadPlanExecutor.kt +++ b/src/backend/common/common-artifact/artifact-cache/src/main/kotlin/com/tencent/bkrepo/common/artifact/cache/service/impl/DefaultPreloadPlanExecutor.kt @@ -102,19 +102,7 @@ class DefaultPreloadPlanExecutor( if (System.currentTimeMillis() - plan.executeTime > preloadProperties.planTimeout.toMillis()) { throw RuntimeException("plan timeout[${plan.executeTime}], ${plan.artifactInfo()}") } - val cacheFile = Paths.get(credentials.cache.path, fileLocator.locate(plan.sha256), plan.sha256) - val cacheFileLock = Paths.get(credentials.cache.path, StringPool.TEMP, "${plan.sha256}.locked") - val throughput = if (cacheFile.existReal()) { - Files.setLastModifiedTime(cacheFile, FileTime.fromMillis(System.currentTimeMillis())) - logger.info("cache already exists, update LastModifiedTime, ${plan.artifactInfo()}") - null - } else if (cacheFileLock.existReal()) { - logger.info("cache file is loading, skip preload, ${plan.artifactInfo()}") - null - } else { - // 执行预加载 - doLoad(plan.sha256, plan.size, credentials) - } + val throughput = load(plan, credentials) logger.info("preload success, ${plan.artifactInfo()}, throughput[$throughput]") listener?.onPreloadSuccess(plan) } catch (e: Exception) { @@ -125,6 +113,27 @@ class DefaultPreloadPlanExecutor( } } + private fun load(plan: ArtifactPreloadPlan, credentials: StorageCredentials): Throughput? { + if (preloadProperties.mock) { + logger.info("mock load cache, ${plan.artifactInfo()}") + return null + } + + val cacheFile = Paths.get(credentials.cache.path, fileLocator.locate(plan.sha256), plan.sha256) + val cacheFileLock = Paths.get(credentials.cache.path, StringPool.TEMP, "${plan.sha256}.locked") + return if (cacheFile.existReal()) { + Files.setLastModifiedTime(cacheFile, FileTime.fromMillis(System.currentTimeMillis())) + logger.info("cache already exists, update LastModifiedTime, ${plan.artifactInfo()}") + null + } else if (cacheFileLock.existReal()) { + logger.info("cache file is loading, skip preload, ${plan.artifactInfo()}") + null + } else { + // 执行预加载 + doLoad(plan.sha256, plan.size, credentials) + } + } + private fun doLoad(sha256: String, size: Long, credentials: StorageCredentials?): Throughput { return cacheStorageService.load(sha256, Range.full(size), credentials)?.use { val aisCacheEnabled = it.getMetadata(ArtifactInputStream.METADATA_KEY_CACHE_ENABLED) diff --git a/src/backend/job/biz-job/build.gradle.kts b/src/backend/job/biz-job/build.gradle.kts index f38fdd1334..f3c0302bdf 100644 --- a/src/backend/job/biz-job/build.gradle.kts +++ b/src/backend/job/biz-job/build.gradle.kts @@ -45,6 +45,17 @@ dependencies { implementation(project(":common:common-operate:operate-service")) implementation("org.springframework.boot:spring-boot-starter-data-mongodb") implementation("io.micrometer:micrometer-registry-prometheus") + implementation("io.milvus:milvus-sdk-java:${Versions.Milvus}") { + exclude(group = "org.slf4j") + exclude(group = "org.apache.logging.log4j") + exclude(group = "org.testcontainers") + exclude(group = "com.azure") + exclude(group = "com.amazonaws") + exclude(group = "io.minio") + exclude(group = "org.apache.hadoop") + exclude(group = "org.apache.parquet") + exclude(group = "com.squareup.okhttp3") + } testImplementation("org.springframework.boot:spring-boot-starter-test") testImplementation("de.flapdoodle.embed:de.flapdoodle.embed.mongo") testImplementation("org.mockito.kotlin:mockito-kotlin") diff --git a/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ArtifactAccessLogEmbeddingJob.kt b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ArtifactAccessLogEmbeddingJob.kt new file mode 100644 index 0000000000..6ad3fd7c7b --- /dev/null +++ b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ArtifactAccessLogEmbeddingJob.kt @@ -0,0 +1,224 @@ +/* + * Tencent is pleased to support the open source community by making BK-CI 蓝鲸持续集成平台 available. + * + * Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-CI 蓝鲸持续集成平台 is licensed under the MIT license. + * + * A copy of the MIT License is included in this file. + * + * + * Terms of the MIT License: + * --------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT + * LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package com.tencent.bkrepo.job.batch.task.cache.preload + +import com.tencent.bkrepo.auth.constant.PIPELINE +import com.tencent.bkrepo.common.artifact.event.base.EventType +import com.tencent.bkrepo.common.mongo.constant.ID +import com.tencent.bkrepo.common.mongo.constant.MIN_OBJECT_ID +import com.tencent.bkrepo.common.mongo.dao.util.sharding.MonthRangeShardingUtils +import com.tencent.bkrepo.common.operate.service.model.TOperateLog +import com.tencent.bkrepo.job.batch.base.DefaultContextJob +import com.tencent.bkrepo.job.batch.base.JobContext +import com.tencent.bkrepo.job.batch.task.cache.preload.ai.AiProperties +import com.tencent.bkrepo.job.batch.task.cache.preload.ai.Document +import com.tencent.bkrepo.job.batch.task.cache.preload.ai.EmbeddingModel +import com.tencent.bkrepo.job.batch.task.cache.preload.ai.MilvusVectorStore +import com.tencent.bkrepo.job.batch.task.cache.preload.ai.MilvusVectorStoreProperties +import com.tencent.bkrepo.job.batch.task.cache.preload.ai.VectorStore +import com.tencent.bkrepo.job.config.properties.ArtifactAccessLogEmbeddingJobProperties +import io.milvus.client.MilvusClient +import org.bson.types.ObjectId +import org.slf4j.LoggerFactory +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty +import org.springframework.boot.context.properties.EnableConfigurationProperties +import org.springframework.data.domain.Sort +import org.springframework.data.mongodb.core.MongoTemplate +import org.springframework.data.mongodb.core.find +import org.springframework.data.mongodb.core.query.Criteria +import org.springframework.data.mongodb.core.query.Query +import org.springframework.data.mongodb.core.query.isEqualTo +import org.springframework.stereotype.Component +import java.time.Duration +import java.time.LocalDate +import java.time.LocalDateTime +import kotlin.system.measureTimeMillis + +@Component +@EnableConfigurationProperties(ArtifactAccessLogEmbeddingJobProperties::class) +@ConditionalOnProperty("job.artifact-access-log-embedding.enabled") +class ArtifactAccessLogEmbeddingJob( + private val aiProperties: AiProperties, + private val properties: ArtifactAccessLogEmbeddingJobProperties, + private val mongoTemplate: MongoTemplate, + private val milvusClient: MilvusClient, + private val embeddingModel: EmbeddingModel, +) : DefaultContextJob(properties) { + + override fun getLockAtMostFor(): Duration = Duration.ofDays(7L) + + override fun doStart0(jobContext: JobContext) { + val lastMonthVectorStore = createVectorStore(1L) + val curMonthVectorStore = createVectorStore(0L) + var lastMonthCollectionExists = lastMonthVectorStore.collectionExists() + val curMonthCollectionExists = curMonthVectorStore.collectionExists() + + if (lastMonthCollectionExists && !curMonthCollectionExists) { + // 可能由于数据生成过程被中断导致存在上月数据不存在当月数据,需要删除重新生成 + lastMonthVectorStore.dropCollection() + lastMonthCollectionExists = false + } + + if (!lastMonthCollectionExists) { + // 上个月的数据不存在时,使用上个月的访问记录生成数据 + logger.info("collection[${lastMonthVectorStore.collectionName()}] not exists, try to create") + lastMonthVectorStore.createCollection() + lastMonthVectorStore.findAccessLogAndInsert(1L) + logger.info("insert data into collection[${lastMonthVectorStore.collectionName()}] success") + } + + if (!curMonthCollectionExists) { + // 当月数据不存在时候,使用月初至今的访问记录生成数据 + logger.info("collection[${curMonthVectorStore.collectionName()}] not exists, try to create") + curMonthVectorStore.createCollection() + curMonthVectorStore.findAccessLogAndInsert(0L, before = LocalDate.now().atStartOfDay()) + } else { + // 已有数据,使用昨日数据生成记录 + logger.info("collection[${curMonthVectorStore.collectionName()}] exists, insert data of last day") + val startOfToday = LocalDate.now().atStartOfDay() + val startOfLastDay = LocalDate.now().minusDays(1L).atStartOfDay() + curMonthVectorStore.findAccessLogAndInsert(0L, after = startOfLastDay, before = startOfToday) + } + logger.info("insert data into collection[${curMonthVectorStore.collectionName()}] success") + + // 删除过期数据 + val deprecatedVectorStore = createVectorStore(2L) + if (deprecatedVectorStore.collectionExists()) { + logger.info("deprecated collection[${deprecatedVectorStore.collectionName()}] exists") + deprecatedVectorStore.dropCollection() + logger.info("drop collection [${deprecatedVectorStore.collectionName()}] success") + } + } + + /** + * 获取访问记录并写入向量数据库 + */ + private fun VectorStore.findAccessLogAndInsert( + minusMonth: Long, + after: LocalDateTime? = null, + before: LocalDateTime? = null + ) { + properties.projects.forEach { projectId -> + processDataBatch(projectId, minusMonth, after, before) { paths -> + val documents = paths.map { Document(content = it, metadata = emptyMap()) } + val elapsed = measureTimeMillis { insert(documents) } + logger.info("[$projectId] insert ${documents.size} data into [${collectionName()}] in $elapsed ms") + } + } + } + + private fun createVectorStore(minusMonth: Long): VectorStore { + val seq = MonthRangeShardingUtils.shardingSequenceFor(LocalDateTime.now().minusMonths(minusMonth), 1) + val collectionName = "${aiProperties.collectionPrefix}$seq" + + val config = MilvusVectorStoreProperties( + databaseName = aiProperties.databaseName, + collectionName = collectionName, + embeddingDimension = embeddingModel.dimensions(), + ) + return MilvusVectorStore(config, milvusClient, embeddingModel) + } + + /** + * 获取有访问记录的路径 + */ + private fun processDataBatch( + projectId: String, + minusMonth: Long, + after: LocalDateTime?, + before: LocalDateTime?, + handler: (paths: Set) -> Unit, + ) { + val collectionName = collectionName(minusMonth) + if (!mongoTemplate.collectionExists(collectionName)) { + logger.warn("mongo collection[$collectionName] not exists") + return + } + val pageSize = properties.batchSize + var lastId = ObjectId(MIN_OBJECT_ID) + var querySize: Int + val criteria = buildCriteria(projectId, after, before) + do { + val query = Query(criteria) + .addCriteria(Criteria.where(ID).gt(lastId)) + .limit(pageSize) + .with(Sort.by(ID).ascending()) + query.fields().include( + TOperateLog::repoName.name, + TOperateLog::resourceKey.name, + TOperateLog::createdDate.name + ) + val data = mongoTemplate.find>(query, collectionName) + if (data.isEmpty()) { + break + } + // 记录制品访问时间 + val accessPaths = data.mapTo(HashSet(pageSize)) { + val repoName = it[TOperateLog::repoName.name] as String + val fullPath = it[TOperateLog::resourceKey.name] as String + val projectRepoFullPath = if (repoName == PIPELINE) { + // 流水线仓库路径/p-xxx/b-xxx/xxx中的构建id不参与相似度计算 + val secondSlashIndex = fullPath.indexOf("/", 1) + val pipelinePath = fullPath.substring(0, secondSlashIndex) + val artifactPath = fullPath.substring(fullPath.indexOf("/", secondSlashIndex + 1)) + pipelinePath + artifactPath + } else { + fullPath + } + "/$projectId/$repoName$projectRepoFullPath" + } + + handler(accessPaths) + querySize = data.size + lastId = data.last()[ID] as ObjectId + } while (querySize == pageSize && shouldRun()) + } + + private fun collectionName(minusMonth: Long): String { + // 查询上个月的记录 + val seq = MonthRangeShardingUtils.shardingSequenceFor(LocalDateTime.now().minusMonths(minusMonth), 1) + return "artifact_oplog_$seq" + } + + private fun buildCriteria(projectId: String, after: LocalDateTime?, before: LocalDateTime?): Criteria { + val criteria = Criteria + .where(TOperateLog::projectId.name).isEqualTo(projectId) + .and(TOperateLog::type.name).isEqualTo(EventType.NODE_DOWNLOADED.name) + if (after != null && before != null) { + criteria.and(TOperateLog::createdDate.name).gte(after).lt(before) + } else { + after?.let { criteria.and(TOperateLog::createdDate.name).gte(it) } + before?.let { criteria.and(TOperateLog::createdDate.name).lt(it) } + } + return criteria + } + + companion object { + private val logger = LoggerFactory.getLogger(ArtifactAccessLogEmbeddingJob::class.java) + } +} diff --git a/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/ArtifactAccessRecordCleanupJob.kt b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ArtifactAccessRecordCleanupJob.kt similarity index 97% rename from src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/ArtifactAccessRecordCleanupJob.kt rename to src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ArtifactAccessRecordCleanupJob.kt index f26671c8b7..7b5411d318 100644 --- a/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/ArtifactAccessRecordCleanupJob.kt +++ b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ArtifactAccessRecordCleanupJob.kt @@ -25,7 +25,7 @@ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package com.tencent.bkrepo.job.batch.task.cache +package com.tencent.bkrepo.job.batch.task.cache.preload import com.tencent.bkrepo.common.artifact.cache.service.impl.ArtifactAccessRecorder import com.tencent.bkrepo.job.batch.base.DefaultContextJob diff --git a/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/ArtifactPreloadPlanExecuteJob.kt b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ArtifactPreloadPlanExecuteJob.kt similarity index 97% rename from src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/ArtifactPreloadPlanExecuteJob.kt rename to src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ArtifactPreloadPlanExecuteJob.kt index bd446340a0..82dfcb5771 100644 --- a/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/ArtifactPreloadPlanExecuteJob.kt +++ b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ArtifactPreloadPlanExecuteJob.kt @@ -25,7 +25,7 @@ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package com.tencent.bkrepo.job.batch.task.cache +package com.tencent.bkrepo.job.batch.task.cache.preload import com.tencent.bkrepo.common.artifact.cache.config.ArtifactPreloadProperties import com.tencent.bkrepo.common.artifact.cache.service.ArtifactPreloadPlanService diff --git a/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/ArtifactPreloadStrategyGenerateJob.kt b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ArtifactPreloadStrategyGenerateJob.kt similarity index 97% rename from src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/ArtifactPreloadStrategyGenerateJob.kt rename to src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ArtifactPreloadStrategyGenerateJob.kt index 91ea190cdf..f1f71f799e 100644 --- a/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/ArtifactPreloadStrategyGenerateJob.kt +++ b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ArtifactPreloadStrategyGenerateJob.kt @@ -25,7 +25,7 @@ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package com.tencent.bkrepo.job.batch.task.cache +package com.tencent.bkrepo.job.batch.task.cache.preload import com.tencent.bkrepo.common.artifact.cache.service.impl.ArtifactAccessRecorder import com.tencent.bkrepo.job.batch.base.DefaultContextJob diff --git a/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ArtifactSimilarityPreloadPlanGenerator.kt b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ArtifactSimilarityPreloadPlanGenerator.kt new file mode 100644 index 0000000000..2264f21423 --- /dev/null +++ b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ArtifactSimilarityPreloadPlanGenerator.kt @@ -0,0 +1,129 @@ +/* + * Tencent is pleased to support the open source community by making BK-CI 蓝鲸持续集成平台 available. + * + * Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-CI 蓝鲸持续集成平台 is licensed under the MIT license. + * + * A copy of the MIT License is included in this file. + * + * + * Terms of the MIT License: + * --------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT + * LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package com.tencent.bkrepo.job.batch.task.cache.preload + +import com.tencent.bkrepo.common.artifact.cache.config.ArtifactPreloadProperties +import com.tencent.bkrepo.common.artifact.cache.pojo.ArtifactPreloadPlan +import com.tencent.bkrepo.common.artifact.cache.pojo.ArtifactPreloadPlanGenerateParam +import com.tencent.bkrepo.common.artifact.cache.service.ArtifactPreloadPlanGenerator +import com.tencent.bkrepo.common.mongo.dao.util.sharding.MonthRangeShardingUtils +import com.tencent.bkrepo.job.batch.task.cache.preload.ai.AiProperties +import com.tencent.bkrepo.job.batch.task.cache.preload.ai.EmbeddingModel +import com.tencent.bkrepo.job.batch.task.cache.preload.ai.MilvusVectorStore +import com.tencent.bkrepo.job.batch.task.cache.preload.ai.MilvusVectorStoreProperties +import com.tencent.bkrepo.job.batch.task.cache.preload.ai.SearchRequest +import com.tencent.bkrepo.job.batch.task.cache.preload.ai.VectorStore +import io.milvus.client.MilvusClient +import org.slf4j.LoggerFactory +import java.time.LocalDateTime +import java.time.ZoneId +import kotlin.random.Random + +class ArtifactSimilarityPreloadPlanGenerator( + private val embeddingModel: EmbeddingModel, + private val milvusClient: MilvusClient, + private val aiProperties: AiProperties, + private val preloadProperties: ArtifactPreloadProperties, +) : ArtifactPreloadPlanGenerator { + override fun generate(param: ArtifactPreloadPlanGenerateParam): ArtifactPreloadPlan? { + with(param) { + val executeTime = calculateExecuteTime(param) ?: return null + val now = LocalDateTime.now() + return ArtifactPreloadPlan( + id = null, + createdDate = now, + lastModifiedDate = now, + strategyId = strategy.id!!, + projectId = projectId, + repoName = repoName, + fullPath = fullPath, + sha256 = sha256, + size = size, + credentialsKey = credentialsKey, + executeTime = executeTime + ) + } + } + + /** + * 根据历史访问记录计算预加载时间 + */ + private fun calculateExecuteTime(param: ArtifactPreloadPlanGenerateParam): Long? { + with(param) { + val preloadHourOfDay = preloadProperties.preloadHourOfDay.sorted().ifEmpty { return null } + + // 查询相似路径,没有相似路径时不执行预加载 + val projectPath = "/$projectId/$repoName$fullPath" + val searchReq = SearchRequest( + query = projectPath, + topK = 10, + similarityThreshold = aiProperties.defaultSimilarityThreshold + ) + val docs = createVectorStore(0L).similaritySearch(searchReq).ifEmpty { + createVectorStore(1L).similaritySearch(searchReq) + } + if (docs.isEmpty()) { + logger.info("no similarity path found for [$projectPath]") + return null + } + + val now = LocalDateTime.now() + val preloadHour = preloadHourOfDay.firstOrNull { it > now.hour } + ?: (preloadHourOfDay.first { (it + 24) > now.hour } + 24) + val preloadTimestamp = now + // 设置预加载时间 + .plusHours((preloadHour - now.hour).toLong()) + .withMinute(0) + // 减去随机时间,避免同时多文件触发加载 + .minusSeconds(Random.nextLong(0, preloadProperties.maxRandomSeconds)) + // 转化为毫秒时间戳 + .atZone(ZoneId.systemDefault()) + .toEpochSecond() * 1000 + logger.info( + "similarity path[${docs.first().content}] found for [$projectPath], will preload on $preloadTimestamp" + ) + return preloadTimestamp + } + } + + private fun createVectorStore(minusMonth: Long): VectorStore { + // 根据上个月的历史访问数据进行预测 + val seq = MonthRangeShardingUtils.shardingSequenceFor(LocalDateTime.now().minusMonths(minusMonth), 1) + val collectionName = "${aiProperties.collectionPrefix}$seq" + val config = MilvusVectorStoreProperties( + databaseName = aiProperties.databaseName, + collectionName = collectionName, + embeddingDimension = embeddingModel.dimensions(), + ) + return MilvusVectorStore(config, milvusClient, embeddingModel) + } + + companion object { + private val logger = LoggerFactory.getLogger(ArtifactSimilarityPreloadPlanGenerator::class.java) + } +} diff --git a/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/PreloadConfig.kt b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/PreloadConfig.kt new file mode 100644 index 0000000000..970a519462 --- /dev/null +++ b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/PreloadConfig.kt @@ -0,0 +1,51 @@ +/* + * Tencent is pleased to support the open source community by making BK-CI 蓝鲸持续集成平台 available. + * + * Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-CI 蓝鲸持续集成平台 is licensed under the MIT license. + * + * A copy of the MIT License is included in this file. + * + * + * Terms of the MIT License: + * --------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT + * LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package com.tencent.bkrepo.job.batch.task.cache.preload + +import com.tencent.bkrepo.common.artifact.cache.config.ArtifactPreloadProperties +import com.tencent.bkrepo.common.artifact.cache.service.ArtifactPreloadPlanGenerator +import com.tencent.bkrepo.job.batch.task.cache.preload.ai.AiProperties +import com.tencent.bkrepo.job.batch.task.cache.preload.ai.EmbeddingModel +import io.milvus.client.MilvusClient +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty +import org.springframework.context.annotation.Bean +import org.springframework.context.annotation.Configuration + +@Configuration +@ConditionalOnProperty("job.artifact-access-log-embedding.enabled") +class PreloadConfig { + @Bean("INTELLIGENT") + fun artifactSimilarityPreloadPlanGenerator( + milvusClient: MilvusClient, + embeddingModel: EmbeddingModel, + aiProperties: AiProperties, + preloadProperties: ArtifactPreloadProperties, + ): ArtifactPreloadPlanGenerator { + return ArtifactSimilarityPreloadPlanGenerator(embeddingModel, milvusClient, aiProperties, preloadProperties) + } +} diff --git a/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/AiProperties.kt b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/AiProperties.kt new file mode 100644 index 0000000000..f3d36f6bfa --- /dev/null +++ b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/AiProperties.kt @@ -0,0 +1,64 @@ +/* + * Tencent is pleased to support the open source community by making BK-CI 蓝鲸持续集成平台 available. + * + * Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-CI 蓝鲸持续集成平台 is licensed under the MIT license. + * + * A copy of the MIT License is included in this file. + * + * + * Terms of the MIT License: + * --------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT + * LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package com.tencent.bkrepo.job.batch.task.cache.preload.ai + +import org.springframework.boot.context.properties.ConfigurationProperties +import java.time.Duration + +@ConfigurationProperties("spring.ai") +data class AiProperties( + /** + * 向量化服务URL + * 服务需要实现[com.tencent.bkrepo.job.batch.task.cache.preload.ai.HttpEmbeddingModel]中调用的接口 + */ + var embeddingServiceUrl: String = "", + /** + * 向量化服务token,会在请求头Authorization中携带用于认证 + */ + var embeddingServiceToken: String = "", + /** + * 模型维度 + */ + var dimenssion: Int = 384, + /** + * 调用向量化接口超时时间 + */ + var embeddingTimeout: Duration = Duration.ofMinutes(1L), + /** + * 向量数据库名 + */ + var databaseName: String = "default", + /** + * 向量数据库表名前缀 + */ + var collectionPrefix: String = "artifact_access_log_", + /** + * 默认相似度阈值,用于过滤相似路径查询结果 + */ + var defaultSimilarityThreshold: Double = 0.95, +) diff --git a/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/Document.kt b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/Document.kt new file mode 100644 index 0000000000..c6d8d98398 --- /dev/null +++ b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/Document.kt @@ -0,0 +1,51 @@ +/* + * Tencent is pleased to support the open source community by making BK-CI 蓝鲸持续集成平台 available. + * + * Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-CI 蓝鲸持续集成平台 is licensed under the MIT license. + * + * A copy of the MIT License is included in this file. + * + * + * Terms of the MIT License: + * --------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT + * LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package com.tencent.bkrepo.job.batch.task.cache.preload.ai + +import org.bson.types.ObjectId + +/** + * 表示一份向量化后的文档及其关联数据 + * + * 升级到jdk17后迁移到spring-ai + */ +data class Document( + /** + * 用于向量化的文档内容 + */ + val content: String, + /** + * 携带的元数据 + * value 只能为string, int, float, boolean + */ + val metadata: Map = emptyMap(), + /** + * 唯一id + */ + val id: String = ObjectId.get().toHexString(), +) diff --git a/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/EmbeddingModel.kt b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/EmbeddingModel.kt new file mode 100644 index 0000000000..efda873a5f --- /dev/null +++ b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/EmbeddingModel.kt @@ -0,0 +1,60 @@ +/* + * Tencent is pleased to support the open source community by making BK-CI 蓝鲸持续集成平台 available. + * + * Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-CI 蓝鲸持续集成平台 is licensed under the MIT license. + * + * A copy of the MIT License is included in this file. + * + * + * Terms of the MIT License: + * --------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT + * LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package com.tencent.bkrepo.job.batch.task.cache.preload.ai + +interface EmbeddingModel { + + fun embed(document: Document): List { + return this.embed(document.content) + } + + /** + * 对指定文本进行向量化 + * + * @param text 需要向量化的文本 + * + * @return [text]对应的向量 + */ + fun embed(text: String): List + + /** + * 批量向量化文本 + * + * @param texts 文本列表 + * + * @return 向量化结果列表 + */ + fun embed(texts: List): List> + + /** + * 获取模型生成的向量维度 + */ + fun dimensions(): Int { + return this.embed("Test String").size + } +} diff --git a/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/HttpEmbeddingModel.kt b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/HttpEmbeddingModel.kt new file mode 100644 index 0000000000..30c6663775 --- /dev/null +++ b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/HttpEmbeddingModel.kt @@ -0,0 +1,91 @@ +/* + * Tencent is pleased to support the open source community by making BK-CI 蓝鲸持续集成平台 available. + * + * Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-CI 蓝鲸持续集成平台 is licensed under the MIT license. + * + * A copy of the MIT License is included in this file. + * + * + * Terms of the MIT License: + * --------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT + * LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package com.tencent.bkrepo.job.batch.task.cache.preload.ai + +import com.tencent.bkrepo.common.api.constant.BEARER_AUTH_PREFIX +import com.tencent.bkrepo.common.api.constant.HttpHeaders +import com.tencent.bkrepo.common.api.constant.MediaTypes +import com.tencent.bkrepo.common.api.util.readJsonString +import com.tencent.bkrepo.common.api.util.toJsonString +import com.tencent.bkrepo.common.storage.innercos.http.toRequestBody +import okhttp3.MediaType.Companion.toMediaType +import okhttp3.OkHttpClient +import okhttp3.Request +import org.slf4j.LoggerFactory +import java.time.Duration + +class HttpEmbeddingModel( + private val properties: AiProperties, +) : EmbeddingModel { + private val client: OkHttpClient by lazy { + OkHttpClient.Builder() + .readTimeout(properties.embeddingTimeout) + .writeTimeout(Duration.ofSeconds(15L)) + .connectTimeout(Duration.ofSeconds(15L)) + .retryOnConnectionFailure(true) + .build() + } + + override fun embed(text: String): List { + return embed(listOf(text)).first() + } + + override fun embed(texts: List): List> { + val start = System.currentTimeMillis() + val body = EmbeddingRequest(texts).toJsonString().toRequestBody(MediaTypes.APPLICATION_JSON.toMediaType()) + val req = buildReq("/embeddings").post(body).build() + val res = client.newCall(req).execute() + logger.info("embed [${texts.size}] texts in ${System.currentTimeMillis() - start} ms") + if (res.isSuccessful) { + return res.body!!.byteStream().readJsonString().data + } else { + val message = res.body?.string() + throw RuntimeException("embedding failed: $message, code[${res.code}]") + } + } + + override fun dimensions(): Int { + return properties.dimenssion + } + + private fun buildReq(api: String) = Request.Builder() + .url("${properties.embeddingServiceUrl}$api") + .header(HttpHeaders.AUTHORIZATION, "$BEARER_AUTH_PREFIX${properties.embeddingServiceToken}") + + private data class EmbeddingRequest( + val input: List + ) + + private data class EmbeddingResponse( + val data: List> + ) + + companion object { + private val logger = LoggerFactory.getLogger(HttpEmbeddingModel::class.java) + } +} diff --git a/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/MilvusClientProperties.kt b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/MilvusClientProperties.kt new file mode 100644 index 0000000000..4d078c9c44 --- /dev/null +++ b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/MilvusClientProperties.kt @@ -0,0 +1,46 @@ +/* + * Tencent is pleased to support the open source community by making BK-CI 蓝鲸持续集成平台 available. + * + * Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-CI 蓝鲸持续集成平台 is licensed under the MIT license. + * + * A copy of the MIT License is included in this file. + * + * + * Terms of the MIT License: + * --------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT + * LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package com.tencent.bkrepo.job.batch.task.cache.preload.ai + +import org.springframework.boot.context.properties.ConfigurationProperties +import java.util.concurrent.TimeUnit + +@ConfigurationProperties("spring.ai.vectorstore.milvus.client") +data class MilvusClientProperties( + var host: String = "localhost", + var port: Int = 19530, + var uri: String? = null, + var token: String? = null, + var connectTimeoutMs: Long = 10000L, + var keepAliveTimeMs: Long = 55000L, + var keepAliveTimeoutMs: Long = 20000L, + var rpcDeadlineMs: Long = 0L, + var idleTimeoutMs: Long = TimeUnit.MILLISECONDS.convert(24L, TimeUnit.HOURS), + var username: String = "root", + var password: String = "milvus" +) diff --git a/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/MilvusVectorStore.kt b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/MilvusVectorStore.kt new file mode 100644 index 0000000000..2fe6da4b49 --- /dev/null +++ b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/MilvusVectorStore.kt @@ -0,0 +1,298 @@ +/* + * Tencent is pleased to support the open source community by making BK-CI 蓝鲸持续集成平台 available. + * + * Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-CI 蓝鲸持续集成平台 is licensed under the MIT license. + * + * A copy of the MIT License is included in this file. + * + * + * Terms of the MIT License: + * --------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT + * LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package com.tencent.bkrepo.job.batch.task.cache.preload.ai + +import com.alibaba.fastjson.JSONObject +import io.milvus.client.MilvusClient +import io.milvus.common.clientenum.ConsistencyLevelEnum +import io.milvus.grpc.DataType +import io.milvus.param.MetricType +import io.milvus.param.R +import io.milvus.param.collection.CreateCollectionParam +import io.milvus.param.collection.DropCollectionParam +import io.milvus.param.collection.FieldType +import io.milvus.param.collection.FlushParam +import io.milvus.param.collection.HasCollectionParam +import io.milvus.param.collection.LoadCollectionParam +import io.milvus.param.dml.DeleteParam +import io.milvus.param.dml.InsertParam +import io.milvus.param.dml.SearchParam +import io.milvus.param.index.CreateIndexParam +import io.milvus.param.index.DescribeIndexParam +import io.milvus.response.QueryResultsWrapper +import io.milvus.response.SearchResultsWrapper +import org.slf4j.LoggerFactory +import org.springframework.beans.factory.InitializingBean + +/** + * 升级到jdk17后迁移到spring-ai + */ +class MilvusVectorStore( + private val config: MilvusVectorStoreProperties, + private val milvusClient: MilvusClient, + private val embeddingModel: EmbeddingModel, +) : VectorStore, InitializingBean { + + override fun insert(documents: List) { + val docIdArray = ArrayList() + val contentArray = ArrayList() + val metadataArray = ArrayList() + val embeddingArray = embeddingModel.embed(documents.map { it.content }) + + for (document in documents) { + docIdArray.add(document.id) + // Use a (future) DocumentTextLayoutFormatter instance to extract + // the content used to compute the embeddings + contentArray.add(document.content) + metadataArray.add(JSONObject(document.metadata)) + } + + val fields: MutableList = ArrayList() + fields.add(InsertParam.Field(DOC_ID_FIELD_NAME, docIdArray)) + fields.add(InsertParam.Field(CONTENT_FIELD_NAME, contentArray)) + fields.add(InsertParam.Field(METADATA_FIELD_NAME, metadataArray)) + fields.add(InsertParam.Field(EMBEDDING_FIELD_NAME, embeddingArray)) + + val insertParam = InsertParam.newBuilder() + .withDatabaseName(config.databaseName) + .withCollectionName(config.collectionName) + .withFields(fields) + .build() + + val status = milvusClient.insert(insertParam) + if (status.exception != null) { + throw RuntimeException("Failed to insert:", status.exception) + } + milvusClient.flush( + FlushParam.newBuilder() + .withDatabaseName(config.databaseName) + .addCollectionName(config.collectionName) + .build() + ) + } + + override fun delete(ids: Set): Boolean { + val deleteExpression = "$DOC_ID_FIELD_NAME in [${ids.joinToString(",") { "'$it'" }}]" + val status = milvusClient.delete( + DeleteParam.newBuilder() + .withCollectionName(config.collectionName) + .withExpr(deleteExpression) + .build() + ) + + val deleteCount = status.data.deleteCnt + if (deleteCount != ids.size.toLong()) { + logger.warn(String.format("Deleted only %s entries from requested %s ", deleteCount, ids.size)) + } + + return status.status == R.Status.Success.code + } + + override fun similaritySearch(request: SearchRequest): List { + val nativeFilterExpressions = request.filterExpression ?: "" + val embedding: List = embeddingModel.embed(request.query) + + val searchParamBuilder = SearchParam.newBuilder() + .withDatabaseName(config.databaseName) + .withCollectionName(config.collectionName) + .withConsistencyLevel(ConsistencyLevelEnum.STRONG) + .withMetricType(config.metricType) + .withOutFields(SEARCH_OUTPUT_FIELDS) + .withTopK(request.topK) + .withFloatVectors(listOf(embedding)) + .withVectorFieldName(EMBEDDING_FIELD_NAME) + + if (nativeFilterExpressions.isNotBlank()) { + searchParamBuilder.withExpr(nativeFilterExpressions) + } + + val respSearch = milvusClient.search(searchParamBuilder.build()) + + if (respSearch.exception != null) { + throw RuntimeException("Search failed!", respSearch.exception) + } + + val wrapperSearch = SearchResultsWrapper(respSearch.data.results) + + return wrapperSearch.getRowRecords(0) + .filter { getResultSimilarity(it) >= request.similarityThreshold } + .map { rowRecord -> + val docId = rowRecord[DOC_ID_FIELD_NAME] as String + val content = rowRecord[CONTENT_FIELD_NAME] as String + val metadata = rowRecord[METADATA_FIELD_NAME] as JSONObject + // inject the distance into the metadata. + metadata[DISTANCE_FIELD_NAME] = 1 - getResultSimilarity(rowRecord) + Document(content, metadata.innerMap, docId) + } + .toList() + } + + private fun getResultSimilarity(rowRecord: QueryResultsWrapper.RowRecord): Float { + val distance = rowRecord[DISTANCE_FIELD_NAME] as Float + return if ((config.metricType == MetricType.IP || config.metricType == MetricType.COSINE)) { + distance + } else { + (1 - distance) + } + } + + override fun afterPropertiesSet() { + this.createCollection() + } + + override fun collectionExists(): Boolean { + return milvusClient.hasCollection( + HasCollectionParam.newBuilder() + .withDatabaseName(config.databaseName) + .withCollectionName(config.collectionName) + .build() + ).data + } + + override fun collectionName(): String { + return config.collectionName + } + + // used by the test as well + override fun createCollection(): Boolean { + var created = false + if (!collectionExists()) { + val docIdFieldType = FieldType.newBuilder() + .withName(DOC_ID_FIELD_NAME) + .withDataType(DataType.VarChar) + .withMaxLength(36) + .withPrimaryKey(true) + .withAutoID(false) + .build() + val contentFieldType = FieldType.newBuilder() + .withName(CONTENT_FIELD_NAME) + .withDataType(DataType.VarChar) + .withMaxLength(65535) + .build() + val metadataFieldType = FieldType.newBuilder() + .withName(METADATA_FIELD_NAME) + .withDataType(DataType.JSON) + .build() + val embeddingFieldType = FieldType.newBuilder() + .withName(EMBEDDING_FIELD_NAME) + .withDataType(DataType.FloatVector) + .withDimension(this.embeddingDimensions()) + .build() + + val createCollectionReq = CreateCollectionParam.newBuilder() + .withDatabaseName(this.config.databaseName) + .withCollectionName(this.config.collectionName) + .withDescription("Spring AI Vector Store") + .withConsistencyLevel(ConsistencyLevelEnum.STRONG) + .withShardsNum(2) + .addFieldType(docIdFieldType) + .addFieldType(contentFieldType) + .addFieldType(metadataFieldType) + .addFieldType(embeddingFieldType) + .build() + + val collectionStatus = milvusClient.createCollection(createCollectionReq) + if (collectionStatus.exception != null) { + throw RuntimeException("Failed to create collection", collectionStatus.exception) + } + created = true + } + + val indexDescriptionResponse = milvusClient + .describeIndex( + DescribeIndexParam.newBuilder() + .withDatabaseName(this.config.databaseName) + .withCollectionName(this.config.collectionName) + .build() + ) + + if (indexDescriptionResponse.data == null) { + val indexStatus = milvusClient.createIndex( + CreateIndexParam.newBuilder() + .withDatabaseName(this.config.databaseName) + .withCollectionName(this.config.collectionName) + .withFieldName(EMBEDDING_FIELD_NAME) + .withIndexType(this.config.indexType) + .withMetricType(this.config.metricType) + .withExtraParam(this.config.indexParameters) + .withSyncMode(false) + .build() + ) + + if (indexStatus.exception != null) { + throw RuntimeException("Failed to create Index", indexStatus.exception) + } + } + + val loadCollectionStatus = milvusClient.loadCollection( + LoadCollectionParam.newBuilder() + .withDatabaseName(this.config.databaseName) + .withCollectionName(this.config.collectionName) + .build() + ) + + if (loadCollectionStatus.exception != null) { + throw RuntimeException("Collection loading failed!", loadCollectionStatus.exception) + } + return created + } + + override fun dropCollection() { + val exception = milvusClient.dropCollection( + DropCollectionParam.newBuilder() + .withDatabaseName(config.databaseName) + .withCollectionName(config.collectionName) + .build() + ).exception + if (exception != null) { + throw RuntimeException("Failed to drop collection[${config.collectionName}]", exception) + } + } + + private fun embeddingDimensions(): Int { + if (config.embeddingDimension != INVALID_EMBEDDING_DIMENSION) { + return config.embeddingDimension + } + return embeddingModel.dimensions() + } + + companion object { + private val logger = LoggerFactory.getLogger(MilvusVectorStore::class.java) + + const val INVALID_EMBEDDING_DIMENSION: Int = -1 + const val DOC_ID_FIELD_NAME: String = "doc_id" + const val CONTENT_FIELD_NAME: String = "content" + const val METADATA_FIELD_NAME: String = "metadata" + const val EMBEDDING_FIELD_NAME: String = "embedding" + + // Metadata, automatically assigned by Milvus. + const val DISTANCE_FIELD_NAME: String = "distance" + + val SEARCH_OUTPUT_FIELDS = listOf(DOC_ID_FIELD_NAME, CONTENT_FIELD_NAME, METADATA_FIELD_NAME) + } +} diff --git a/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/MilvusVectorStoreProperties.kt b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/MilvusVectorStoreProperties.kt new file mode 100644 index 0000000000..06cbb1cf70 --- /dev/null +++ b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/MilvusVectorStoreProperties.kt @@ -0,0 +1,40 @@ +/* + * Tencent is pleased to support the open source community by making BK-CI 蓝鲸持续集成平台 available. + * + * Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-CI 蓝鲸持续集成平台 is licensed under the MIT license. + * + * A copy of the MIT License is included in this file. + * + * + * Terms of the MIT License: + * --------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT + * LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package com.tencent.bkrepo.job.batch.task.cache.preload.ai + +import io.milvus.param.IndexType +import io.milvus.param.MetricType + +data class MilvusVectorStoreProperties( + var databaseName: String = "default", + var collectionName: String = "vector_store", + var embeddingDimension: Int = 1536, + var indexType: IndexType = IndexType.IVF_FLAT, + var metricType: MetricType = MetricType.COSINE, + var indexParameters: String = "{\"nlist\":1024}", +) diff --git a/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/SearchRequest.kt b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/SearchRequest.kt new file mode 100644 index 0000000000..906ba40219 --- /dev/null +++ b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/SearchRequest.kt @@ -0,0 +1,40 @@ +/* + * Tencent is pleased to support the open source community by making BK-CI 蓝鲸持续集成平台 available. + * + * Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-CI 蓝鲸持续集成平台 is licensed under the MIT license. + * + * A copy of the MIT License is included in this file. + * + * + * Terms of the MIT License: + * --------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT + * LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package com.tencent.bkrepo.job.batch.task.cache.preload.ai + +data class SearchRequest( + var query: String, + var topK: Int = DEFAULT_TOP_K, + var similarityThreshold: Double = SIMILARITY_THRESHOLD_ACCEPT_ALL, + var filterExpression: String? = null +) { + companion object { + private const val DEFAULT_TOP_K = 4 + private const val SIMILARITY_THRESHOLD_ACCEPT_ALL = 0.0 + } +} diff --git a/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/SpringAiConfiguration.kt b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/SpringAiConfiguration.kt new file mode 100644 index 0000000000..3772bfed2d --- /dev/null +++ b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/SpringAiConfiguration.kt @@ -0,0 +1,76 @@ +/* + * Tencent is pleased to support the open source community by making BK-CI 蓝鲸持续集成平台 available. + * + * Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-CI 蓝鲸持续集成平台 is licensed under the MIT license. + * + * A copy of the MIT License is included in this file. + * + * + * Terms of the MIT License: + * --------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT + * LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package com.tencent.bkrepo.job.batch.task.cache.preload.ai + +import io.milvus.client.MilvusClient +import io.milvus.client.MilvusServiceClient +import io.milvus.param.ConnectParam +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty +import org.springframework.boot.context.properties.EnableConfigurationProperties +import org.springframework.context.annotation.Bean +import org.springframework.context.annotation.Configuration +import java.util.concurrent.TimeUnit + +@Configuration +@EnableConfigurationProperties( + AiProperties::class, + MilvusClientProperties::class +) +@ConditionalOnProperty("job.artifact-access-log-embedding.enabled") +class SpringAiConfiguration { + + @Bean + fun httpEmbeddingModel(properties: AiProperties): EmbeddingModel { + return HttpEmbeddingModel(properties) + } + + @Bean + fun milvusClient(properties: MilvusClientProperties): MilvusClient { + with(properties) { + val builder = ConnectParam.newBuilder() + .withHost(host) + .withPort(port) + .withUri(uri) + .withConnectTimeout(connectTimeoutMs, TimeUnit.MILLISECONDS) + .withKeepAliveTime(keepAliveTimeMs, TimeUnit.MILLISECONDS) + .withKeepAliveTimeout(keepAliveTimeoutMs, TimeUnit.MILLISECONDS) + .withRpcDeadline(rpcDeadlineMs, TimeUnit.MILLISECONDS) + .withIdleTimeout(idleTimeoutMs, TimeUnit.MILLISECONDS) + + if (username.isNotEmpty() && password.isNotEmpty()) { + builder.withAuthorization(username, password) + } + + if (!token.isNullOrEmpty()) { + builder.withToken(token) + } + + return MilvusServiceClient(builder.build()) + } + } +} diff --git a/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/VectorStore.kt b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/VectorStore.kt new file mode 100644 index 0000000000..2a0c48cb1b --- /dev/null +++ b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/batch/task/cache/preload/ai/VectorStore.kt @@ -0,0 +1,56 @@ +/* + * Tencent is pleased to support the open source community by making BK-CI 蓝鲸持续集成平台 available. + * + * Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-CI 蓝鲸持续集成平台 is licensed under the MIT license. + * + * A copy of the MIT License is included in this file. + * + * + * Terms of the MIT License: + * --------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT + * LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package com.tencent.bkrepo.job.batch.task.cache.preload.ai + + +interface VectorStore { + + /** + * 插入文档 + */ + fun insert(documents: List) + + /** + * 通过id批量删除文档 + */ + fun delete(ids: Set): Boolean + + fun similaritySearch(request: SearchRequest): List + + fun similaritySearch(query: String): List { + return this.similaritySearch(SearchRequest(query)) + } + + fun createCollection(): Boolean + + fun dropCollection() + + fun collectionExists(): Boolean + + fun collectionName(): String +} diff --git a/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/config/properties/ArtifactAccessLogEmbeddingJobProperties.kt b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/config/properties/ArtifactAccessLogEmbeddingJobProperties.kt new file mode 100644 index 0000000000..497f532d27 --- /dev/null +++ b/src/backend/job/biz-job/src/main/kotlin/com/tencent/bkrepo/job/config/properties/ArtifactAccessLogEmbeddingJobProperties.kt @@ -0,0 +1,40 @@ +/* + * Tencent is pleased to support the open source community by making BK-CI 蓝鲸持续集成平台 available. + * + * Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. + * + * BK-CI 蓝鲸持续集成平台 is licensed under the MIT license. + * + * A copy of the MIT License is included in this file. + * + * + * Terms of the MIT License: + * --------------------------------------------------- + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of + * the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT + * LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN + * NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package com.tencent.bkrepo.job.config.properties + +import org.springframework.boot.context.properties.ConfigurationProperties + +@ConfigurationProperties("job.artifact-access-log-embedding") +class ArtifactAccessLogEmbeddingJobProperties( + override var enabled: Boolean = false, + override var cron: String = "0 1 0 * * ?", + /** + * 需要将访问记录保存到向量数据库的项目 + */ + var projects: Set = emptySet() +) : MongodbJobProperties(enabled) diff --git a/src/backend/repository/biz-repository/src/main/kotlin/com/tencent/bkrepo/repository/controller/user/UserArtifactPreloadController.kt b/src/backend/repository/biz-repository/src/main/kotlin/com/tencent/bkrepo/repository/controller/user/UserArtifactPreloadController.kt index f6a4f7daac..949cfabc85 100644 --- a/src/backend/repository/biz-repository/src/main/kotlin/com/tencent/bkrepo/repository/controller/user/UserArtifactPreloadController.kt +++ b/src/backend/repository/biz-repository/src/main/kotlin/com/tencent/bkrepo/repository/controller/user/UserArtifactPreloadController.kt @@ -39,7 +39,6 @@ import com.tencent.bkrepo.common.artifact.cache.pojo.ArtifactPreloadPlan import com.tencent.bkrepo.common.artifact.cache.pojo.ArtifactPreloadStrategy import com.tencent.bkrepo.common.artifact.cache.pojo.ArtifactPreloadStrategyCreateRequest import com.tencent.bkrepo.common.artifact.cache.pojo.ArtifactPreloadStrategyUpdateRequest -import com.tencent.bkrepo.common.artifact.cache.pojo.PreloadStrategyType import com.tencent.bkrepo.common.artifact.cache.service.ArtifactPreloadPlanService import com.tencent.bkrepo.common.artifact.cache.service.ArtifactPreloadStrategyService import com.tencent.bkrepo.common.mongo.dao.util.Pages @@ -77,7 +76,7 @@ class UserArtifactPreloadController( checkPreloadEnabled(preloadPlanService, preloadStrategyService) permissionManager.checkRepoPermission(PermissionAction.MANAGE, request.projectId, request.repoName) val strategy = preloadStrategyService.create( - request.copy(operator = SecurityUtils.getUserId(), type = PreloadStrategyType.CUSTOM.name) + request.copy(operator = SecurityUtils.getUserId()) ) return ResponseBuilder.success(strategy) }