From 5235041397a63e671d0d847c371fe6a88e000d71 Mon Sep 17 00:00:00 2001 From: Daniel Getu Date: Tue, 12 Nov 2024 10:49:30 -0800 Subject: [PATCH] [Search] Regenerate with 2024-11-01-preview spec --- sdk/search/search-documents/CHANGELOG.md | 19 ++ .../review/search-documents.api.md | 157 +++++++++++++-- .../src/generated/data/models/index.ts | 142 ++++++++++--- .../src/generated/data/models/mappers.ts | 114 +++++++++++ .../src/generated/data/models/parameters.ts | 10 + .../generated/data/operations/documents.ts | 1 + .../src/generated/data/searchClient.ts | 6 +- .../src/generated/service/models/index.ts | 188 +++++++++++++++++- .../src/generated/service/models/mappers.ts | 145 ++++++++++++++ .../generated/service/searchServiceClient.ts | 6 +- sdk/search/search-documents/src/index.ts | 27 ++- .../search-documents/src/indexModels.ts | 41 +++- .../search-documents/src/searchClient.ts | 55 ++++- .../search-documents/src/serviceModels.ts | 37 +++- .../search-documents/src/serviceUtils.ts | 57 +++++- sdk/search/search-documents/swagger/Data.md | 2 +- .../search-documents/swagger/Service.md | 2 +- .../test/public/node/searchClient.spec.ts | 7 +- .../test/public/typeDefinitions.ts | 1 + 19 files changed, 929 insertions(+), 88 deletions(-) diff --git a/sdk/search/search-documents/CHANGELOG.md b/sdk/search/search-documents/CHANGELOG.md index e74d865312b9..166bc2123041 100644 --- a/sdk/search/search-documents/CHANGELOG.md +++ b/sdk/search/search-documents/CHANGELOG.md @@ -1,5 +1,24 @@ # Release History +## 12.3.0-beta.1 (2024-11-18) + +### Features Added + +- Added generative query rewriting for semantic and vectorized text queries [#31792](https://github.com/Azure/azure-sdk-for-js/pull/31792) + - Use the new `queryRewrites` field in `SemanticSearchOptions` and `VectorizableTextQuery`. +- Added hierarchical aggregation and facet filtering [#31792](https://github.com/Azure/azure-sdk-for-js/pull/31792) + - `FacetResult` now includes a recursive `facets` field. 
+- Added configuration for storing vectors in their uncompressed state for rescoring [#31792](https://github.com/Azure/azure-sdk-for-js/pull/31792) + - Configure through `VectorSearchCompression.rescoringOptions`. +- Added Markdown parsing mode for indexers [#31792](https://github.com/Azure/azure-sdk-for-js/pull/31792) + - Configure through the `markdownParsingSubmode` and `markdownHeaderDepth` properties of `IndexingParametersConfiguration`. +- Added `DocumentIntelligenceLayoutSkill` [#31792](https://github.com/Azure/azure-sdk-for-js/pull/31792) +- Added subdomain billing for skillsets [#31792](https://github.com/Azure/azure-sdk-for-js/pull/31792) + +### Bugs Fixed + +- Fixed the type of `SearchResult.documentDebugInfo` to not erroneously describe it as an array [#31792](https://github.com/Azure/azure-sdk-for-js/pull/31792) + ## 12.2.0-beta.1 (2024-09-25) ### Breaking Changes diff --git a/sdk/search/search-documents/review/search-documents.api.md b/sdk/search/search-documents/review/search-documents.api.md index 6a0f9af6182c..24aa55f66481 100644 --- a/sdk/search/search-documents/review/search-documents.api.md +++ b/sdk/search/search-documents/review/search-documents.api.md @@ -15,6 +15,20 @@ import type { Pipeline } from '@azure/core-rest-pipeline'; import type { RestError } from '@azure/core-rest-pipeline'; import type { TokenCredential } from '@azure/core-auth'; +// @public +export interface AIServicesAccountIdentity extends BaseCognitiveServicesAccount { + identity?: SearchIndexerDataIdentity; + odatatype: "#Microsoft.Azure.Search.AIServicesByIdentity"; + subdomainUrl: string; +} + +// @public +export interface AIServicesAccountKey extends BaseCognitiveServicesAccount { + key: string; + odatatype: "#Microsoft.Azure.Search.AIServicesByKey"; + subdomainUrl: string; +} + // @public export interface AIServicesVisionParameters { apiKey?: string; @@ -170,7 +184,7 @@ export interface BaseCharFilter { // @public export interface BaseCognitiveServicesAccount { 
description?: string; - odatatype: "#Microsoft.Azure.Search.DefaultCognitiveServices" | "#Microsoft.Azure.Search.CognitiveServicesByKey"; + odatatype: "#Microsoft.Azure.Search.DefaultCognitiveServices" | "#Microsoft.Azure.Search.CognitiveServicesByKey" | "#Microsoft.Azure.Search.AIServicesByKey" | "#Microsoft.Azure.Search.AIServicesByIdentity"; } // @public @@ -220,7 +234,7 @@ export interface BaseSearchIndexerSkill { description?: string; inputs: InputFieldMappingEntry[]; name?: string; - odatatype: "#Microsoft.Skills.Util.ConditionalSkill" | "#Microsoft.Skills.Text.KeyPhraseExtractionSkill" | "#Microsoft.Skills.Vision.OcrSkill" | "#Microsoft.Skills.Vision.ImageAnalysisSkill" | "#Microsoft.Skills.Text.LanguageDetectionSkill" | "#Microsoft.Skills.Util.ShaperSkill" | "#Microsoft.Skills.Text.MergeSkill" | "#Microsoft.Skills.Text.EntityRecognitionSkill" | "#Microsoft.Skills.Text.SentimentSkill" | "#Microsoft.Skills.Text.V3.SentimentSkill" | "#Microsoft.Skills.Text.V3.EntityLinkingSkill" | "#Microsoft.Skills.Text.V3.EntityRecognitionSkill" | "#Microsoft.Skills.Text.PIIDetectionSkill" | "#Microsoft.Skills.Text.SplitSkill" | "#Microsoft.Skills.Text.CustomEntityLookupSkill" | "#Microsoft.Skills.Text.TranslationSkill" | "#Microsoft.Skills.Util.DocumentExtractionSkill" | "#Microsoft.Skills.Custom.WebApiSkill" | "#Microsoft.Skills.Custom.AmlSkill" | "#Microsoft.Skills.Text.AzureOpenAIEmbeddingSkill" | "#Microsoft.Skills.Vision.VectorizeSkill"; + odatatype: "#Microsoft.Skills.Util.ConditionalSkill" | "#Microsoft.Skills.Text.KeyPhraseExtractionSkill" | "#Microsoft.Skills.Vision.OcrSkill" | "#Microsoft.Skills.Vision.ImageAnalysisSkill" | "#Microsoft.Skills.Text.LanguageDetectionSkill" | "#Microsoft.Skills.Util.ShaperSkill" | "#Microsoft.Skills.Text.MergeSkill" | "#Microsoft.Skills.Text.EntityRecognitionSkill" | "#Microsoft.Skills.Text.SentimentSkill" | "#Microsoft.Skills.Text.V3.SentimentSkill" | "#Microsoft.Skills.Text.V3.EntityLinkingSkill" | 
"#Microsoft.Skills.Text.V3.EntityRecognitionSkill" | "#Microsoft.Skills.Text.PIIDetectionSkill" | "#Microsoft.Skills.Text.SplitSkill" | "#Microsoft.Skills.Text.CustomEntityLookupSkill" | "#Microsoft.Skills.Text.TranslationSkill" | "#Microsoft.Skills.Util.DocumentExtractionSkill" | "#Microsoft.Skills.Util.DocumentIntelligenceLayoutSkill" | "#Microsoft.Skills.Custom.WebApiSkill" | "#Microsoft.Skills.Custom.AmlSkill" | "#Microsoft.Skills.Text.AzureOpenAIEmbeddingSkill" | "#Microsoft.Skills.Vision.VectorizeSkill"; outputs: OutputFieldMappingEntry[]; } @@ -245,7 +259,7 @@ export interface BaseSearchRequestOptions; sessionId?: string; skip?: number; - speller?: Speller; + speller?: QuerySpeller; top?: number; vectorSearchOptions?: VectorSearchOptions; } @@ -280,6 +294,7 @@ export interface BaseVectorSearchCompression { defaultOversampling?: number; kind: "scalarQuantization" | "binaryQuantization"; rerankWithOriginalVectors?: boolean; + rescoringOptions?: RescoringOptions; truncationDimension?: number; } @@ -346,7 +361,7 @@ export interface ClassicTokenizer extends BaseLexicalTokenizer { } // @public -export type CognitiveServicesAccount = DefaultCognitiveServicesAccount | CognitiveServicesAccountKey; +export type CognitiveServicesAccount = DefaultCognitiveServicesAccount | CognitiveServicesAccountKey | AIServicesAccountKey | AIServicesAccountIdentity; // @public export interface CognitiveServicesAccountKey extends BaseCognitiveServicesAccount { @@ -503,6 +518,11 @@ export type DataChangeDetectionPolicy = HighWaterMarkChangeDetectionPolicy | Sql // @public export type DataDeletionDetectionPolicy = SoftDeleteColumnDeletionDetectionPolicy | NativeBlobSoftDeleteDeletionDetectionPolicy; +// @public +export interface DebugInfo { + readonly queryRewrites?: QueryRewritesDebugInfo; +} + // @public export const DEFAULT_BATCH_SIZE: number; @@ -588,6 +608,19 @@ export interface DocumentExtractionSkill extends BaseSearchIndexerSkill { parsingMode?: string; } +// @public +export 
interface DocumentIntelligenceLayoutSkill extends BaseSearchIndexerSkill { + markdownHeaderDepth?: DocumentIntelligenceLayoutSkillMarkdownHeaderDepth; + odatatype: "#Microsoft.Skills.Util.DocumentIntelligenceLayoutSkill"; + outputMode?: DocumentIntelligenceLayoutSkillOutputMode; +} + +// @public +export type DocumentIntelligenceLayoutSkillMarkdownHeaderDepth = string; + +// @public +export type DocumentIntelligenceLayoutSkillOutputMode = string; + // @public export interface EdgeNGramTokenFilter { maxGram?: number; @@ -670,6 +703,7 @@ export interface ExtractiveQueryAnswer { // (undocumented) answerType: "extractive"; count?: number; + maxCharLength?: number; threshold?: number; } @@ -679,12 +713,16 @@ export interface ExtractiveQueryCaption { captionType: "extractive"; // (undocumented) highlight?: boolean; + maxCharLength?: number; } // @public export interface FacetResult { [property: string]: any; readonly count?: number; + readonly facets?: { + [propertyName: string]: FacetResult[]; + }; } // @public @@ -713,6 +751,14 @@ export interface FreshnessScoringParameters { boostingDuration: string; } +// @public (undocumented) +export interface GenerativeQueryRewrites { + // (undocumented) + count?: number; + // (undocumented) + rewritesType: "generative"; +} + // @public export class GeographyPoint { constructor(geographyPoint: { @@ -907,6 +953,8 @@ export interface IndexingParametersConfiguration { imageAction?: BlobIndexerImageAction; indexedFileNameExtensions?: string; indexStorageMetadataOnlyForOversizedDocuments?: boolean; + markdownHeaderDepth?: MarkdownHeaderDepth; + markdownParsingSubmode?: MarkdownParsingSubmode; parsingMode?: BlobIndexerParsingMode; pdfTextRotationAlgorithm?: BlobIndexerPDFTextRotationAlgorithm; queryTimeout?: string; @@ -1117,6 +1165,7 @@ export enum KnownBlobIndexerParsingMode { Json = "json", JsonArray = "jsonArray", JsonLines = "jsonLines", + Markdown = "markdown", Text = "text" } @@ -1144,6 +1193,21 @@ export enum 
KnownCustomEntityLookupSkillLanguage { Pt = "pt" } +// @public +export enum KnownDocumentIntelligenceLayoutSkillMarkdownHeaderDepth { + H1 = "h1", + H2 = "h2", + H3 = "h3", + H4 = "h4", + H5 = "h5", + H6 = "h6" +} + +// @public +export enum KnownDocumentIntelligenceLayoutSkillOutputMode { + OneToMany = "oneToMany" +} + // @public export enum KnownEntityCategory { Datetime = "datetime", @@ -1401,6 +1465,22 @@ enum KnownLexicalNormalizerName { export { KnownLexicalNormalizerName } export { KnownLexicalNormalizerName as KnownNormalizerNames } +// @public +export enum KnownMarkdownHeaderDepth { + H1 = "h1", + H2 = "h2", + H3 = "h3", + H4 = "h4", + H5 = "h5", + H6 = "h6" +} + +// @public +export enum KnownMarkdownParsingSubmode { + OneToMany = "oneToMany", + OneToOne = "oneToOne" +} + // @public export enum KnownOcrLineEnding { CarriageReturn = "carriageReturn", @@ -1591,8 +1671,11 @@ export enum KnownPIIDetectionSkillMaskingMode { // @public export enum KnownQueryDebugMode { + All = "all", Disabled = "disabled", - Semantic = "semantic" + QueryRewrites = "queryRewrites", + Semantic = "semantic", + Vector = "vector" } // @public @@ -1672,7 +1755,7 @@ export enum KnownQueryLanguage { } // @public -export enum KnownQuerySpellerType { +export enum KnownQuerySpeller { Lexicon = "lexicon", None = "none" } @@ -1744,6 +1827,11 @@ export enum KnownSemanticFieldState { Used = "used" } +// @public +export enum KnownSemanticQueryRewritesResultType { + OriginalQueryOnly = "originalQueryOnly" +} + // @public export enum KnownSemanticSearchResultsType { BaseResults = "baseResults", @@ -1769,12 +1857,6 @@ export enum KnownSentimentSkillLanguage { Tr = "tr" } -// @public -export enum KnownSpeller { - Lexicon = "lexicon", - None = "none" -} - // @public export enum KnownSplitSkillEncoderModelName { CL100KBase = "cl100k_base", @@ -2002,6 +2084,12 @@ export enum KnownVectorSearchCompressionKind { ScalarQuantization = "scalarQuantization" } +// @public +export enum 
KnownVectorSearchCompressionRescoreStorageMethod { + DiscardOriginals = "discardOriginals", + PreserveOriginals = "preserveOriginals" +} + // @public export enum KnownVectorSearchCompressionTarget { Int8 = "int8" @@ -2127,6 +2215,12 @@ export interface MappingCharFilter extends BaseCharFilter { odatatype: "#Microsoft.Azure.Search.MappingCharFilter"; } +// @public +export type MarkdownHeaderDepth = string; + +// @public +export type MarkdownParsingSubmode = string; + // @public export type MergeDocumentsOptions = IndexDocumentsOptions; @@ -2340,8 +2434,23 @@ export interface QueryResultDocumentSubscores { }[]; } +// @public (undocumented) +export type QueryRewrites = GenerativeQueryRewrites; + // @public -export type QuerySpellerType = string; +export interface QueryRewritesDebugInfo { + readonly text?: QueryRewritesValuesDebugInfo; + readonly vectors?: QueryRewritesValuesDebugInfo[]; +} + +// @public +export interface QueryRewritesValuesDebugInfo { + readonly inputQuery?: string; + readonly rewrites?: string[]; +} + +// @public +export type QuerySpeller = string; // @public export type QueryType = "simple" | "full" | "semantic"; @@ -2349,6 +2458,13 @@ export type QueryType = "simple" | "full" | "semantic"; // @public (undocumented) export type RegexFlags = `${KnownRegexFlags}`; +// @public +export interface RescoringOptions { + defaultOversampling?: number; + enableRescoring?: boolean; + rescoreStorageMethod?: VectorSearchCompressionRescoreStorageMethod; +} + // @public export interface ResetDocumentsOptions extends OperationOptions { datasourceDocumentIds?: string[]; @@ -2457,10 +2573,12 @@ export interface SearchDocumentsResultBase { readonly answers?: QueryAnswerResult[]; readonly count?: number; readonly coverage?: number; + readonly debugInfo?: DebugInfo; readonly facets?: { [propertyName: string]: FacetResult[]; }; readonly semanticErrorReason?: SemanticErrorReason; + readonly semanticQueryRewritesResultType?: SemanticQueryRewritesResultType; readonly 
semanticSearchResultsType?: SemanticSearchResultsType; } @@ -2722,7 +2840,7 @@ export interface SearchIndexerLimits { } // @public -export type SearchIndexerSkill = AzureMachineLearningSkill | AzureOpenAIEmbeddingSkill | ConditionalSkill | CustomEntityLookupSkill | DocumentExtractionSkill | EntityLinkingSkill | EntityRecognitionSkill | EntityRecognitionSkillV3 | ImageAnalysisSkill | KeyPhraseExtractionSkill | LanguageDetectionSkill | MergeSkill | OcrSkill | PIIDetectionSkill | SentimentSkill | SentimentSkillV3 | ShaperSkill | SplitSkill | TextTranslationSkill | VisionVectorizeSkill | WebApiSkill; +export type SearchIndexerSkill = AzureMachineLearningSkill | AzureOpenAIEmbeddingSkill | ConditionalSkill | CustomEntityLookupSkill | DocumentExtractionSkill | DocumentIntelligenceLayoutSkill | EntityLinkingSkill | EntityRecognitionSkill | EntityRecognitionSkillV3 | ImageAnalysisSkill | KeyPhraseExtractionSkill | LanguageDetectionSkill | MergeSkill | OcrSkill | PIIDetectionSkill | SentimentSkill | SentimentSkillV3 | ShaperSkill | SplitSkill | TextTranslationSkill | VisionVectorizeSkill | WebApiSkill; // @public export interface SearchIndexerSkillset { @@ -2923,6 +3041,9 @@ export interface SemanticPrioritizedFields { titleField?: SemanticField; } +// @public +export type SemanticQueryRewritesResultType = string; + // @public export interface SemanticSearch { configurations?: SemanticConfiguration[]; @@ -2937,6 +3058,7 @@ export interface SemanticSearchOptions { debugMode?: QueryDebugMode; errorMode?: SemanticErrorMode; maxWaitInMilliseconds?: number; + queryRewrites?: QueryRewrites; semanticFields?: string[]; semanticQuery?: string; } @@ -3050,9 +3172,6 @@ export interface SoftDeleteColumnDeletionDetectionPolicy extends BaseDataDeletio softDeleteMarkerValue?: string; } -// @public -export type Speller = string; - // @public export interface SplitSkill extends BaseSearchIndexerSkill { azureOpenAITokenizerParameters?: AzureOpenAITokenizerParameters; @@ -3257,6 +3376,7 @@ 
export interface VectorizableImageUrlQuery extends BaseVe // @public export interface VectorizableTextQuery extends BaseVectorQuery { kind: "text"; + queryRewrites?: QueryRewrites; text: string; } @@ -3300,6 +3420,9 @@ export type VectorSearchCompression = BinaryQuantizationCompression | ScalarQuan // @public export type VectorSearchCompressionKind = string; +// @public +export type VectorSearchCompressionRescoreStorageMethod = string; + // @public export type VectorSearchCompressionTarget = string; diff --git a/sdk/search/search-documents/src/generated/data/models/index.ts b/sdk/search/search-documents/src/generated/data/models/index.ts index 1b74a2af696e..1adf9842fa1c 100644 --- a/sdk/search/search-documents/src/generated/data/models/index.ts +++ b/sdk/search/search-documents/src/generated/data/models/index.ts @@ -91,6 +91,11 @@ export interface SearchDocumentsResult { * NOTE: This property will not be serialized. It can only be populated by the server. */ readonly answers?: QueryAnswerResult[]; + /** + * Debug information that applies to the search results as a whole. + * NOTE: This property will not be serialized. It can only be populated by the server. + */ + readonly debugInfo?: DebugInfo; /** * Continuation JSON payload returned when the query can't return all the requested results in a single response. You can use this JSON along with @odata.nextLink to formulate another POST Search request to get the next part of the search response. * NOTE: This property will not be serialized. It can only be populated by the server. @@ -116,6 +121,11 @@ export interface SearchDocumentsResult { * NOTE: This property will not be serialized. It can only be populated by the server. */ readonly semanticPartialResponseType?: SemanticSearchResultsType; + /** + * Type of query rewrite that was used to retrieve documents. + * NOTE: This property will not be serialized. It can only be populated by the server. 
+ */ + readonly semanticQueryRewritesResultType?: SemanticQueryRewritesResultType; } /** A single bucket of a facet query result. Reports the number of documents with a field value falling within a particular range or having a particular value or interval. */ @@ -127,6 +137,11 @@ export interface FacetResult { * NOTE: This property will not be serialized. It can only be populated by the server. */ readonly count?: number; + /** + * The nested facet query results for the search operation, organized as a collection of buckets for each faceted field; null if the query did not contain any nested facets. + * NOTE: This property will not be serialized. It can only be populated by the server. + */ + readonly facets?: { [propertyName: string]: FacetResult[] }; } /** An answer is a text passage extracted from the contents of the most relevant documents that matched the query. Answers are extracted from the top search results. Answer candidates are scored and the top answers are selected. */ @@ -155,6 +170,43 @@ export interface QueryAnswerResult { readonly highlights?: string; } +/** Contains debugging information that can be used to further explore your search results. */ +export interface DebugInfo { + /** + * Contains debugging information specific to query rewrites. + * NOTE: This property will not be serialized. It can only be populated by the server. + */ + readonly queryRewrites?: QueryRewritesDebugInfo; +} + +/** Contains debugging information specific to query rewrites. */ +export interface QueryRewritesDebugInfo { + /** + * List of query rewrites generated for the text query. + * NOTE: This property will not be serialized. It can only be populated by the server. + */ + readonly text?: QueryRewritesValuesDebugInfo; + /** + * List of query rewrites generated for the vectorizable text queries. + * NOTE: This property will not be serialized. It can only be populated by the server. 
+ */ + readonly vectors?: QueryRewritesValuesDebugInfo[]; +} + +/** Contains debugging information specific to query rewrites. */ +export interface QueryRewritesValuesDebugInfo { + /** + * The input text to the generative query rewriting model. There may be cases where the user query and the input to the generative model are not identical. + * NOTE: This property will not be serialized. It can only be populated by the server. + */ + readonly inputQuery?: string; + /** + * List of query rewrites. + * NOTE: This property will not be serialized. It can only be populated by the server. + */ + readonly rewrites?: string[]; +} + /** Parameters for filtering, sorting, faceting, paging, and other search query behaviors. */ export interface SearchRequest { /** A value that specifies whether to fetch the total count of results. Default is false. Setting this value to true may have a performance impact. Note that the count returned is an approximation. */ @@ -213,6 +265,8 @@ export interface SearchRequest { answers?: QueryAnswerType; /** A value that specifies whether captions should be returned as part of the search response. */ captions?: QueryCaptionType; + /** A value that specifies whether query rewrites should be generated to augment the search query. */ + queryRewrites?: QueryRewritesType; /** The comma-separated list of field names used for semantic ranking. */ semanticFields?: string; /** The query parameters for vector and hybrid search queries. */ @@ -588,6 +642,8 @@ export interface VectorizableTextQuery extends VectorQuery { kind: "text"; /** The text to be vectorized to perform a vector search query. */ text: string; + /** Can be configured to let a generative model rewrite the query before sending it to be vectorized. */ + queryRewrites?: QueryRewritesType; } /** The query parameters to use for vector search when an url that represents an image value that needs to be vectorized is provided. 
*/ @@ -666,18 +722,20 @@ export interface SearchOptions { semanticErrorHandling?: SemanticErrorMode; /** Allows the user to set an upper bound on the amount of time it takes for semantic enrichment to finish processing before the request fails. */ semanticMaxWaitInMilliseconds?: number; - /** This parameter is only valid if the query type is `semantic`. If set, the query returns answers extracted from key passages in the highest ranked documents. The number of answers returned can be configured by appending the pipe character `|` followed by the `count-` option after the answers parameter value, such as `extractive|count-3`. Default count is 1. The confidence threshold can be configured by appending the pipe character `|` followed by the `threshold-` option after the answers parameter value, such as `extractive|threshold-0.9`. Default threshold is 0.7. */ + /** This parameter is only valid if the query type is `semantic`. If set, the query returns answers extracted from key passages in the highest ranked documents. The number of answers returned can be configured by appending the pipe character `|` followed by the `count-` option after the answers parameter value, such as `extractive|count-3`. Default count is 1. The confidence threshold can be configured by appending the pipe character `|` followed by the `threshold-` option after the answers parameter value, such as `extractive|threshold-0.9`. Default threshold is 0.7. The maximum character length of answers can be configured by appending the pipe character '|' followed by the 'maxcharlength-' option, such as 'extractive|maxcharlength-600'. */ answers?: QueryAnswerType; - /** This parameter is only valid if the query type is `semantic`. If set, the query returns captions extracted from key passages in the highest ranked documents. 
When Captions is set to `extractive`, highlighting is enabled by default, and can be configured by appending the pipe character `|` followed by the `highlight-` option, such as `extractive|highlight-true`. Defaults to `None`. */ + /** This parameter is only valid if the query type is `semantic`. If set, the query returns captions extracted from key passages in the highest ranked documents. When Captions is set to `extractive`, highlighting is enabled by default, and can be configured by appending the pipe character `|` followed by the `highlight-` option, such as `extractive|highlight-true`. Defaults to `None`. The maximum character length of captions can be configured by appending the pipe character '|' followed by the 'maxcharlength-' option, such as 'extractive|maxcharlength-600'. */ captions?: QueryCaptionType; /** Allows setting a separate search query that will be solely used for semantic reranking, semantic captions and semantic answers. Is useful for scenarios where there is a need to use different queries between the base retrieval and ranking phase, and the L2 semantic phase. */ semanticQuery?: string; + /** When QueryRewrites is set to `generative`, the query terms are sent to a generative model which will produce 10 (default) rewrites to help increase the recall of the request. The requested count can be configured by appending the pipe character `|` followed by the `count-` option, such as `generative|count-3`. Defaults to `None`. This parameter is only valid if the query type is `semantic`. */ + queryRewrites?: QueryRewritesType; /** Enables a debugging tool that can be used to further explore your search results. */ debug?: QueryDebugMode; /** The language of the query. */ queryLanguage?: QueryLanguage; /** Improve search recall by spell-correcting individual search query terms. */ - speller?: Speller; + speller?: QuerySpellerType; /** The list of field names used for semantic ranking. 
*/ semanticFields?: string[]; } @@ -724,20 +782,20 @@ export interface AutocompleteOptions { top?: number; } -/** Known values of {@link ApiVersion20240901Preview} that the service accepts. */ -export enum KnownApiVersion20240901Preview { - /** Api Version '2024-09-01-preview' */ - TwoThousandTwentyFour0901Preview = "2024-09-01-preview", +/** Known values of {@link ApiVersion20241101Preview} that the service accepts. */ +export enum KnownApiVersion20241101Preview { + /** Api Version '2024-11-01-preview' */ + TwoThousandTwentyFour1101Preview = "2024-11-01-preview", } /** - * Defines values for ApiVersion20240901Preview. \ - * {@link KnownApiVersion20240901Preview} can be used interchangeably with ApiVersion20240901Preview, + * Defines values for ApiVersion20241101Preview. \ + * {@link KnownApiVersion20241101Preview} can be used interchangeably with ApiVersion20241101Preview, * this enum contains the known values that the service supports. * ### Known values supported by the service - * **2024-09-01-preview**: Api Version '2024-09-01-preview' + * **2024-11-01-preview**: Api Version '2024-11-01-preview' */ -export type ApiVersion20240901Preview = string; +export type ApiVersion20241101Preview = string; /** Known values of {@link SemanticErrorMode} that the service accepts. */ export enum KnownSemanticErrorMode { @@ -793,12 +851,36 @@ export enum KnownQueryCaptionType { */ export type QueryCaptionType = string; +/** Known values of {@link QueryRewritesType} that the service accepts. */ +export enum KnownQueryRewritesType { + /** Do not generate additional query rewrites for this query. */ + None = "none", + /** Generate alternative query terms to increase the recall of a search request. */ + Generative = "generative", +} + +/** + * Defines values for QueryRewritesType. \ + * {@link KnownQueryRewritesType} can be used interchangeably with QueryRewritesType, + * this enum contains the known values that the service supports. 
+ * ### Known values supported by the service + * **none**: Do not generate additional query rewrites for this query. \ + * **generative**: Generate alternative query terms to increase the recall of a search request. + */ +export type QueryRewritesType = string; + /** Known values of {@link QueryDebugMode} that the service accepts. */ export enum KnownQueryDebugMode { /** No query debugging information will be returned. */ Disabled = "disabled", /** Allows the user to further explore their reranked results. */ Semantic = "semantic", + /** Allows the user to further explore their hybrid and vector query results. */ + Vector = "vector", + /** Allows the user to explore the list of query rewrites generated for their search request. */ + QueryRewrites = "queryRewrites", + /** Turn on all debug options. */ + All = "all", } /** @@ -807,7 +889,10 @@ export enum KnownQueryDebugMode { * this enum contains the known values that the service supports. * ### Known values supported by the service * **disabled**: No query debugging information will be returned. \ - * **semantic**: Allows the user to further explore their reranked results. + * **semantic**: Allows the user to further explore their reranked results. \ + * **vector**: Allows the user to further explore their hybrid and vector query results. \ + * **queryRewrites**: Allows the user to explore the list of query rewrites generated for their search request. \ + * **all**: Turn on all debug options. */ export type QueryDebugMode = string; @@ -1039,24 +1124,6 @@ export enum KnownQueryLanguage { */ export type QueryLanguage = string; -/** Known values of {@link Speller} that the service accepts. */ -export enum KnownSpeller { - /** Speller not enabled. */ - None = "none", - /** Speller corrects individual query terms using a static lexicon for the language specified by the queryLanguage parameter. */ - Lexicon = "lexicon", -} - -/** - * Defines values for Speller. 
\ - * {@link KnownSpeller} can be used interchangeably with Speller, - * this enum contains the known values that the service supports. - * ### Known values supported by the service - * **none**: Speller not enabled. \ - * **lexicon**: Speller corrects individual query terms using a static lexicon for the language specified by the queryLanguage parameter. - */ -export type Speller = string; - /** Known values of {@link QuerySpellerType} that the service accepts. */ export enum KnownQuerySpellerType { /** Speller not enabled. */ @@ -1212,6 +1279,21 @@ export enum KnownSemanticSearchResultsType { * **rerankedResults**: Results have been reranked with the reranker model and will include semantic captions. They will not include any answers, answers highlights or caption highlights. */ export type SemanticSearchResultsType = string; + +/** Known values of {@link SemanticQueryRewritesResultType} that the service accepts. */ +export enum KnownSemanticQueryRewritesResultType { + /** Query rewrites were not successfully generated for this request. Only the original query was used to retrieve the results. */ + OriginalQueryOnly = "originalQueryOnly", +} + +/** + * Defines values for SemanticQueryRewritesResultType. \ + * {@link KnownSemanticQueryRewritesResultType} can be used interchangeably with SemanticQueryRewritesResultType, + * this enum contains the known values that the service supports. + * ### Known values supported by the service + * **originalQueryOnly**: Query rewrites were not successfully generated for this request. Only the original query was used to retrieve the results. + */ +export type SemanticQueryRewritesResultType = string; /** Defines values for QueryType. */ export type QueryType = "simple" | "full" | "semantic"; /** Defines values for SearchMode. 
*/ diff --git a/sdk/search/search-documents/src/generated/data/models/mappers.ts b/sdk/search/search-documents/src/generated/data/models/mappers.ts index 47025081e765..1f4652b18cde 100644 --- a/sdk/search/search-documents/src/generated/data/models/mappers.ts +++ b/sdk/search/search-documents/src/generated/data/models/mappers.ts @@ -152,6 +152,13 @@ export const SearchDocumentsResult: coreClient.CompositeMapper = { }, }, }, + debugInfo: { + serializedName: "@search\\.debugInfo", + type: { + name: "Composite", + className: "DebugInfo", + }, + }, nextPageParameters: { serializedName: "@search\\.nextPageParameters", type: { @@ -194,6 +201,13 @@ export const SearchDocumentsResult: coreClient.CompositeMapper = { name: "String", }, }, + semanticQueryRewritesResultType: { + serializedName: "@search\\.semanticQueryRewritesResultType", + readOnly: true, + type: { + name: "String", + }, + }, }, }, }; @@ -211,6 +225,21 @@ export const FacetResult: coreClient.CompositeMapper = { name: "Number", }, }, + facets: { + serializedName: "@search\\.facets", + readOnly: true, + type: { + name: "Dictionary", + value: { + type: { + name: "Sequence", + element: { + type: { name: "Composite", className: "FacetResult" }, + }, + }, + }, + }, + }, }, }, }; @@ -257,6 +286,79 @@ export const QueryAnswerResult: coreClient.CompositeMapper = { }, }; +export const DebugInfo: coreClient.CompositeMapper = { + type: { + name: "Composite", + className: "DebugInfo", + modelProperties: { + queryRewrites: { + serializedName: "queryRewrites", + type: { + name: "Composite", + className: "QueryRewritesDebugInfo", + }, + }, + }, + }, +}; + +export const QueryRewritesDebugInfo: coreClient.CompositeMapper = { + type: { + name: "Composite", + className: "QueryRewritesDebugInfo", + modelProperties: { + text: { + serializedName: "text", + type: { + name: "Composite", + className: "QueryRewritesValuesDebugInfo", + }, + }, + vectors: { + serializedName: "vectors", + readOnly: true, + type: { + name: "Sequence", + 
element: { + type: { + name: "Composite", + className: "QueryRewritesValuesDebugInfo", + }, + }, + }, + }, + }, + }, +}; + +export const QueryRewritesValuesDebugInfo: coreClient.CompositeMapper = { + type: { + name: "Composite", + className: "QueryRewritesValuesDebugInfo", + modelProperties: { + inputQuery: { + serializedName: "inputQuery", + readOnly: true, + type: { + name: "String", + }, + }, + rewrites: { + serializedName: "rewrites", + readOnly: true, + type: { + name: "Sequence", + element: { + type: { + name: "String", + }, + }, + }, + }, + }, + }, +}; + export const SearchRequest: coreClient.CompositeMapper = { type: { name: "Composite", @@ -447,6 +549,12 @@ export const SearchRequest: coreClient.CompositeMapper = { name: "String", }, }, + queryRewrites: { + serializedName: "queryRewrites", + type: { + name: "String", + }, + }, semanticFields: { serializedName: "semanticFields", type: { @@ -1287,6 +1395,12 @@ export const VectorizableTextQuery: coreClient.CompositeMapper = { name: "String", }, }, + queryRewrites: { + serializedName: "queryRewrites", + type: { + name: "String", + }, + }, }, }, }; diff --git a/sdk/search/search-documents/src/generated/data/models/parameters.ts b/sdk/search/search-documents/src/generated/data/models/parameters.ts index 8794f444c940..581b15e62cc2 100644 --- a/sdk/search/search-documents/src/generated/data/models/parameters.ts +++ b/sdk/search/search-documents/src/generated/data/models/parameters.ts @@ -356,6 +356,16 @@ export const semanticQuery: OperationQueryParameter = { }, }; +export const queryRewrites: OperationQueryParameter = { + parameterPath: ["options", "searchOptions", "queryRewrites"], + mapper: { + serializedName: "queryRewrites", + type: { + name: "String", + }, + }, +}; + export const debug: OperationQueryParameter = { parameterPath: ["options", "searchOptions", "debug"], mapper: { diff --git a/sdk/search/search-documents/src/generated/data/operations/documents.ts 
b/sdk/search/search-documents/src/generated/data/operations/documents.ts index 7413fdeafe96..66fe9c71ca38 100644 --- a/sdk/search/search-documents/src/generated/data/operations/documents.ts +++ b/sdk/search/search-documents/src/generated/data/operations/documents.ts @@ -237,6 +237,7 @@ const searchGetOperationSpec: coreClient.OperationSpec = { Parameters.answers, Parameters.captions, Parameters.semanticQuery, + Parameters.queryRewrites, Parameters.debug, Parameters.queryLanguage, Parameters.speller, diff --git a/sdk/search/search-documents/src/generated/data/searchClient.ts b/sdk/search/search-documents/src/generated/data/searchClient.ts index f1f77355a94b..976bcd1fd101 100644 --- a/sdk/search/search-documents/src/generated/data/searchClient.ts +++ b/sdk/search/search-documents/src/generated/data/searchClient.ts @@ -15,7 +15,7 @@ import { import { DocumentsImpl } from "./operations"; import { Documents } from "./operationsInterfaces"; import { - ApiVersion20240901Preview, + ApiVersion20241101Preview, SearchClientOptionalParams, } from "./models"; @@ -23,7 +23,7 @@ import { export class SearchClient extends coreHttpCompat.ExtendedServiceClient { endpoint: string; indexName: string; - apiVersion: ApiVersion20240901Preview; + apiVersion: ApiVersion20241101Preview; /** * Initializes a new instance of the SearchClient class. 
@@ -35,7 +35,7 @@ export class SearchClient extends coreHttpCompat.ExtendedServiceClient { constructor( endpoint: string, indexName: string, - apiVersion: ApiVersion20240901Preview, + apiVersion: ApiVersion20241101Preview, options?: SearchClientOptionalParams, ) { if (endpoint === undefined) { diff --git a/sdk/search/search-documents/src/generated/service/models/index.ts b/sdk/search/search-documents/src/generated/service/models/index.ts index 242fe4975b01..777dcc683079 100644 --- a/sdk/search/search-documents/src/generated/service/models/index.ts +++ b/sdk/search/search-documents/src/generated/service/models/index.ts @@ -40,6 +40,7 @@ export type SearchIndexerSkillUnion = | CustomEntityLookupSkill | TextTranslationSkill | DocumentExtractionSkill + | DocumentIntelligenceLayoutSkill | WebApiSkill | AzureMachineLearningSkill | AzureOpenAIEmbeddingSkill @@ -47,7 +48,9 @@ export type SearchIndexerSkillUnion = export type CognitiveServicesAccountUnion = | CognitiveServicesAccount | DefaultCognitiveServicesAccount - | CognitiveServicesAccountKey; + | CognitiveServicesAccountKey + | AIServicesAccountKey + | AIServicesAccountIdentity; export type ScoringFunctionUnion = | ScoringFunction | DistanceScoringFunction @@ -343,6 +346,10 @@ export interface IndexingParametersConfiguration { delimitedTextDelimiter?: string; /** For CSV blobs, indicates that the first (non-blank) line of each blob contains headers. */ firstLineContainsHeaders?: boolean; + /** Specifies the submode that will determine whether a markdown file will be parsed into exactly one search document or multiple search documents. Default is `oneToMany`. */ + markdownParsingSubmode?: MarkdownParsingSubmode; + /** Specifies the max header depth that will be considered while grouping markdown content. Default is `h6`. */ + markdownHeaderDepth?: MarkdownHeaderDepth; /** For JSON arrays, given a structured or semi-structured document, you can specify a path to the array using this property. 
*/ documentRoot?: string; /** Specifies the data to extract from Azure blob storage and tells the indexer which data to extract from image content when "imageAction" is set to a value other than "none". This applies to embedded image content in a .PDF or other application, or image files such as .jpg and .png, in Azure blobs. */ @@ -644,6 +651,7 @@ export interface SearchIndexerSkill { | "#Microsoft.Skills.Text.CustomEntityLookupSkill" | "#Microsoft.Skills.Text.TranslationSkill" | "#Microsoft.Skills.Util.DocumentExtractionSkill" + | "#Microsoft.Skills.Util.DocumentIntelligenceLayoutSkill" | "#Microsoft.Skills.Custom.WebApiSkill" | "#Microsoft.Skills.Custom.AmlSkill" | "#Microsoft.Skills.Text.AzureOpenAIEmbeddingSkill" @@ -685,7 +693,9 @@ export interface CognitiveServicesAccount { /** Polymorphic discriminator, which specifies the different types this object can be */ odatatype: | "#Microsoft.Azure.Search.DefaultCognitiveServices" - | "#Microsoft.Azure.Search.CognitiveServicesByKey"; + | "#Microsoft.Azure.Search.CognitiveServicesByKey" + | "#Microsoft.Azure.Search.AIServicesByKey" + | "#Microsoft.Azure.Search.AIServicesByIdentity"; /** Description of the Azure AI service resource attached to a skillset. */ description?: string; } @@ -1095,10 +1105,22 @@ export interface VectorSearchCompression { rerankWithOriginalVectors?: boolean; /** Default oversampling factor. Oversampling will internally request more documents (specified by this multiplier) in the initial search. This increases the set of results that will be reranked using recomputed similarity scores from full-precision vectors. Minimum value is 1, meaning no oversampling (1x). This parameter can only be set when rerankWithOriginalVectors is true. Higher values improve recall at the expense of latency. */ defaultOversampling?: number; + /** Contains the options for rescoring. */ + rescoringOptions?: RescoringOptions; /** The number of dimensions to truncate the vectors to. 
Truncating the vectors reduces the size of the vectors and the amount of data that needs to be transferred during search. This can save storage cost and improve search performance at the expense of recall. It should be only used for embeddings trained with Matryoshka Representation Learning (MRL) such as OpenAI text-embedding-3-large (small). The default value is null, which means no truncation. */ truncationDimension?: number; } +/** Contains the options for rescoring. */ +export interface RescoringOptions { + /** If set to true, after the initial search on the compressed vectors, the similarity scores are recalculated using the full-precision vectors. This will improve recall at the expense of latency. */ + enableRescoring?: boolean; + /** Default oversampling factor. Oversampling retrieves a greater set of potential documents to offset the resolution loss due to quantization. This increases the set of results that will be rescored on full-precision vectors. Minimum value is 1, meaning no oversampling (1x). This parameter can only be set when 'enableRescoring' is true. Higher values improve recall at the expense of latency. */ + defaultOversampling?: number; + /** Controls the storage method for original vectors. This setting is immutable. */ + rescoreStorageMethod?: VectorSearchCompressionRescoreStorageMethod; +} + /** Response from a List Indexes request. If successful, it includes the full definitions of all indexes. */ export interface ListIndexesResult { /** @@ -1671,6 +1693,16 @@ export interface DocumentExtractionSkill extends SearchIndexerSkill { configuration?: { [propertyName: string]: any }; } +/** A skill that extracts content and layout information (as markdown), via Azure AI Services, from files within the enrichment pipeline. 
*/ +export interface DocumentIntelligenceLayoutSkill extends SearchIndexerSkill { + /** Polymorphic discriminator, which specifies the different types this object can be */ + odatatype: "#Microsoft.Skills.Util.DocumentIntelligenceLayoutSkill"; + /** Controls the cardinality of the output produced by the skill. Default is 'oneToMany'. */ + outputMode?: DocumentIntelligenceLayoutSkillOutputMode; + /** The depth of headers in the markdown output. Default is h6. */ + markdownHeaderDepth?: DocumentIntelligenceLayoutSkillMarkdownHeaderDepth; +} + /** A skill that can call a Web API endpoint, allowing you to extend a skillset by having it call your custom code. */ export interface WebApiSkill extends SearchIndexerSkill { /** Polymorphic discriminator, which specifies the different types this object can be */ @@ -1744,6 +1776,26 @@ export interface CognitiveServicesAccountKey extends CognitiveServicesAccount { key: string; } +/** The account key of an Azure AI service resource that's attached to a skillset, to be used with the resource's subdomain. */ +export interface AIServicesAccountKey extends CognitiveServicesAccount { + /** Polymorphic discriminator, which specifies the different types this object can be */ + odatatype: "#Microsoft.Azure.Search.AIServicesByKey"; + /** The key used to provision the Azure AI service resource attached to a skillset. */ + key: string; + /** The subdomain url for the corresponding AI Service. */ + subdomainUrl: string; +} + +/** The multi-region account of an Azure AI service resource that's attached to a skillset. */ +export interface AIServicesAccountIdentity extends CognitiveServicesAccount { + /** Polymorphic discriminator, which specifies the different types this object can be */ + odatatype: "#Microsoft.Azure.Search.AIServicesByIdentity"; + /** The user-assigned managed identity used for connections to AI Service. If not specified, the system-assigned managed identity is used. 
On updates to the skillset, if the identity is unspecified, the value remains unchanged. If set to "none", the value of this property is cleared. */ + identity: SearchIndexerDataIdentityUnion | null; + /** The subdomain url for the corresponding AI Service. */ + subdomainUrl: string; +} + /** Description for what data to store in Azure Tables. */ export interface SearchIndexerKnowledgeStoreTableProjectionSelector extends SearchIndexerKnowledgeStoreProjectionSelector { @@ -2352,20 +2404,20 @@ export interface SearchIndexerKnowledgeStoreObjectProjectionSelector export interface SearchIndexerKnowledgeStoreFileProjectionSelector extends SearchIndexerKnowledgeStoreBlobProjectionSelector {} -/** Known values of {@link ApiVersion20240901Preview} that the service accepts. */ -export enum KnownApiVersion20240901Preview { - /** Api Version '2024-09-01-preview' */ - TwoThousandTwentyFour0901Preview = "2024-09-01-preview", +/** Known values of {@link ApiVersion20241101Preview} that the service accepts. */ +export enum KnownApiVersion20241101Preview { + /** Api Version '2024-11-01-preview' */ + TwoThousandTwentyFour1101Preview = "2024-11-01-preview", } /** - * Defines values for ApiVersion20240901Preview. \ - * {@link KnownApiVersion20240901Preview} can be used interchangeably with ApiVersion20240901Preview, + * Defines values for ApiVersion20241101Preview. \ + * {@link KnownApiVersion20241101Preview} can be used interchangeably with ApiVersion20241101Preview, * this enum contains the known values that the service supports. * ### Known values supported by the service - * **2024-09-01-preview**: Api Version '2024-09-01-preview' + * **2024-11-01-preview**: Api Version '2024-11-01-preview' */ -export type ApiVersion20240901Preview = string; +export type ApiVersion20241101Preview = string; /** Known values of {@link SearchIndexerDataSourceType} that the service accepts. 
*/ export enum KnownSearchIndexerDataSourceType { @@ -2414,6 +2466,8 @@ export enum KnownBlobIndexerParsingMode { JsonArray = "jsonArray", /** Set to jsonLines to extract individual JSON entities, separated by a new line, as separate documents. */ JsonLines = "jsonLines", + /** Set to markdown to extract content from markdown files. */ + Markdown = "markdown", } /** @@ -2426,10 +2480,59 @@ export enum KnownBlobIndexerParsingMode { * **delimitedText**: Set to delimitedText when blobs are plain CSV files. \ * **json**: Set to json to extract structured content from JSON files. \ * **jsonArray**: Set to jsonArray to extract individual elements of a JSON array as separate documents. \ - * **jsonLines**: Set to jsonLines to extract individual JSON entities, separated by a new line, as separate documents. + * **jsonLines**: Set to jsonLines to extract individual JSON entities, separated by a new line, as separate documents. \ + * **markdown**: Set to markdown to extract content from markdown files. */ export type BlobIndexerParsingMode = string; +/** Known values of {@link MarkdownParsingSubmode} that the service accepts. */ +export enum KnownMarkdownParsingSubmode { + /** Indicates that each section of the markdown file (up to a specified depth) will be parsed into individual search documents. This can result in a single markdown file producing multiple search documents. This is the default sub-mode. */ + OneToMany = "oneToMany", + /** Indicates that each markdown file will be parsed into a single search document. */ + OneToOne = "oneToOne", +} + +/** + * Defines values for MarkdownParsingSubmode. \ + * {@link KnownMarkdownParsingSubmode} can be used interchangeably with MarkdownParsingSubmode, + * this enum contains the known values that the service supports. + * ### Known values supported by the service + * **oneToMany**: Indicates that each section of the markdown file (up to a specified depth) will be parsed into individual search documents. 
This can result in a single markdown file producing multiple search documents. This is the default sub-mode. \ + * **oneToOne**: Indicates that each markdown file will be parsed into a single search document. + */ +export type MarkdownParsingSubmode = string; + +/** Known values of {@link MarkdownHeaderDepth} that the service accepts. */ +export enum KnownMarkdownHeaderDepth { + /** Indicates that headers up to a level of h1 will be considered while grouping markdown content. */ + H1 = "h1", + /** Indicates that headers up to a level of h2 will be considered while grouping markdown content. */ + H2 = "h2", + /** Indicates that headers up to a level of h3 will be considered while grouping markdown content. */ + H3 = "h3", + /** Indicates that headers up to a level of h4 will be considered while grouping markdown content. */ + H4 = "h4", + /** Indicates that headers up to a level of h5 will be considered while grouping markdown content. */ + H5 = "h5", + /** Indicates that headers up to a level of h6 will be considered while grouping markdown content. This is the default. */ + H6 = "h6", +} + +/** + * Defines values for MarkdownHeaderDepth. \ + * {@link KnownMarkdownHeaderDepth} can be used interchangeably with MarkdownHeaderDepth, + * this enum contains the known values that the service supports. + * ### Known values supported by the service + * **h1**: Indicates that headers up to a level of h1 will be considered while grouping markdown content. \ + * **h2**: Indicates that headers up to a level of h2 will be considered while grouping markdown content. \ + * **h3**: Indicates that headers up to a level of h3 will be considered while grouping markdown content. \ + * **h4**: Indicates that headers up to a level of h4 will be considered while grouping markdown content. \ + * **h5**: Indicates that headers up to a level of h5 will be considered while grouping markdown content. 
\ + * **h6**: Indicates that headers up to a level of h6 will be considered while grouping markdown content. This is the default. + */ +export type MarkdownHeaderDepth = string; + /** Known values of {@link BlobIndexerDataToExtract} that the service accepts. */ export enum KnownBlobIndexerDataToExtract { /** Indexes just the standard blob properties and user-specified metadata. */ @@ -3003,6 +3106,24 @@ export enum KnownVectorSearchCompressionKind { */ export type VectorSearchCompressionKind = string; +/** Known values of {@link VectorSearchCompressionRescoreStorageMethod} that the service accepts. */ +export enum KnownVectorSearchCompressionRescoreStorageMethod { + /** This option preserves the original full-precision vectors. Choose this option for maximum flexibility and highest quality of compressed search results. This consumes more storage but allows for rescoring and oversampling. */ + PreserveOriginals = "preserveOriginals", + /** This option discards the original full-precision vectors. Choose this option for maximum storage savings. Since this option does not allow for rescoring and oversampling, it will often cause slight to moderate reductions in quality. */ + DiscardOriginals = "discardOriginals", +} + +/** + * Defines values for VectorSearchCompressionRescoreStorageMethod. \ + * {@link KnownVectorSearchCompressionRescoreStorageMethod} can be used interchangeably with VectorSearchCompressionRescoreStorageMethod, + * this enum contains the known values that the service supports. + * ### Known values supported by the service + * **preserveOriginals**: This option preserves the original full-precision vectors. Choose this option for maximum flexibility and highest quality of compressed search results. This consumes more storage but allows for rescoring and oversampling. \ + * **discardOriginals**: This option discards the original full-precision vectors. Choose this option for maximum storage savings. 
Since this option does not allow for rescoring and oversampling, it will often cause slight to moderate reductions in quality. + */ +export type VectorSearchCompressionRescoreStorageMethod = string; + /** Known values of {@link TokenFilterName} that the service accepts. */ export enum KnownTokenFilterName { /** A token filter that applies the Arabic normalizer to normalize the orthography. See http:\//lucene.apache.org\/core\/4_10_3\/analyzers-common\/org\/apache\/lucene\/analysis\/ar\/ArabicNormalizationFilter.html */ @@ -4674,6 +4795,51 @@ export enum KnownTextTranslationSkillLanguage { */ export type TextTranslationSkillLanguage = string; +/** Known values of {@link DocumentIntelligenceLayoutSkillOutputMode} that the service accepts. */ +export enum KnownDocumentIntelligenceLayoutSkillOutputMode { + /** The skill produces its output with one-to-many cardinality. This is the default output mode. */ + OneToMany = "oneToMany", +} + +/** + * Defines values for DocumentIntelligenceLayoutSkillOutputMode. \ + * {@link KnownDocumentIntelligenceLayoutSkillOutputMode} can be used interchangeably with DocumentIntelligenceLayoutSkillOutputMode, + * this enum contains the known values that the service supports. + * ### Known values supported by the service + * **oneToMany**: The skill produces its output with one-to-many cardinality. This is the default output mode. + */ +export type DocumentIntelligenceLayoutSkillOutputMode = string; + +/** Known values of {@link DocumentIntelligenceLayoutSkillMarkdownHeaderDepth} that the service accepts. */ +export enum KnownDocumentIntelligenceLayoutSkillMarkdownHeaderDepth { + /** Header level 1. */ + H1 = "h1", + /** Header level 2. */ + H2 = "h2", + /** Header level 3. */ + H3 = "h3", + /** Header level 4. */ + H4 = "h4", + /** Header level 5. */ + H5 = "h5", + /** Header level 6. */ + H6 = "h6", +} + +/** + * Defines values for DocumentIntelligenceLayoutSkillMarkdownHeaderDepth.
\ + * {@link KnownDocumentIntelligenceLayoutSkillMarkdownHeaderDepth} can be used interchangeably with DocumentIntelligenceLayoutSkillMarkdownHeaderDepth, + * this enum contains the known values that the service supports. + * ### Known values supported by the service + * **h1**: Header level 1. \ + * **h2**: Header level 2. \ + * **h3**: Header level 3. \ + * **h4**: Header level 4. \ + * **h5**: Header level 5. \ + * **h6**: Header level 6. + */ +export type DocumentIntelligenceLayoutSkillMarkdownHeaderDepth = string; + /** Known values of {@link LexicalTokenizerName} that the service accepts. */ export enum KnownLexicalTokenizerName { /** Grammar-based tokenizer that is suitable for processing most European-language documents. See http:\//lucene.apache.org\/core\/4_10_3\/analyzers-common\/org\/apache\/lucene\/analysis\/standard\/ClassicTokenizer.html */ diff --git a/sdk/search/search-documents/src/generated/service/models/mappers.ts b/sdk/search/search-documents/src/generated/service/models/mappers.ts index d14a94a3189c..921dceeb787a 100644 --- a/sdk/search/search-documents/src/generated/service/models/mappers.ts +++ b/sdk/search/search-documents/src/generated/service/models/mappers.ts @@ -638,6 +638,22 @@ export const IndexingParametersConfiguration: coreClient.CompositeMapper = { name: "Boolean", }, }, + markdownParsingSubmode: { + defaultValue: "oneToMany", + serializedName: "markdownParsingSubmode", + nullable: true, + type: { + name: "String", + }, + }, + markdownHeaderDepth: { + defaultValue: "h6", + serializedName: "markdownHeaderDepth", + nullable: true, + type: { + name: "String", + }, + }, documentRoot: { serializedName: "documentRoot", type: { @@ -2651,6 +2667,13 @@ export const VectorSearchCompression: coreClient.CompositeMapper = { name: "Number", }, }, + rescoringOptions: { + serializedName: "rescoringOptions", + type: { + name: "Composite", + className: "RescoringOptions", + }, + }, truncationDimension: { serializedName: "truncationDimension", 
nullable: true, @@ -2662,6 +2685,37 @@ export const VectorSearchCompression: coreClient.CompositeMapper = { }, }; +export const RescoringOptions: coreClient.CompositeMapper = { + type: { + name: "Composite", + className: "RescoringOptions", + modelProperties: { + enableRescoring: { + defaultValue: true, + serializedName: "enableRescoring", + nullable: true, + type: { + name: "Boolean", + }, + }, + defaultOversampling: { + serializedName: "defaultOversampling", + nullable: true, + type: { + name: "Number", + }, + }, + rescoreStorageMethod: { + serializedName: "rescoreStorageMethod", + nullable: true, + type: { + name: "String", + }, + }, + }, + }, +}; + export const ListIndexesResult: coreClient.CompositeMapper = { type: { name: "Composite", @@ -4309,6 +4363,35 @@ export const DocumentExtractionSkill: coreClient.CompositeMapper = { }, }; +export const DocumentIntelligenceLayoutSkill: coreClient.CompositeMapper = { + serializedName: "#Microsoft.Skills.Util.DocumentIntelligenceLayoutSkill", + type: { + name: "Composite", + className: "DocumentIntelligenceLayoutSkill", + uberParent: "SearchIndexerSkill", + polymorphicDiscriminator: SearchIndexerSkill.type.polymorphicDiscriminator, + modelProperties: { + ...SearchIndexerSkill.type.modelProperties, + outputMode: { + defaultValue: "oneToMany", + serializedName: "outputMode", + nullable: true, + type: { + name: "String", + }, + }, + markdownHeaderDepth: { + defaultValue: "h6", + serializedName: "markdownHeaderDepth", + nullable: true, + type: { + name: "String", + }, + }, + }, + }, +}; + export const WebApiSkill: coreClient.CompositeMapper = { serializedName: "#Microsoft.Skills.Custom.WebApiSkill", type: { @@ -4508,6 +4591,62 @@ export const CognitiveServicesAccountKey: coreClient.CompositeMapper = { }, }; +export const AIServicesAccountKey: coreClient.CompositeMapper = { + serializedName: "#Microsoft.Azure.Search.AIServicesByKey", + type: { + name: "Composite", + className: "AIServicesAccountKey", + uberParent: 
"CognitiveServicesAccount", + polymorphicDiscriminator: + CognitiveServicesAccount.type.polymorphicDiscriminator, + modelProperties: { + ...CognitiveServicesAccount.type.modelProperties, + key: { + serializedName: "key", + required: true, + type: { + name: "String", + }, + }, + subdomainUrl: { + serializedName: "subdomainUrl", + required: true, + type: { + name: "String", + }, + }, + }, + }, +}; + +export const AIServicesAccountIdentity: coreClient.CompositeMapper = { + serializedName: "#Microsoft.Azure.Search.AIServicesByIdentity", + type: { + name: "Composite", + className: "AIServicesAccountIdentity", + uberParent: "CognitiveServicesAccount", + polymorphicDiscriminator: + CognitiveServicesAccount.type.polymorphicDiscriminator, + modelProperties: { + ...CognitiveServicesAccount.type.modelProperties, + identity: { + serializedName: "identity", + type: { + name: "Composite", + className: "SearchIndexerDataIdentity", + }, + }, + subdomainUrl: { + serializedName: "subdomainUrl", + required: true, + type: { + name: "String", + }, + }, + }, + }, +}; + export const SearchIndexerKnowledgeStoreTableProjectionSelector: coreClient.CompositeMapper = { type: { @@ -6603,6 +6742,8 @@ export let discriminators = { TextTranslationSkill, "SearchIndexerSkill.#Microsoft.Skills.Util.DocumentExtractionSkill": DocumentExtractionSkill, + "SearchIndexerSkill.#Microsoft.Skills.Util.DocumentIntelligenceLayoutSkill": + DocumentIntelligenceLayoutSkill, "SearchIndexerSkill.#Microsoft.Skills.Custom.WebApiSkill": WebApiSkill, "SearchIndexerSkill.#Microsoft.Skills.Custom.AmlSkill": AzureMachineLearningSkill, @@ -6614,6 +6755,10 @@ export let discriminators = { DefaultCognitiveServicesAccount, "CognitiveServicesAccount.#Microsoft.Azure.Search.CognitiveServicesByKey": CognitiveServicesAccountKey, + "CognitiveServicesAccount.#Microsoft.Azure.Search.AIServicesByKey": + AIServicesAccountKey, + "CognitiveServicesAccount.#Microsoft.Azure.Search.AIServicesByIdentity": + AIServicesAccountIdentity, 
"ScoringFunction.distance": DistanceScoringFunction, "ScoringFunction.freshness": FreshnessScoringFunction, "ScoringFunction.magnitude": MagnitudeScoringFunction, diff --git a/sdk/search/search-documents/src/generated/service/searchServiceClient.ts b/sdk/search/search-documents/src/generated/service/searchServiceClient.ts index c48344bf4fb2..56bd74af1cb6 100644 --- a/sdk/search/search-documents/src/generated/service/searchServiceClient.ts +++ b/sdk/search/search-documents/src/generated/service/searchServiceClient.ts @@ -32,7 +32,7 @@ import { import * as Parameters from "./models/parameters"; import * as Mappers from "./models/mappers"; import { - ApiVersion20240901Preview, + ApiVersion20241101Preview, SearchServiceClientOptionalParams, GetServiceStatisticsOptionalParams, GetServiceStatisticsResponse, @@ -41,7 +41,7 @@ import { /** @internal */ export class SearchServiceClient extends coreHttpCompat.ExtendedServiceClient { endpoint: string; - apiVersion: ApiVersion20240901Preview; + apiVersion: ApiVersion20241101Preview; /** * Initializes a new instance of the SearchServiceClient class. 
@@ -51,7 +51,7 @@ export class SearchServiceClient extends coreHttpCompat.ExtendedServiceClient { */ constructor( endpoint: string, - apiVersion: ApiVersion20240901Preview, + apiVersion: ApiVersion20241101Preview, options?: SearchServiceClientOptionalParams, ) { if (endpoint === undefined) { diff --git a/sdk/search/search-documents/src/index.ts b/sdk/search/search-documents/src/index.ts index 0f1fef736038..a7420486010c 100644 --- a/sdk/search/search-documents/src/index.ts +++ b/sdk/search/search-documents/src/index.ts @@ -6,6 +6,7 @@ export { AutocompleteItem, AutocompleteMode, AutocompleteResult, + DebugInfo, FacetResult, HybridCountAndFacetMode, HybridSearch as HybridSearchOptions, @@ -15,12 +16,12 @@ export { KnownHybridCountAndFacetMode, KnownQueryDebugMode, KnownQueryLanguage, - KnownQuerySpellerType, + KnownQuerySpellerType as KnownQuerySpeller, KnownSemanticErrorMode, KnownSemanticErrorReason, KnownSemanticFieldState, + KnownSemanticQueryRewritesResultType, KnownSemanticSearchResultsType, - KnownSpeller, KnownVectorFilterMode, KnownVectorQueryKind, KnownVectorThresholdKind, @@ -30,17 +31,20 @@ export { QueryLanguage, QueryResultDocumentRerankerInput, QueryResultDocumentSubscores, - QuerySpellerType, + QueryRewritesDebugInfo, + QueryRewritesValuesDebugInfo, + QuerySpellerType as QuerySpeller, QueryType, ScoringStatistics, SearchMode, SemanticFieldState, + SemanticQueryRewritesResultType, SingleVectorFieldResult, - Speller, TextResult, VectorsDebugInfo, } from "./generated/data/models"; export { + AIServicesAccountKey, AIStudioModelCatalogName, AnalyzedTokenInfo, AnalyzeResult, @@ -72,6 +76,8 @@ export { DistanceScoringFunction, DistanceScoringParameters, DocumentExtractionSkill, + DocumentIntelligenceLayoutSkillMarkdownHeaderDepth, + DocumentIntelligenceLayoutSkillOutputMode, EdgeNGramTokenFilterSide, EdgeNGramTokenizer, ElisionTokenFilter, @@ -100,6 +106,8 @@ export { KnownBlobIndexerParsingMode, KnownBlobIndexerPDFTextRotationAlgorithm, 
KnownCustomEntityLookupSkillLanguage, + KnownDocumentIntelligenceLayoutSkillMarkdownHeaderDepth, + KnownDocumentIntelligenceLayoutSkillOutputMode, KnownEntityCategory, KnownEntityRecognitionSkillLanguage, KnownImageAnalysisSkillLanguage, @@ -112,6 +120,8 @@ export { KnownLexicalAnalyzerName, KnownLexicalNormalizerName, KnownLexicalNormalizerName as KnownNormalizerNames, + KnownMarkdownHeaderDepth, + KnownMarkdownParsingSubmode, KnownOcrLineEnding, KnownOcrSkillLanguage, KnownPIIDetectionSkillMaskingMode, @@ -128,6 +138,7 @@ export { KnownVectorSearchAlgorithmKind, KnownVectorSearchAlgorithmMetric, KnownVectorSearchCompressionKind, + KnownVectorSearchCompressionRescoreStorageMethod, KnownVectorSearchCompressionTarget, KnownVectorSearchVectorizerKind, KnownVisualFeature, @@ -144,6 +155,8 @@ export { MagnitudeScoringFunction, MagnitudeScoringParameters, MappingCharFilter, + MarkdownHeaderDepth, + MarkdownParsingSubmode, MergeSkill, MicrosoftLanguageStemmingTokenizer, MicrosoftLanguageTokenizer, @@ -159,6 +172,7 @@ export { PatternReplaceTokenFilter, PhoneticEncoder, PhoneticTokenFilter, + RescoringOptions, ResourceCounter, ScalarQuantizationCompression, ScalarQuantizationParameters, @@ -218,6 +232,7 @@ export { VectorEncodingFormat, VectorSearchCompression as BaseVectorSearchCompression, VectorSearchCompressionKind, + VectorSearchCompressionRescoreStorageMethod, VectorSearchCompressionTarget, VectorSearchProfile, VectorSearchVectorizerKind, @@ -238,6 +253,7 @@ export { ExtractDocumentKey, ExtractiveQueryAnswer, ExtractiveQueryCaption, + GenerativeQueryRewrites, GetDocumentOptions, IndexDocumentsAction, IndexDocumentsOptions, @@ -248,6 +264,7 @@ export { QueryAnswer, QueryCaption, QueryResultDocumentSemanticField, + QueryRewrites, SearchDocumentsPageResult, SearchDocumentsResult, SearchDocumentsResultBase, @@ -303,6 +320,7 @@ export { SearchIndexingBufferedSender, } from "./searchIndexingBufferedSender"; export { + AIServicesAccountIdentity, AIServicesVisionParameters, 
AIServicesVisionVectorizer, AliasIterator, @@ -347,6 +365,7 @@ export { DeleteIndexOptions, DeleteSkillsetOptions, DeleteSynonymMapOptions, + DocumentIntelligenceLayoutSkill, EdgeNGramTokenFilter, EntityCategory, EntityRecognitionSkill, diff --git a/sdk/search/search-documents/src/indexModels.ts b/sdk/search/search-documents/src/indexModels.ts index 36fb4d337d2d..b2d51f5ef8fa 100644 --- a/sdk/search/search-documents/src/indexModels.ts +++ b/sdk/search/search-documents/src/indexModels.ts @@ -5,6 +5,7 @@ import type { OperationOptions } from "@azure/core-client"; import type { PagedAsyncIterableIterator } from "@azure/core-paging"; import type { AutocompleteMode, + DebugInfo, FacetResult, HybridSearch, IndexActionType, @@ -18,11 +19,12 @@ import type { QueryDebugMode, QueryLanguage, QueryResultDocumentRerankerInput, + QuerySpellerType as QuerySpeller, QueryType, ScoringStatistics, SearchMode, SemanticFieldState, - Speller, + SemanticQueryRewritesResultType, VectorsDebugInfo, } from "./generated/data/models"; import type GeographyPoint from "./geographyPoint"; @@ -242,6 +244,11 @@ export interface VectorizableTextQuery extends BaseVector kind: "text"; /** The text to be vectorized to perform a vector search query. */ text: string; + /** + * Can be configured to let a generative model rewrite the query before sending it to be + * vectorized. + */ + queryRewrites?: QueryRewrites; } /** The query parameters to use for vector search when an url that represents an image value that needs to be vectorized is provided. */ @@ -343,7 +350,7 @@ export interface BaseSearchRequestOptions< /** * Improve search recall by spell-correcting individual search query terms. */ - speller?: Speller; + speller?: QuerySpeller; /** * A value that specifies whether any or all of the search terms must be matched in order to * count the document as a match. 
Possible values include: 'any', 'all' @@ -476,6 +483,11 @@ export interface SearchDocumentsResultBase { * NOTE: This property will not be serialized. It can only be populated by the server. */ readonly answers?: QueryAnswerResult[]; + /** + * Debug information that applies to the search results as a whole. + * NOTE: This property will not be serialized. It can only be populated by the server. + */ + readonly debugInfo?: DebugInfo; /** * Reason that a partial response was returned for a semantic search request. * NOTE: This property will not be serialized. It can only be populated by the server. @@ -486,6 +498,11 @@ export interface SearchDocumentsResultBase { * NOTE: This property will not be serialized. It can only be populated by the server. */ readonly semanticSearchResultsType?: SemanticSearchResultsType; + /** + * Type of query rewrite that was used to retrieve documents. + * NOTE: This property will not be serialized. It can only be populated by the server. + */ + readonly semanticQueryRewritesResultType?: SemanticQueryRewritesResultType; } /** @@ -952,6 +969,10 @@ export interface ExtractiveQueryAnswer { * The confidence threshold. Default threshold is 0.7 */ threshold?: number; + /** + * The maximum length in characters of each answer. + */ + maxCharLength?: number; } /** @@ -965,6 +986,10 @@ export type QueryAnswer = ExtractiveQueryAnswer; export interface ExtractiveQueryCaption { captionType: "extractive"; highlight?: boolean; + /** + * The maximum length in characters of each caption. + */ + maxCharLength?: number; } /** @@ -1004,6 +1029,11 @@ export interface SemanticSearchOptions { * to 'None'. */ captions?: QueryCaption; + /** + * When QueryRewrites is set to `generative`, the query terms are sent to a generative model which will + * produce 10 (default) rewrites to help increase the recall of the request. Defaults to `None`. 
+ */ + queryRewrites?: QueryRewrites; /** * Allows setting a separate search query that will be solely used for semantic reranking, * semantic captions and semantic answers. Is useful for scenarios where there is a need to use @@ -1020,6 +1050,13 @@ export interface SemanticSearchOptions { debugMode?: QueryDebugMode; } +export type QueryRewrites = GenerativeQueryRewrites; + +export interface GenerativeQueryRewrites { + rewritesType: "generative"; + count?: number; +} + /** * Defines options for vector search queries */ diff --git a/sdk/search/search-documents/src/searchClient.ts b/sdk/search/search-documents/src/searchClient.ts index 5c4c257ecf11..7641ab8d37fc 100644 --- a/sdk/search/search-documents/src/searchClient.ts +++ b/sdk/search/search-documents/src/searchClient.ts @@ -16,6 +16,7 @@ import type { IndexDocumentsResult, QueryAnswerType as BaseAnswers, QueryCaptionType as BaseCaptions, + QueryRewritesType as GeneratedQueryRewrites, SearchRequest as GeneratedSearchRequest, SuggestRequest, VectorQueryUnion as GeneratedVectorQuery, @@ -34,6 +35,7 @@ import type { NarrowedModel, QueryAnswer, QueryCaption, + QueryRewrites, SearchDocumentsPageResult, SearchDocumentsResult, SearchFieldArray, @@ -337,6 +339,7 @@ export class SearchClient implements IndexDocumentsClient answers, captions, debugMode, + queryRewrites, ...restSemanticOptions } = semanticSearchOptions ?? {}; const { queries, filterMode, ...restVectorOptions } = vectorSearchOptions ?? 
{}; @@ -357,6 +360,7 @@ export class SearchClient implements IndexDocumentsClient semanticErrorHandling: errorMode, semanticConfigurationName: configurationName, debug: debugMode, + queryRewrites: this.convertQueryRewrites(queryRewrites), vectorFilterMode: filterMode, hybridSearch: hybridSearch, }; @@ -897,7 +901,7 @@ export class SearchClient implements IndexDocumentsClient } const config = []; - const { answerType: output, count, threshold } = answers; + const { answerType: output, count, threshold, maxCharLength } = answers; if (count) { config.push(`count-${count}`); @@ -907,6 +911,10 @@ export class SearchClient implements IndexDocumentsClient config.push(`threshold-${threshold}`); } + if (maxCharLength) { + config.push(`maxcharlength-${maxCharLength}`); + } + if (config.length) { return output + `|${config.join(",")}`; } @@ -920,12 +928,16 @@ export class SearchClient implements IndexDocumentsClient } const config = []; - const { captionType: output, highlight } = captions; + const { captionType: output, highlight, maxCharLength } = captions; if (highlight !== undefined) { config.push(`highlight-${highlight}`); } + if (maxCharLength) { + config.push(`maxcharlength-${maxCharLength}`); + } + if (config.length) { return output + `|${config.join(",")}`; } @@ -934,6 +946,43 @@ export class SearchClient implements IndexDocumentsClient } private convertVectorQuery>(vectorQuery: T): GeneratedVectorQuery { - return { ...vectorQuery, fields: this.convertVectorQueryFields(vectorQuery?.fields) }; + switch (vectorQuery.kind) { + case "text": { + const { fields, queryRewrites, ...restFields } = vectorQuery; + return { + ...restFields, + fields: this.convertVectorQueryFields(fields), + queryRewrites: this.convertQueryRewrites(queryRewrites), + }; + } + case "vector": + case "imageUrl": + case "imageBinary": { + return { ...vectorQuery, fields: this.convertVectorQueryFields(vectorQuery?.fields) }; + } + default: { + logger.warning("Unknown vector query kind; sending without 
serialization"); + return vectorQuery as any; + } + } + } + + private convertQueryRewrites(queryRewrites?: QueryRewrites): GeneratedQueryRewrites | undefined { + if (!queryRewrites) { + return queryRewrites; + } + + const { rewritesType: baseOutput } = queryRewrites; + switch (baseOutput) { + case "generative": { + const { count } = queryRewrites; + + const config = [...(count === undefined ? [] : [`count-${count}`])]; + if (config.length) return baseOutput + `|${config.join(",")}`; + return baseOutput; + } + default: + return baseOutput; + } } } diff --git a/sdk/search/search-documents/src/serviceModels.ts b/sdk/search/search-documents/src/serviceModels.ts index 6168117ec91b..1c1a4f49b066 100644 --- a/sdk/search/search-documents/src/serviceModels.ts +++ b/sdk/search/search-documents/src/serviceModels.ts @@ -4,6 +4,7 @@ import type { OperationOptions } from "@azure/core-client"; import type { PagedAsyncIterableIterator } from "@azure/core-paging"; import type { + AIServicesAccountKey, AIStudioModelCatalogName, AsciiFoldingTokenFilter, AzureMachineLearningSkill, @@ -15,6 +16,7 @@ import type { CjkBigramTokenFilter, ClassicSimilarity, ClassicTokenizer, + CognitiveServicesAccount as BaseCognitiveServicesAccount, CognitiveServicesAccountKey, CommonGramTokenFilter, ConditionalSkill, @@ -25,6 +27,8 @@ import type { DictionaryDecompounderTokenFilter, DistanceScoringFunction, DocumentExtractionSkill, + DocumentIntelligenceLayoutSkillMarkdownHeaderDepth, + DocumentIntelligenceLayoutSkillOutputMode, EdgeNGramTokenFilterSide, EdgeNGramTokenizer, ElisionTokenFilter, @@ -73,6 +77,8 @@ import type { LuceneStandardAnalyzer, MagnitudeScoringFunction, MappingCharFilter, + MarkdownHeaderDepth, + MarkdownParsingSubmode, MergeSkill, MicrosoftLanguageStemmingTokenizer, MicrosoftLanguageTokenizer, @@ -638,6 +644,7 @@ export type SearchIndexerSkill = | ConditionalSkill | CustomEntityLookupSkill | DocumentExtractionSkill + | DocumentIntelligenceLayoutSkill | EntityLinkingSkill | 
EntityRecognitionSkill | EntityRecognitionSkillV3 @@ -655,12 +662,34 @@ export type SearchIndexerSkill = | VisionVectorizeSkill | WebApiSkill; +/** A skill that extracts content and layout information (as markdown), via Azure AI Services, from files within the enrichment pipeline. */ +export interface DocumentIntelligenceLayoutSkill extends BaseSearchIndexerSkill { + /** Polymorphic discriminator, which specifies the different types this object can be */ + odatatype: "#Microsoft.Skills.Util.DocumentIntelligenceLayoutSkill"; + /** Controls the cardinality of the output produced by the skill. Default is 'oneToMany'. */ + outputMode?: DocumentIntelligenceLayoutSkillOutputMode; + /** The depth of headers in the markdown output. Default is h6. */ + markdownHeaderDepth?: DocumentIntelligenceLayoutSkillMarkdownHeaderDepth; +} + /** * Contains the possible cases for CognitiveServicesAccount. */ export type CognitiveServicesAccount = | DefaultCognitiveServicesAccount - | CognitiveServicesAccountKey; + | CognitiveServicesAccountKey + | AIServicesAccountKey + | AIServicesAccountIdentity; + +/** The multi-region account of an Azure AI service resource that's attached to a skillset. */ +export interface AIServicesAccountIdentity extends BaseCognitiveServicesAccount { + /** Polymorphic discriminator, which specifies the different types this object can be */ + odatatype: "#Microsoft.Azure.Search.AIServicesByIdentity"; + /** The user-assigned managed identity used for connections to AI Service. If not specified, the system-assigned managed identity is used. On updates to the skillset, if the identity is unspecified, the value remains unchanged. If set to "none", the value of this property is cleared. */ + identity?: SearchIndexerDataIdentity; + /** The subdomain url for the corresponding AI Service. */ + subdomainUrl: string; +} /** * Tokenizer that uses regex pattern matching to construct distinct tokens. This tokenizer is * implemented using Apache Lucene. 
@@ -2379,7 +2408,7 @@ export interface IndexingParametersConfiguration { failOnUnsupportedContentType?: boolean; /** For Azure blobs, set to false if you want to continue indexing if a document fails indexing. */ failOnUnprocessableDocument?: boolean; - /** For Azure blobs, set this property to true to still index storage metadata for blob content that is too large to process. Oversized blobs are treated as errors by default. For limits on blob size, see https://docs.microsoft.com/azure/search/search-limits-quotas-capacity. */ + /** For Azure blobs, set this property to true to still index storage metadata for blob content that is too large to process. Oversized blobs are treated as errors by default. For limits on blob size, see https://learn.microsoft.com/azure/search/search-limits-quotas-capacity. */ indexStorageMetadataOnlyForOversizedDocuments?: boolean; /** For CSV blobs, specifies a comma-delimited list of column headers, useful for mapping source fields to destination fields in an index. */ delimitedTextHeaders?: string; @@ -2387,6 +2416,10 @@ export interface IndexingParametersConfiguration { delimitedTextDelimiter?: string; /** For CSV blobs, indicates that the first (non-blank) line of each blob contains headers. */ firstLineContainsHeaders?: boolean; + /** Specifies the submode that will determine whether a markdown file will be parsed into exactly one search document or multiple search documents. Default is `oneToMany`. */ + markdownParsingSubmode?: MarkdownParsingSubmode; + /** Specifies the max header depth that will be considered while grouping markdown content. Default is `h6`. */ + markdownHeaderDepth?: MarkdownHeaderDepth; /** For JSON arrays, given a structured or semi-structured document, you can specify a path to the array using this property. */ documentRoot?: string; /** Specifies the data to extract from Azure blob storage and tells the indexer which data to extract from image content when "imageAction" is set to a value other than "none". 
This applies to embedded image content in a .PDF or other application, or image files such as .jpg and .png, in Azure blobs. */ diff --git a/sdk/search/search-documents/src/serviceUtils.ts b/sdk/search/search-documents/src/serviceUtils.ts index 670f408ef00c..62899fc473ac 100644 --- a/sdk/search/search-documents/src/serviceUtils.ts +++ b/sdk/search/search-documents/src/serviceUtils.ts @@ -6,18 +6,20 @@ import type { SuggestDocumentsResult as GeneratedSuggestDocumentsResult, } from "./generated/data/models"; import type { + AIServicesAccountIdentity as GeneratedAIServicesAccountIdentity, + AIServicesAccountKey as GeneratedAIServicesAccountKey, AIServicesVisionVectorizer as GeneratedAIServicesVisionVectorizer, AMLParameters as GeneratedAMLParameters, AMLVectorizer as GeneratedAMLVectorizer, AzureOpenAIVectorizer as GeneratedAzureOpenAIVectorizer, BM25Similarity, ClassicSimilarity, - CognitiveServicesAccountKey, + CognitiveServicesAccountKey as GeneratedCognitiveServicesAccountKey, CognitiveServicesAccountUnion, CustomAnalyzer as BaseCustomAnalyzer, DataChangeDetectionPolicyUnion, DataDeletionDetectionPolicyUnion, - DefaultCognitiveServicesAccount, + DefaultCognitiveServicesAccount as GeneratedDefaultCognitiveServicesAccount, ExhaustiveKnnAlgorithmConfiguration as GeneratedExhaustiveKnnAlgorithmConfiguration, HighWaterMarkChangeDetectionPolicy, HnswAlgorithmConfiguration as GeneratedHnswAlgorithmConfiguration, @@ -131,6 +133,7 @@ const knownSkills: Record<`${SearchIndexerSkillUnion["odatatype"]}`, true> = { "#Microsoft.Skills.Vision.OcrSkill": true, "#Microsoft.Skills.Custom.AmlSkill": true, "#Microsoft.Skills.Vision.VectorizeSkill": true, + "#Microsoft.Skills.Util.DocumentIntelligenceLayoutSkill": true, }; export function convertSkillsToPublic(skills: SearchIndexerSkillUnion[]): SearchIndexerSkill[] { @@ -149,7 +152,19 @@ export function convertCognitiveServicesAccountToGenerated( return cognitiveServicesAccount; } - return cognitiveServicesAccount as 
CognitiveServicesAccountUnion; + switch (cognitiveServicesAccount.odatatype) { + case "#Microsoft.Azure.Search.AIServicesByIdentity": + case "#Microsoft.Azure.Search.DefaultCognitiveServices": + case "#Microsoft.Azure.Search.CognitiveServicesByKey": + case "#Microsoft.Azure.Search.AIServicesByKey": + return cognitiveServicesAccount; + default: { + logger.warning( + `Unsupported Cognitive Services account odatatype: ${(cognitiveServicesAccount as any).odatatype}`, + ); + return cognitiveServicesAccount as any; + } + } } export function convertCognitiveServicesAccountToPublic( @@ -159,11 +174,37 @@ export function convertCognitiveServicesAccountToPublic( return cognitiveServicesAccount; } - if (cognitiveServicesAccount.odatatype === "#Microsoft.Azure.Search.DefaultCognitiveServices") { - return cognitiveServicesAccount as DefaultCognitiveServicesAccount; - } else { - return cognitiveServicesAccount as CognitiveServicesAccountKey; - } + const deserializers: Record< + CognitiveServicesAccountUnion["odatatype"], + () => CognitiveServicesAccount + > = { + "#Microsoft.Azure.Search.DefaultCognitiveServices": () => { + return cognitiveServicesAccount as GeneratedDefaultCognitiveServicesAccount; + }, + "#Microsoft.Azure.Search.CognitiveServicesByKey": () => { + return cognitiveServicesAccount as GeneratedCognitiveServicesAccountKey; + }, + "#Microsoft.Azure.Search.AIServicesByKey": () => { + return cognitiveServicesAccount as GeneratedAIServicesAccountKey; + }, + "#Microsoft.Azure.Search.AIServicesByIdentity": () => { + const { identity, ...restParams } = + cognitiveServicesAccount as GeneratedAIServicesAccountIdentity; + return { + ...restParams, + identity: convertSearchIndexerDataIdentityToPublic(identity ?? 
undefined), + }; + }, + }; + + const defaultDeserializer: () => CognitiveServicesAccount = () => { + logger.warning( + `Unsupported Cognitive Services account odatatype: ${(cognitiveServicesAccount as CognitiveServicesAccount).odatatype}`, + ); + return cognitiveServicesAccount as CognitiveServicesAccount; + }; + + return (deserializers[cognitiveServicesAccount.odatatype] ?? defaultDeserializer)(); } export function convertTokenFiltersToGenerated( diff --git a/sdk/search/search-documents/swagger/Data.md b/sdk/search/search-documents/swagger/Data.md index e0c8f52d691e..584cca2ae471 100644 --- a/sdk/search/search-documents/swagger/Data.md +++ b/sdk/search/search-documents/swagger/Data.md @@ -10,7 +10,7 @@ generate-metadata: false license-header: MICROSOFT_MIT_NO_VERSION output-folder: ../ source-code-folder-path: ./src/generated/data -input-file: https://raw.githubusercontent.com/Azure/azure-rest-api-specs/4b7fbd8b842b509a0330f20260821dd844328dff/specification/search/data-plane/Azure.Search/preview/2024-09-01-preview/searchindex.json +input-file: https://raw.githubusercontent.com/Azure/azure-rest-api-specs/14531a7cf6101c1dd57e7c1c83103a047bb8f5bb/specification/search/data-plane/Azure.Search/preview/2024-11-01-preview/searchindex.json add-credentials: false title: SearchClient use-extension: diff --git a/sdk/search/search-documents/swagger/Service.md b/sdk/search/search-documents/swagger/Service.md index 199fd98f4beb..5d4574eb9ad4 100644 --- a/sdk/search/search-documents/swagger/Service.md +++ b/sdk/search/search-documents/swagger/Service.md @@ -10,7 +10,7 @@ generate-metadata: false license-header: MICROSOFT_MIT_NO_VERSION output-folder: ../ source-code-folder-path: ./src/generated/service -input-file: https://raw.githubusercontent.com/Azure/azure-rest-api-specs/4b7fbd8b842b509a0330f20260821dd844328dff/specification/search/data-plane/Azure.Search/preview/2024-09-01-preview/searchservice.json +input-file: 
https://raw.githubusercontent.com/Azure/azure-rest-api-specs/14531a7cf6101c1dd57e7c1c83103a047bb8f5bb/specification/search/data-plane/Azure.Search/preview/2024-11-01-preview/searchservice.json add-credentials: false use-extension: "@autorest/typescript": "6.0.27" diff --git a/sdk/search/search-documents/test/public/node/searchClient.spec.ts b/sdk/search/search-documents/test/public/node/searchClient.spec.ts index 78ce44059974..2e25a55e2859 100644 --- a/sdk/search/search-documents/test/public/node/searchClient.spec.ts +++ b/sdk/search/search-documents/test/public/node/searchClient.spec.ts @@ -8,18 +8,19 @@ import { assert } from "chai"; import type { Context, Suite } from "mocha"; import type { AutocompleteResult, + SearchFieldArray, SearchIndex, SearchIndexClient, + SelectArray, SelectFields, } from "../../../src"; import { AzureKeyCredential, IndexDocumentsBatch, KnownQueryLanguage, - KnownSpeller, + KnownQuerySpeller, SearchClient, } from "../../../src"; -import type { SearchFieldArray, SelectArray } from "../../../src/indexModels"; import { defaultServiceVersion } from "../../../src/serviceUtils"; import type { Hotel } from "../utils/interfaces"; import { createClients } from "../utils/recordedClient"; @@ -112,7 +113,7 @@ describe("SearchClient", function (this: Suite) { top: 5, includeTotalCount: true, queryLanguage: KnownQueryLanguage.EnUs, - speller: KnownSpeller.Lexicon, + speller: KnownQuerySpeller.Lexicon, }); assert.equal(searchResults.count, 6); }); diff --git a/sdk/search/search-documents/test/public/typeDefinitions.ts b/sdk/search/search-documents/test/public/typeDefinitions.ts index 71d3b2d893ad..07ff80e98276 100644 --- a/sdk/search/search-documents/test/public/typeDefinitions.ts +++ b/sdk/search/search-documents/test/public/typeDefinitions.ts @@ -81,6 +81,7 @@ type BlobIndexerParsingMode = | "json" | "jsonArray" | "jsonLines" + | "markdown" | "text"; type BlobIndexerPDFTextRotationAlgorithm = "detectAngles" | "none"; type 
CustomEntityLookupSkillLanguage = "da" | "de" | "en" | "es" | "fi" | "fr" | "it" | "ko" | "pt";