From 4893bc9b4378dedfd4212ad16bbe18748d46f556 Mon Sep 17 00:00:00 2001 From: Austin DeNoble Date: Sat, 25 Jan 2025 15:40:51 -0500 Subject: [PATCH 1/9] update types to handle new sparse index fields, update tests to account for new deserialization --- pinecone/client.go | 2 +- pinecone/client_test.go | 4 +-- pinecone/index_connection_test.go | 14 ++++----- pinecone/models.go | 15 +++++++++- pinecone/models_test.go | 48 +++++++++++++++++++------------ pinecone/test_suite.go | 8 ++++-- 6 files changed, 59 insertions(+), 32 deletions(-) diff --git a/pinecone/client.go b/pinecone/client.go index 069f283..19d2f4d 100644 --- a/pinecone/client.go +++ b/pinecone/client.go @@ -1566,7 +1566,7 @@ func toIndex(idx *db_control.IndexModel) *Index { return &Index{ Name: idx.Name, - Dimension: *idx.Dimension, + Dimension: idx.Dimension, Host: idx.Host, Metric: IndexMetric(idx.Metric), DeletionProtection: DeletionProtection(deletionProtection), diff --git a/pinecone/client_test.go b/pinecone/client_test.go index fb9f81e..cb6024f 100644 --- a/pinecone/client_test.go +++ b/pinecone/client_test.go @@ -1044,7 +1044,7 @@ func TestToIndexUnit(t *testing.T) { }, expectedOutput: &Index{ Name: "testIndex", - Dimension: 128, + Dimension: &dimension, Host: "test-host", Metric: "cosine", DeletionProtection: "disabled", @@ -1092,7 +1092,7 @@ func TestToIndexUnit(t *testing.T) { }, expectedOutput: &Index{ Name: "testIndex", - Dimension: 128, + Dimension: &dimension, Host: "test-host", Metric: "cosine", DeletionProtection: "enabled", diff --git a/pinecone/index_connection_test.go b/pinecone/index_connection_test.go index 72f68d0..369d0c0 100644 --- a/pinecone/index_connection_test.go +++ b/pinecone/index_connection_test.go @@ -27,7 +27,7 @@ func (ts *IntegrationTests) TestFetchVectors() { } func (ts *IntegrationTests) TestQueryByVector() { - vec := make([]float32, ts.dimension) + vec := make([]float32, derefOrDefault(ts.dimension, 0)) for i := range vec { vec[i] = 0.01 } @@ -61,7 +61,7 @@ func (ts 
*IntegrationTests) TestDeleteVectorsById() { assert.NoError(ts.T(), err) ts.vectorIds = []string{} - vectors := GenerateVectors(5, ts.dimension, true, nil) + vectors := GenerateVectors(5, derefOrDefault(ts.dimension, 0), true, nil) _, err = ts.idxConn.UpsertVectors(ctx, vectors) if err != nil { @@ -95,7 +95,7 @@ func (ts *IntegrationTests) TestDeleteVectorsByFilter() { } ts.vectorIds = []string{} - vectors := GenerateVectors(5, ts.dimension, true, nil) + vectors := GenerateVectors(5, derefOrDefault(ts.dimension, 0), true, nil) _, err = ts.idxConn.UpsertVectors(ctx, vectors) if err != nil { @@ -116,7 +116,7 @@ func (ts *IntegrationTests) TestDeleteAllVectorsInNamespace() { assert.NoError(ts.T(), err) ts.vectorIds = []string{} - vectors := GenerateVectors(5, ts.dimension, true, nil) + vectors := GenerateVectors(5, derefOrDefault(ts.dimension, 0), true, nil) _, err = ts.idxConn.UpsertVectors(ctx, vectors) if err != nil { @@ -244,9 +244,9 @@ func (ts *IntegrationTests) TestUpdateVectorMetadata() { func (ts *IntegrationTests) TestUpdateVectorSparseValues() { ctx := context.Background() - dims := int(ts.dimension) - indices := generateUint32Array(dims) - vals := generateFloat32Array(dims) + dims := int32(derefOrDefault(ts.dimension, 0)) + indices := generateUint32Array(int(dims)) + vals := generateFloat32Array(int(dims)) expectedSparseValues := SparseValues{ Indices: indices, Values: vals, diff --git a/pinecone/models.go b/pinecone/models.go index 6efd7c5..2eb104b 100644 --- a/pinecone/models.go +++ b/pinecone/models.go @@ -65,19 +65,32 @@ type IndexSpec struct { Serverless *ServerlessSpec `json:"serverless,omitempty"` } +// [IndexEmbed] is the embedding model configured for an index, including document fields mapped to embedding inputs. 
+type IndexEmbed struct { + Model string `json:"model"` + Dimension *int32 `json:"dimension,omitempty"` + Metric *IndexMetric `json:"metric,omitempty"` + VectorType *string `json:"vector_type,omitempty"` + FieldMap *map[string]interface{} `json:"field_map,omitempty"` + ReadParameters *map[string]interface{} `json:"read_parameters,omitempty"` + WriteParameters *map[string]interface{} `json:"write_parameters,omitempty"` +} + // [IndexTags] is a set of key-value pairs that can be attached to a Pinecone [Index]. type IndexTags map[string]string // [Index] is a Pinecone [Index] object. Can be either a pod-based or a serverless [Index], depending on the [IndexSpec]. type Index struct { Name string `json:"name"` - Dimension int32 `json:"dimension"` Host string `json:"host"` Metric IndexMetric `json:"metric"` + VectorType string `json:"vector_type"` DeletionProtection DeletionProtection `json:"deletion_protection,omitempty"` + Dimension *int32 `json:"dimension"` Spec *IndexSpec `json:"spec,omitempty"` Status *IndexStatus `json:"status,omitempty"` Tags *IndexTags `json:"tags,omitempty"` + Embed *IndexEmbed `json:"embed,omitempty"` } // [Collection] is a Pinecone [collection entity]. Only available for pod-based Indexes. 
diff --git a/pinecone/models_test.go b/pinecone/models_test.go index 21aa392..cb46e7b 100644 --- a/pinecone/models_test.go +++ b/pinecone/models_test.go @@ -7,7 +7,7 @@ import ( "google.golang.org/protobuf/types/known/structpb" ) -func TestMarshalIndexStatus(t *testing.T) { +func TestMarshalIndexStatusUnit(t *testing.T) { tests := []struct { name string input IndexStatus @@ -44,7 +44,7 @@ func TestMarshalIndexStatus(t *testing.T) { } } -func TestMarshalServerlessSpec(t *testing.T) { +func TestMarshalServerlessSpecUnit(t *testing.T) { tests := []struct { name string input ServerlessSpec @@ -82,7 +82,7 @@ func TestMarshalServerlessSpec(t *testing.T) { } } -func TestMarshalPodSpec(t *testing.T) { +func TestMarshalPodSpecUnit(t *testing.T) { sourceCollection := "source-collection" tests := []struct { name string @@ -138,7 +138,7 @@ func TestMarshalPodSpec(t *testing.T) { } } -func TestMarshalIndexSpec(t *testing.T) { +func TestMarshalIndexSpecUnit(t *testing.T) { sourceCollection := "source-collection" tests := []struct { name string @@ -191,7 +191,9 @@ func TestMarshalIndexSpec(t *testing.T) { } } -func TestMarshalIndex(t *testing.T) { +func TestMarshalIndexUnit(t *testing.T) { + dimension := int32(128) + tests := []struct { name string input Index @@ -200,10 +202,15 @@ func TestMarshalIndex(t *testing.T) { { name: "All fields present", input: Index{ - Name: "test-index", - Dimension: 128, - Host: "index-host-1.io", - Metric: "cosine", + Name: "test-index", + Dimension: &dimension, + Host: "index-host-1.io", + Metric: "cosine", + VectorType: "sparse", + DeletionProtection: "enabled", + Embed: &IndexEmbed{ + Model: "multilingual-e5-large", + }, Spec: &IndexSpec{ Serverless: &ServerlessSpec{ Cloud: "aws", @@ -214,25 +221,28 @@ func TestMarshalIndex(t *testing.T) { Ready: true, State: "Ready", }, + Tags: &IndexTags{ + "test1": "test-tag-1", + }, }, - want: 
`{"name":"test-index","dimension":128,"host":"index-host-1.io","metric":"cosine","spec":{"serverless":{"cloud":"aws","region":"us-west-2"}},"status":{"ready":true,"state":"Ready"}}`, + want: `{"name":"test-index","host":"index-host-1.io","metric":"cosine","vector_type":"sparse","deletion_protection":"enabled","dimension":128,"spec":{"serverless":{"cloud":"aws","region":"us-west-2"}},"status":{"ready":true,"state":"Ready"},"tags":{"test1":"test-tag-1"},"embed":{"model":"multilingual-e5-large"}}`, }, { name: "Fields omitted", input: Index{}, - want: `{"name":"","dimension":0,"host":"","metric":""}`, + want: `{"name":"","host":"","metric":"","vector_type":"","dimension":null}`, }, { name: "Fields empty", input: Index{ Name: "", - Dimension: 0, + Dimension: nil, Host: "", Metric: "", Spec: nil, Status: nil, }, - want: `{"name":"","dimension":0,"host":"","metric":""}`, + want: `{"name":"","host":"","metric":"","vector_type":"","dimension":null}`, }, } @@ -250,7 +260,7 @@ func TestMarshalIndex(t *testing.T) { } } -func TestMarshalCollection(t *testing.T) { +func TestMarshalCollectionUnit(t *testing.T) { tests := []struct { name string input Collection @@ -301,7 +311,7 @@ func TestMarshalCollection(t *testing.T) { } } -func TestMarshalPodSpecMetadataConfig(t *testing.T) { +func TestMarshalPodSpecMetadataConfigUnit(t *testing.T) { tests := []struct { name string input PodSpecMetadataConfig @@ -338,7 +348,7 @@ func TestMarshalPodSpecMetadataConfig(t *testing.T) { } } -func TestMarshalVector(t *testing.T) { +func TestMarshalVectorUnit(t *testing.T) { metadata, err := structpb.NewStruct(map[string]interface{}{"genre": "rock"}) if err != nil { t.Fatalf("Failed to create metadata: %v", err) @@ -388,7 +398,7 @@ func TestMarshalVector(t *testing.T) { } } -func TestMarshalScoredVector(t *testing.T) { +func TestMarshalScoredVectorUnit(t *testing.T) { metadata, err := structpb.NewStruct(map[string]interface{}{"genre": "rock"}) if err != nil { t.Fatalf("Failed to create metadata: 
%v", err) @@ -441,7 +451,7 @@ func TestMarshalScoredVector(t *testing.T) { } } -func TestMarshalSparseValues(t *testing.T) { +func TestMarshalSparseValuesUnit(t *testing.T) { tests := []struct { name string input SparseValues @@ -481,7 +491,7 @@ func TestMarshalSparseValues(t *testing.T) { } } -func TestMarshalNamespaceSummary(t *testing.T) { +func TestMarshalNamespaceSummaryUnit(t *testing.T) { tests := []struct { name string input NamespaceSummary diff --git a/pinecone/test_suite.go b/pinecone/test_suite.go index 8ec93da..681eac8 100644 --- a/pinecone/test_suite.go +++ b/pinecone/test_suite.go @@ -17,7 +17,7 @@ type IntegrationTests struct { apiKey string client *Client host string - dimension int32 + dimension *int32 indexType string vectorIds []string idxName string @@ -44,9 +44,13 @@ func (ts *IntegrationTests) SetupSuite() { require.NotNil(ts.T(), idxConn, "Failed to create idxConn") ts.idxConn = idxConn + dim := int32(0) + if ts.dimension != nil { + dim = *ts.dimension + } // Deterministically create vectors - vectors := GenerateVectors(10, ts.dimension, false, nil) + vectors := GenerateVectors(10, dim, false, nil) // Add vector ids to the suite vectorIds := make([]string, len(vectors)) From 6a62213eec2840b9fe3328cfcabf84345258b53d Mon Sep 17 00:00:00 2001 From: Austin DeNoble Date: Mon, 27 Jan 2025 19:09:48 -0500 Subject: [PATCH 2/9] modify CreatePodIndexRequest and CreateServerlessIndexRequest structs and associated methods, add some additional error validation for the different scenarios, update unit / integration tests --- pinecone/client.go | 61 +++++++++++++++++++++++---- pinecone/client_test.go | 92 ++++++++++++++++++++++++++++++----------- pinecone/test_suite.go | 3 +- 3 files changed, 121 insertions(+), 35 deletions(-) diff --git a/pinecone/client.go b/pinecone/client.go index 19d2f4d..430f806 100644 --- a/pinecone/client.go +++ b/pinecone/client.go @@ -401,6 +401,8 @@ func (c *Client) ListIndexes(ctx context.Context) ([]*Index, error) { // - 
Dimension: (Required) The [dimensionality] of the vectors to be inserted in the Index. // - Metric: (Required) The distance metric to be used for [similarity] search. You can use // 'euclidean', 'cosine', or 'dotproduct'. +// - DeletionProtection: (Optional) determines whether [deletion protection] is "enabled" or "disabled" for the index. +// When "enabled", the index cannot be deleted. Defaults to "disabled". // - Environment: (Required) The [cloud environment] where the Index will be hosted. // - PodType: (Required) The [type of pod] to use for the [Index]. One of `s1`, `p1`, or `p2` appended with `.` and // one of `x1`, `x2`, `x4`, or `x8`. @@ -413,8 +415,6 @@ func (c *Client) ListIndexes(ctx context.Context) ([]*Index, error) { // default, all metadata is indexed; when `metadata_config` is present, // only specified metadata fields are indexed. These configurations are // only valid for use with pod-based Indexes. -// - DeletionProtection: (Optional) determines whether [deletion protection] is "enabled" or "disabled" for the index. -// When "enabled", the index cannot be deleted. Defaults to "disabled". // - Tags: (Optional) A map of tags to associate with the Index. // // To create a new pods-based Index, use the [Client.CreatePodIndex] method. 
@@ -540,7 +540,7 @@ func (req CreatePodIndexRequest) TotalCount() int { // fmt.Printf("Successfully created pod index: %s", idx.Name) // } func (c *Client) CreatePodIndex(ctx context.Context, in *CreatePodIndexRequest) (*Index, error) { - if in.Name == "" || in.Dimension == 0 || in.Metric == "" || in.Environment == "" || in.PodType == "" { + if in.Name == "" || in.Dimension <= 0 || in.Metric == "" || in.Environment == "" || in.PodType == "" { return nil, fmt.Errorf("fields Name, Dimension, Metric, Environment, and Podtype must be included in CreatePodIndexRequest") } @@ -549,6 +549,7 @@ func (c *Client) CreatePodIndex(ctx context.Context, in *CreatePodIndexRequest) pods := in.TotalCount() replicas := in.ReplicaCount() shards := in.ShardCount() + vectorType := "dense" var tags *db_control.IndexTags if in.Tags != nil { @@ -561,6 +562,7 @@ func (c *Client) CreatePodIndex(ctx context.Context, in *CreatePodIndexRequest) Metric: metric, DeletionProtection: deletionProtection, Tags: tags, + VectorType: &vectorType, } req.Spec = db_control.IndexSpec{ @@ -601,13 +603,15 @@ func (c *Client) CreatePodIndex(ctx context.Context, in *CreatePodIndexRequest) // - Name: (Required) The name of the [Index]. Resource name must be 1-45 characters long, // start and end with an alphanumeric character, // and consist only of lower case alphanumeric characters or '-'. -// - Dimension: (Required) The [dimensionality] of the vectors to be inserted in the [Index]. // - Metric: (Required) The metric used to measure the [similarity] between vectors ('euclidean', 'cosine', or 'dotproduct'). // - DeletionProtection: (Optional) Determines whether [deletion protection] is "enabled" or "disabled" for the index. // When "enabled", the index cannot be deleted. Defaults to "disabled". // - Cloud: (Required) The public [cloud provider] where you would like your [Index] hosted. // For serverless Indexes, you define only the cloud and region where the [Index] should be hosted. 
// - Region: (Required) The [region] where you would like your [Index] to be created. +// - Dimension: (Optional) The [dimensionality] of the vectors to be inserted in the [Index]. Required when VectorType is `dense` (the default); must be omitted when VectorType is `sparse`. +// - VectorType: (Optional) The index vector type. You can use `dense` or `sparse`. If `dense`, the vector dimension must be specified. +// If `sparse`, the vector dimension should not be specified, and the Metric must be set to `dotproduct`. Defaults to `dense`. // - Tags: (Optional) A map of tags to associate with the Index. // // To create a new Serverless Index, use the [Client.CreateServerlessIndex] method. @@ -652,11 +656,12 @@ func (c *Client) CreatePodIndex(ctx context.Context, in *CreatePodIndexRequest) // [deletion protection]: https://docs.pinecone.io/guides/indexes/prevent-index-deletion#enable-deletion-protection type CreateServerlessIndexRequest struct { Name string - Dimension int32 Metric IndexMetric DeletionProtection DeletionProtection Cloud Cloud Region string + Dimension *int32 + VectorType *string Tags *IndexTags } @@ -701,8 +706,24 @@ type CreateServerlessIndexRequest struct { // fmt.Printf("Successfully created serverless index: %s", idx.Name) // } func (c *Client) CreateServerlessIndex(ctx context.Context, in *CreateServerlessIndexRequest) (*Index, error) { - if in.Name == "" || in.Dimension == 0 || in.Metric == "" || in.Cloud == "" || in.Region == "" { - return nil, fmt.Errorf("fields Name, Dimension, Metric, Cloud, and Region must be included in CreateServerlessIndexRequest") + if in.Name == "" || in.Metric == "" || in.Cloud == "" || in.Region == "" { + return nil, fmt.Errorf("fields Name, Metric, Cloud, and Region must be included in CreateServerlessIndexRequest") + } + + // validate VectorType + if in.VectorType != nil { + switch *in.VectorType { + case "sparse": + if in.Dimension != nil { + return nil, fmt.Errorf("dimension should not be specified when VectorType is 'sparse'") + } else if in.Metric != Dotproduct { + return nil, fmt.Errorf("metric should be 
'dotproduct' when VectorType is 'sparse'") + } + case "dense": + if in.Dimension == nil { + return nil, fmt.Errorf("dimension should be specified when VectorType is 'dense'") + } + } } deletionProtection := pointerOrNil(db_control.DeletionProtection(in.DeletionProtection)) @@ -715,9 +736,10 @@ func (c *Client) CreateServerlessIndex(ctx context.Context, in *CreateServerless req := db_control.CreateIndexRequest{ Name: in.Name, - Dimension: &in.Dimension, + Dimension: in.Dimension, Metric: metric, DeletionProtection: deletionProtection, + VectorType: in.VectorType, Spec: db_control.IndexSpec{ Serverless: &db_control.ServerlessSpec{ Cloud: db_control.ServerlessSpecCloud(in.Cloud), @@ -1561,18 +1583,39 @@ func toIndex(idx *db_control.IndexModel) *Index { Ready: idx.Status.Ready, State: IndexStatusState(idx.Status.State), } + var embed *IndexEmbed + if idx.Embed != nil { + var metric *IndexMetric + if idx.Embed.Metric != nil { + convertedMetric := IndexMetric(*idx.Embed.Metric) + metric = &convertedMetric + } + + embed = &IndexEmbed{ + Dimension: idx.Embed.Dimension, + FieldMap: idx.Embed.FieldMap, + Metric: metric, + Model: idx.Embed.Model, + ReadParameters: idx.Embed.ReadParameters, + VectorType: idx.Embed.VectorType, + WriteParameters: idx.Embed.WriteParameters, + } + } + tags := (*IndexTags)(idx.Tags) deletionProtection := derefOrDefault(idx.DeletionProtection, "disabled") return &Index{ Name: idx.Name, - Dimension: idx.Dimension, Host: idx.Host, Metric: IndexMetric(idx.Metric), + VectorType: idx.VectorType, DeletionProtection: DeletionProtection(deletionProtection), + Dimension: idx.Dimension, Spec: spec, Status: status, Tags: tags, + Embed: embed, } } diff --git a/pinecone/client_test.go b/pinecone/client_test.go index cb6024f..519a97f 100644 --- a/pinecone/client_test.go +++ b/pinecone/client_test.go @@ -31,7 +31,11 @@ func (ts *IntegrationTests) TestListIndexes() { require.Greater(ts.T(), len(indexes), 0, "Expected at least one index to exist") } -func (ts 
*IntegrationTests) TestCreatePodIndex() { +func (ts *IntegrationTests) TestCreatePodIndexDense() { + if ts.indexType == "serverless" { + ts.T().Skip("Skipping pod index tests for serverless suite") + } + name := uuid.New().String() defer func(ts *IntegrationTests, name string) { @@ -48,38 +52,43 @@ func (ts *IntegrationTests) TestCreatePodIndex() { }) require.NoError(ts.T(), err) require.Equal(ts.T(), name, idx.Name, "Index name does not match") + // create index should default to "dense" if no VectorType is specified + require.Equal(ts.T(), "dense", idx.VectorType, "Index vector type does not match") } -func (ts *IntegrationTests) TestCreatePodIndexInvalidDimension() { - name := uuid.New().String() - - _, err := ts.client.CreatePodIndex(context.Background(), &CreatePodIndexRequest{ - Name: name, - Dimension: -1, - Metric: Cosine, - Environment: "us-east1-gcp", - PodType: "p1.x1", - }) - require.Error(ts.T(), err) - require.Equal(ts.T(), reflect.TypeOf(err), reflect.TypeOf(&PineconeError{}), "Expected error to be of type PineconeError") -} +func (ts *IntegrationTests) TestCreateServerlessIndexDense() { + if ts.indexType == "pod" { + ts.T().Skip("Skipping serverless index tests for pod suite") + } -func (ts *IntegrationTests) TestCreateServerlessIndexInvalidDimension() { name := uuid.New().String() + dimension := int32(10) - _, err := ts.client.CreateServerlessIndex(context.Background(), &CreateServerlessIndexRequest{ + defer func(ts *IntegrationTests, name string) { + err := ts.deleteIndex(name) + require.NoError(ts.T(), err) + }(ts, name) + + idx, err := ts.client.CreateServerlessIndex(context.Background(), &CreateServerlessIndexRequest{ Name: name, - Dimension: -1, + Dimension: &dimension, Metric: Cosine, Cloud: Aws, Region: "us-west-2", }) - require.Error(ts.T(), err) - require.Equal(ts.T(), reflect.TypeOf(err), reflect.TypeOf(&PineconeError{}), "Expected error to be of type PineconeError") + require.NoError(ts.T(), err) + require.Equal(ts.T(), name, idx.Name, 
"Index name does not match") + // create index should default to "dense" if no VectorType is specified + require.Equal(ts.T(), "dense", idx.VectorType, "Index vector type does not match") } -func (ts *IntegrationTests) TestCreateServerlessIndex() { +func (ts *IntegrationTests) TestCreateServerlessIndexSparse() { + if ts.indexType == "pod" { + ts.T().Skip("Skipping serverless index tests for pod suite") + } + name := uuid.New().String() + vectorType := "sparse" defer func(ts *IntegrationTests, name string) { err := ts.deleteIndex(name) @@ -87,14 +96,34 @@ func (ts *IntegrationTests) TestCreateServerlessIndex() { }(ts, name) idx, err := ts.client.CreateServerlessIndex(context.Background(), &CreateServerlessIndexRequest{ + Name: name, + Metric: Dotproduct, + Cloud: Aws, + Region: "us-west-2", + VectorType: &vectorType, + }) + require.NoError(ts.T(), err) + require.Equal(ts.T(), name, idx.Name, "Index name does not match") + require.Equal(ts.T(), vectorType, idx.VectorType, "Index vector type does not match") +} + +func (ts *IntegrationTests) TestCreateServerlessIndexInvalidDimension() { + if ts.indexType == "pod" { + ts.T().Skip("Skipping serverless index tests for pod suite") + } + + name := uuid.New().String() + dimension := int32(-1) + + _, err := ts.client.CreateServerlessIndex(context.Background(), &CreateServerlessIndexRequest{ Name: name, - Dimension: 10, + Dimension: &dimension, Metric: Cosine, Cloud: Aws, Region: "us-west-2", }) - require.NoError(ts.T(), err) - require.Equal(ts.T(), name, idx.Name, "Index name does not match") + require.Error(ts.T(), err) + require.Equal(ts.T(), reflect.TypeOf(err), reflect.TypeOf(&PineconeError{}), "Expected error to be of type PineconeError") } func (ts *IntegrationTests) TestDescribeIndex() { @@ -781,14 +810,14 @@ func TestCreatePodIndexMissingReqdFieldsUnit(t *testing.T) { client := &Client{} _, err := client.CreatePodIndex(context.Background(), &CreatePodIndexRequest{}) require.Error(t, err) - require.ErrorContainsf(t, 
err, "fields Name, Dimension, Metric, Environment, and Podtype must be included in CreatePodIndexRequest", err.Error()) //_, err := ts.client.CreatePodIndex(context.Background(), &CreatePodIndexRequest{}) + require.ErrorContainsf(t, err, "fields Name, Dimension, Metric, Environment, and Podtype must be included in CreatePodIndexRequest", err.Error()) } func TestCreateServerlessIndexMissingReqdFieldsUnit(t *testing.T) { client := &Client{} _, err := client.CreateServerlessIndex(context.Background(), &CreateServerlessIndexRequest{}) require.Error(t, err) - require.ErrorContainsf(t, err, "fields Name, Dimension, Metric, Cloud, and Region must be included in CreateServerlessIndexRequest", err.Error()) + require.ErrorContainsf(t, err, "fields Name, Metric, Cloud, and Region must be included in CreateServerlessIndexRequest", err.Error()) } func TestCreateCollectionMissingReqdFieldsUnit(t *testing.T) { @@ -798,6 +827,19 @@ func TestCreateCollectionMissingReqdFieldsUnit(t *testing.T) { require.ErrorContains(t, err, "fields Name and Source must be included in CreateCollectionRequest") } +func TestCreatePodIndexInvalidDimensionUnit(t *testing.T) { + client := &Client{} + _, err := client.CreatePodIndex(context.Background(), &CreatePodIndexRequest{ + Name: "test-invalid-dimension", + Dimension: -1, + Metric: Cosine, + Environment: "us-east1-gcp", + PodType: "p1.x1", + }) + require.Error(t, err) + require.ErrorContains(t, err, "fields Name, Dimension, Metric, Environment, and Podtype must be included in CreatePodIndexRequest") +} + func TestHandleErrorResponseBodyUnit(t *testing.T) { tests := []struct { name string diff --git a/pinecone/test_suite.go b/pinecone/test_suite.go index 681eac8..a0af3fd 100644 --- a/pinecone/test_suite.go +++ b/pinecone/test_suite.go @@ -214,11 +214,12 @@ func generateVectorValues(dimension int32) []float32 { func BuildServerlessTestIndex(in *Client, idxName string, tags IndexTags) *Index { ctx := context.Background() + dimension := 
int32(setDimensionsForTestIndexes()) fmt.Printf("Creating Serverless index: %s\n", idxName) serverlessIdx, err := in.CreateServerlessIndex(ctx, &CreateServerlessIndexRequest{ Name: idxName, - Dimension: int32(setDimensionsForTestIndexes()), + Dimension: &dimension, Metric: Cosine, Region: "us-east-1", Cloud: "aws", From ca64d30be835dc65eac30ada8e72a6994229ed21 Mon Sep 17 00:00:00 2001 From: Austin DeNoble Date: Mon, 27 Jan 2025 19:45:01 -0500 Subject: [PATCH 3/9] add unit tests for sparse index validation --- pinecone/client.go | 15 ++++++++----- pinecone/client_test.go | 50 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 57 insertions(+), 8 deletions(-) diff --git a/pinecone/client.go b/pinecone/client.go index 430f806..68e43b9 100644 --- a/pinecone/client.go +++ b/pinecone/client.go @@ -710,7 +710,10 @@ func (c *Client) CreateServerlessIndex(ctx context.Context, in *CreateServerless return nil, fmt.Errorf("fields Name, Metric, Cloud, and Region must be included in CreateServerlessIndexRequest") } - // validate VectorType + // default to "dense" if VectorType is not specified + vectorType := "dense" + + // validate VectorType options if in.VectorType != nil { switch *in.VectorType { case "sparse": @@ -719,12 +722,14 @@ func (c *Client) CreateServerlessIndex(ctx context.Context, in *CreateServerless } else if in.Metric != Dotproduct { return nil, fmt.Errorf("metric should be 'dotproduct' when VectorType is 'sparse'") } + vectorType = "sparse" case "dense": - if in.Dimension == nil { - return nil, fmt.Errorf("dimension should be specified when VectorType is 'dense'") - } + vectorType = "dense" } } + if in.Dimension == nil && vectorType == "dense" { + return nil, fmt.Errorf("dimension should be specified when VectorType is 'dense'") + } deletionProtection := pointerOrNil(db_control.DeletionProtection(in.DeletionProtection)) metric := pointerOrNil(db_control.CreateIndexRequestMetric(in.Metric)) @@ -739,7 +744,7 @@ func (c *Client) CreateServerlessIndex(ctx 
context.Context, in *CreateServerless Dimension: in.Dimension, Metric: metric, DeletionProtection: deletionProtection, - VectorType: in.VectorType, + VectorType: &vectorType, Spec: db_control.IndexSpec{ Serverless: &db_control.ServerlessSpec{ Cloud: db_control.ServerlessSpecCloud(in.Cloud), diff --git a/pinecone/client_test.go b/pinecone/client_test.go index 519a97f..a589fd0 100644 --- a/pinecone/client_test.go +++ b/pinecone/client_test.go @@ -820,11 +820,48 @@ func TestCreateServerlessIndexMissingReqdFieldsUnit(t *testing.T) { require.ErrorContainsf(t, err, "fields Name, Metric, Cloud, and Region must be included in CreateServerlessIndexRequest", err.Error()) } -func TestCreateCollectionMissingReqdFieldsUnit(t *testing.T) { +func TestCreateServerlessIndexInvalidSparseDimensionUnit(t *testing.T) { + vectorType := "sparse" + dimension := int32(1) client := &Client{} - _, err := client.CreateCollection(context.Background(), &CreateCollectionRequest{}) + _, err := client.CreateServerlessIndex(context.Background(), &CreateServerlessIndexRequest{ + Name: "test-invalid-dimension", + Metric: Dotproduct, + Cloud: "aws", + Region: "us-east-1", + Dimension: &dimension, + VectorType: &vectorType, + }) require.Error(t, err) - require.ErrorContains(t, err, "fields Name and Source must be included in CreateCollectionRequest") + require.ErrorContains(t, err, "dimension should not be specified when VectorType is 'sparse'") +} + +func TestCreateServerlessIndexInvalidSparseMetricUnit(t *testing.T) { + vectorType := "sparse" + client := &Client{} + _, err := client.CreateServerlessIndex(context.Background(), &CreateServerlessIndexRequest{ + Name: "test-invalid-dimension", + Metric: Cosine, + Cloud: "aws", + Region: "us-east-1", + VectorType: &vectorType, + }) + require.Error(t, err) + require.ErrorContains(t, err, "metric should be 'dotproduct' when VectorType is 'sparse'") +} + +func TestCreateServerlessIndexInvalidDenseDimensionUnit(t *testing.T) { + vectorType := "dense" + client 
:= &Client{} + _, err := client.CreateServerlessIndex(context.Background(), &CreateServerlessIndexRequest{ + Name: "test-invalid-dimension", + Metric: Cosine, + Cloud: "aws", + Region: "us-east-1", + VectorType: &vectorType, + }) + require.Error(t, err) + require.ErrorContains(t, err, "dimension should be specified when VectorType is 'dense'") } func TestCreatePodIndexInvalidDimensionUnit(t *testing.T) { @@ -840,6 +877,13 @@ func TestCreatePodIndexInvalidDimensionUnit(t *testing.T) { require.ErrorContains(t, err, "fields Name, Dimension, Metric, Environment, and Podtype must be included in CreatePodIndexRequest") } +func TestCreateCollectionMissingReqdFieldsUnit(t *testing.T) { + client := &Client{} + _, err := client.CreateCollection(context.Background(), &CreateCollectionRequest{}) + require.Error(t, err) + require.ErrorContains(t, err, "fields Name and Source must be included in CreateCollectionRequest") +} + func TestHandleErrorResponseBodyUnit(t *testing.T) { tests := []struct { name string From e40dbf802dc525c1dbb9fdf0594acd75c462d77b Mon Sep 17 00:00:00 2001 From: Austin DeNoble Date: Tue, 28 Jan 2025 12:41:35 -0500 Subject: [PATCH 4/9] add doc comment and unit tests for IndexEmbed --- pinecone/models.go | 15 +++++++++- pinecone/models_test.go | 66 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 79 insertions(+), 2 deletions(-) diff --git a/pinecone/models.go b/pinecone/models.go index 2eb104b..172d554 100644 --- a/pinecone/models.go +++ b/pinecone/models.go @@ -65,7 +65,20 @@ type IndexSpec struct { Serverless *ServerlessSpec `json:"serverless,omitempty"` } -// [IndexEmbed] is the embedding model configured for an index, including document fields mapped to embedding inputs. +// [IndexEmbed] represents the embedding model configured for an index, +// including document fields mapped to embedding inputs. +// +// Fields: +// - Model: The name of the embedding model used to create the index (e.g., "multilingual-e5-large"). 
+// - Dimension: The dimension of the embedding model, specifying the size of the output vector. +// - Metric: The distance metric used by the embedding model. If the 'vector_type' is 'sparse', +// the metric must be 'dotproduct'. If the `vector_type` is `dense`, the metric +// defaults to 'cosine'. +// - VectorType: The index vector type associated with the model. If 'dense', the vector dimension must be specified. +// If 'sparse', the vector dimension will be nil. +// - FieldMap: Identifies the name of the text field from your document model that is embedded. +// - ReadParameters: The read parameters for the embedding model. +// - WriteParameters: The write parameters for the embedding model. type IndexEmbed struct { Model string `json:"model"` Dimension *int32 `json:"dimension,omitempty"` diff --git a/pinecone/models_test.go b/pinecone/models_test.go index cb46e7b..d0bb812 100644 --- a/pinecone/models_test.go +++ b/pinecone/models_test.go @@ -528,7 +528,7 @@ func TestMarshalNamespaceSummaryUnit(t *testing.T) { } } -func TestMarshalUsage(t *testing.T) { +func TestMarshalUsageUnit(t *testing.T) { tests := []struct { name string input Usage @@ -563,5 +563,69 @@ func TestMarshalUsage(t *testing.T) { } }) } +} + +func TestMarshalIndexEmbedUnit(t *testing.T) { + dimension := int32(128) + metric := IndexMetric("cosine") + vectorType := "sparse" + fieldMap := map[string]interface{}{ + "text-field": "my-text-field", + } + readParameters := map[string]interface{}{ + "readParam": "readParamValue", + } + writeParameters := map[string]interface{}{ + "writeParam": "writeParamValue", + } + + tests := []struct { + name string + input IndexEmbed + want string + }{ + { + name: "All fields present", + input: IndexEmbed{ + Model: "multilingual-e5-large", + Dimension: &dimension, + Metric: &metric, + VectorType: &vectorType, + FieldMap: &fieldMap, + ReadParameters: &readParameters, + WriteParameters: &writeParameters, + }, + want: 
`{"model":"multilingual-e5-large","dimension":128,"metric":"cosine","vector_type":"sparse","field_map":{"text-field":"my-text-field"},"read_parameters":{"readParam":"readParamValue"},"write_parameters":{"writeParam":"writeParamValue"}}`, + }, + { + name: "Fields omitted", + input: IndexEmbed{}, + want: `{"model":""}`, + }, + { + name: "Fields empty", + input: IndexEmbed{ + Model: "", + Dimension: nil, + Metric: nil, + VectorType: nil, + FieldMap: nil, + ReadParameters: nil, + WriteParameters: nil, + }, + want: `{"model":""}`, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(c *testing.T) { + got, err := json.Marshal(tt.input) + if err != nil { + c.Errorf("Failed to marshal IndexEmbed: %v", err) + } + if string(got) != tt.want { + c.Errorf("Marshal IndexEmbed got = %s, want = %s", string(got), tt.want) + } + }) + } } From 767fb70be88a39d1ae6b339c16cbb247b0f77fa5 Mon Sep 17 00:00:00 2001 From: Austin DeNoble Date: Tue, 28 Jan 2025 12:57:33 -0500 Subject: [PATCH 5/9] add sparse serverless example to README --- README.md | 49 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b05bc6a..deade4b 100644 --- a/README.md +++ b/README.md @@ -119,7 +119,7 @@ func main() { **Create a serverless index** -The following example creates a serverless index in the `us-east-1` +The following example creates a `dense` serverless index in the `us-east-1` region of AWS. For more information on serverless and regional availability, see [Understanding indexes](https://docs.pinecone.io/guides/indexes/understanding-indexes#serverless-indexes). @@ -167,6 +167,53 @@ func main() { } ``` +You can also create `sparse` only serverless indexes. These indexes enable direct indexing and retrieval of sparse vectors, supporting traditional methods like BM25 and learned sparse models such as [pinecone-sparse-english-v0](https://docs.pinecone.io/models/pinecone-sparse-english-v0). 
A `sparse` index must have a distance metric of `dotproduct` and does not require a specified dimension: + +```go +package main + +import ( + "context" + "fmt" + "github.com/pinecone-io/go-pinecone/v2/pinecone" + "log" + "os" +) + +func main() { + ctx := context.Background() + + clientParams := pinecone.NewClientParams{ + ApiKey: os.Getenv("PINECONE_API_KEY"), + } + + pc, err := pinecone.NewClient(clientParams) + if err != nil { + log.Fatalf("Failed to create Client: %v", err) + } else { + fmt.Println("Successfully created a new Client object!") + } + + indexName := "my-serverless-index" + vectorType := "dense" + + idx, err := pc.CreateServerlessIndex(ctx, &pinecone.CreateServerlessIndexRequest{ + Name: indexName, + Metric: pinecone.Dotproduct, + VectorType: &vectorType, + Cloud: pinecone.Aws, + Region: "us-east-1", + Tags: &pinecone.IndexTags{"environment": "development"}, + }) + + if err != nil { + log.Fatalf("Failed to create serverless index: %v", err) + } else { + fmt.Printf("Successfully created serverless index: %s", idx.Name) + } +} +``` + **Create a pod-based index** The following example creates a pod-based index with a metadata configuration. 
If no metadata configuration is From 4665c1adaf4cb002e343276185b1e31a62ddde87 Mon Sep 17 00:00:00 2001 From: Austin DeNoble Date: Tue, 28 Jan 2025 13:16:12 -0500 Subject: [PATCH 6/9] pull in latest spec and codegen changes --- codegen/apis | 2 +- .../db_data/grpc/db_data_2025-01_grpc.pb.go | 36 +++++++++---------- .../gen/db_data/rest/db_data_2025-01.oas.go | 6 ++-- .../gen/inference/inference_2025-01.oas.go | 4 ++- 4 files changed, 26 insertions(+), 22 deletions(-) diff --git a/codegen/apis b/codegen/apis index 8fdecc7..63e97dc 160000 --- a/codegen/apis +++ b/codegen/apis @@ -1 +1 @@ -Subproject commit 8fdecc7e0c3c752ff7749e5db8575f898b64b6de +Subproject commit 63e97dcd8a46cfb0687e23eab1f02300824f6e9d diff --git a/internal/gen/db_data/grpc/db_data_2025-01_grpc.pb.go b/internal/gen/db_data/grpc/db_data_2025-01_grpc.pb.go index 3ed42f3..fbecca0 100644 --- a/internal/gen/db_data/grpc/db_data_2025-01_grpc.pb.go +++ b/internal/gen/db_data/grpc/db_data_2025-01_grpc.pb.go @@ -34,47 +34,47 @@ const ( type VectorServiceClient interface { // Upsert vectors // - // The `upsert` operation writes vectors into a namespace. If a new value is upserted for an existing vector ID, it will overwrite the previous value. + // Writes vectors into a namespace. If a new value is upserted for an existing vector ID, it will overwrite the previous value. // // For guidance and examples, see [Upsert data](https://docs.pinecone.io/guides/data/upsert-data). Upsert(ctx context.Context, in *UpsertRequest, opts ...grpc.CallOption) (*UpsertResponse, error) // Delete vectors // - // The `delete` operation deletes vectors, by id, from a single namespace. + // Delete vectors by id from a single namespace. // // For guidance and examples, see [Delete data](https://docs.pinecone.io/guides/data/delete-data). 
Delete(ctx context.Context, in *DeleteRequest, opts ...grpc.CallOption) (*DeleteResponse, error) // Fetch vectors // - // The `fetch` operation looks up and returns vectors, by ID, from a single namespace. The returned vectors include the vector data and/or metadata. + // Look up and returns vectors by ID from a single namespace. The returned vectors include the vector data and/or metadata. // // For guidance and examples, see [Fetch data](https://docs.pinecone.io/guides/data/fetch-data). Fetch(ctx context.Context, in *FetchRequest, opts ...grpc.CallOption) (*FetchResponse, error) // List vector IDs // - // The `list` operation lists the IDs of vectors in a single namespace of a serverless index. An optional prefix can be passed to limit the results to IDs with a common prefix. + // List the IDs of vectors in a single namespace of a serverless index. An optional prefix can be passed to limit the results to IDs with a common prefix. // - // `list` returns up to 100 IDs at a time by default in sorted order (bitwise/"C" collation). If the `limit` parameter is set, `list` returns up to that number of IDs instead. Whenever there are additional IDs to return, the response also includes a `pagination_token` that you can use to get the next batch of IDs. When the response does not include a `pagination_token`, there are no more IDs to return. + // This returns up to 100 IDs at a time by default in sorted order (bitwise/"C" collation). If the `limit` parameter is set, `list` returns up to that number of IDs instead. Whenever there are additional IDs to return, the response also includes a `pagination_token` that you can use to get the next batch of IDs. When the response does not include a `pagination_token`, there are no more IDs to return. // // For guidance and examples, see [List record IDs](https://docs.pinecone.io/guides/data/list-record-ids). // - // **Note:** `list` is supported only for serverless indexes. 
+ // **Note:** This is supported only for serverless indexes. List(ctx context.Context, in *ListRequest, opts ...grpc.CallOption) (*ListResponse, error) // Query vectors // - // The `query` operation searches a namespace, using a query vector. It retrieves the ids of the most similar items in a namespace, along with their similarity scores. + // Searches a namespace, using a query vector. It retrieves the ids of the most similar items in a namespace, along with their similarity scores. // // For guidance and examples, see [Query data](https://docs.pinecone.io/guides/data/query-data). Query(ctx context.Context, in *QueryRequest, opts ...grpc.CallOption) (*QueryResponse, error) // Update a vector // - // The `update` operation updates a vector in a namespace. If a value is included, it will overwrite the previous value. If a `set_metadata` is included, the values of the fields specified in it will be added or overwrite the previous value. + // Update a vector in a namespace. If a value is included, it will overwrite the previous value. If a `set_metadata` is included, the values of the fields specified in it will be added or overwrite the previous value. // // For guidance and examples, see [Update data](https://docs.pinecone.io/guides/data/update-data). Update(ctx context.Context, in *UpdateRequest, opts ...grpc.CallOption) (*UpdateResponse, error) // Get index stats // - // The `describe_index_stats` operation returns statistics about the contents of an index, including the vector count per namespace, the number of dimensions, and the index fullness. + // Return statistics about the contents of an index, including the vector count per namespace, the number of dimensions, and the index fullness. // // Serverless indexes scale automatically as needed, so index fullness is relevant only for pod-based indexes. 
DescribeIndexStats(ctx context.Context, in *DescribeIndexStatsRequest, opts ...grpc.CallOption) (*DescribeIndexStatsResponse, error) @@ -157,47 +157,47 @@ func (c *vectorServiceClient) DescribeIndexStats(ctx context.Context, in *Descri type VectorServiceServer interface { // Upsert vectors // - // The `upsert` operation writes vectors into a namespace. If a new value is upserted for an existing vector ID, it will overwrite the previous value. + // Writes vectors into a namespace. If a new value is upserted for an existing vector ID, it will overwrite the previous value. // // For guidance and examples, see [Upsert data](https://docs.pinecone.io/guides/data/upsert-data). Upsert(context.Context, *UpsertRequest) (*UpsertResponse, error) // Delete vectors // - // The `delete` operation deletes vectors, by id, from a single namespace. + // Delete vectors by id from a single namespace. // // For guidance and examples, see [Delete data](https://docs.pinecone.io/guides/data/delete-data). Delete(context.Context, *DeleteRequest) (*DeleteResponse, error) // Fetch vectors // - // The `fetch` operation looks up and returns vectors, by ID, from a single namespace. The returned vectors include the vector data and/or metadata. + // Look up and returns vectors by ID from a single namespace. The returned vectors include the vector data and/or metadata. // // For guidance and examples, see [Fetch data](https://docs.pinecone.io/guides/data/fetch-data). Fetch(context.Context, *FetchRequest) (*FetchResponse, error) // List vector IDs // - // The `list` operation lists the IDs of vectors in a single namespace of a serverless index. An optional prefix can be passed to limit the results to IDs with a common prefix. + // List the IDs of vectors in a single namespace of a serverless index. An optional prefix can be passed to limit the results to IDs with a common prefix. // - // `list` returns up to 100 IDs at a time by default in sorted order (bitwise/"C" collation). 
If the `limit` parameter is set, `list` returns up to that number of IDs instead. Whenever there are additional IDs to return, the response also includes a `pagination_token` that you can use to get the next batch of IDs. When the response does not include a `pagination_token`, there are no more IDs to return. + // This returns up to 100 IDs at a time by default in sorted order (bitwise/"C" collation). If the `limit` parameter is set, `list` returns up to that number of IDs instead. Whenever there are additional IDs to return, the response also includes a `pagination_token` that you can use to get the next batch of IDs. When the response does not include a `pagination_token`, there are no more IDs to return. // // For guidance and examples, see [List record IDs](https://docs.pinecone.io/guides/data/list-record-ids). // - // **Note:** `list` is supported only for serverless indexes. + // **Note:** This is supported only for serverless indexes. List(context.Context, *ListRequest) (*ListResponse, error) // Query vectors // - // The `query` operation searches a namespace, using a query vector. It retrieves the ids of the most similar items in a namespace, along with their similarity scores. + // Searches a namespace, using a query vector. It retrieves the ids of the most similar items in a namespace, along with their similarity scores. // // For guidance and examples, see [Query data](https://docs.pinecone.io/guides/data/query-data). Query(context.Context, *QueryRequest) (*QueryResponse, error) // Update a vector // - // The `update` operation updates a vector in a namespace. If a value is included, it will overwrite the previous value. If a `set_metadata` is included, the values of the fields specified in it will be added or overwrite the previous value. + // Update a vector in a namespace. If a value is included, it will overwrite the previous value. 
If a `set_metadata` is included, the values of the fields specified in it will be added or overwrite the previous value. // // For guidance and examples, see [Update data](https://docs.pinecone.io/guides/data/update-data). Update(context.Context, *UpdateRequest) (*UpdateResponse, error) // Get index stats // - // The `describe_index_stats` operation returns statistics about the contents of an index, including the vector count per namespace, the number of dimensions, and the index fullness. + // Return statistics about the contents of an index, including the vector count per namespace, the number of dimensions, and the index fullness. // // Serverless indexes scale automatically as needed, so index fullness is relevant only for pod-based indexes. DescribeIndexStats(context.Context, *DescribeIndexStatsRequest) (*DescribeIndexStatsResponse, error) diff --git a/internal/gen/db_data/rest/db_data_2025-01.oas.go b/internal/gen/db_data/rest/db_data_2025-01.oas.go index 75a92fe..e7dd8f0 100644 --- a/internal/gen/db_data/rest/db_data_2025-01.oas.go +++ b/internal/gen/db_data/rest/db_data_2025-01.oas.go @@ -295,7 +295,9 @@ type SearchRecordsRequest struct { // Query The query to rerank documents against. If a specific rerank query is specified, it overwrites the query input that was provided at the top level. Query *string `json:"query,omitempty"` - // RankFields The fields to use for reranking. + // RankFields The field(s) to consider for reranking. If not provided, the default is `["text"]`. + // + // The number of fields supported is [model-specific](https://docs.pinecone.io/guides/inference/understanding-inference#reranking-models). RankFields []string `json:"rank_fields"` // TopN The number of top results to return after reranking. Defaults to top_k. @@ -395,7 +397,7 @@ type UpdateResponse = map[string]interface{} // UpsertRecord The request for the `upsert` operation. type UpsertRecord struct { - // Id The unique ID of the record to upsert. 
+ // Id The unique ID of the record to upsert. Note that `id` can be used as an alias for `_id`. Id string `json:"_id"` } diff --git a/internal/gen/inference/inference_2025-01.oas.go b/internal/gen/inference/inference_2025-01.oas.go index 6904279..41eeb90 100644 --- a/internal/gen/inference/inference_2025-01.oas.go +++ b/internal/gen/inference/inference_2025-01.oas.go @@ -142,7 +142,9 @@ type RerankRequest struct { // Query The query to rerank documents against. Query string `json:"query"` - // RankFields The fields to rank the documents by. If not provided, the default is `"text"`. + // RankFields The field(s) to consider for reranking. If not provided, the default is `["text"]`. + // + // The number of fields supported is [model-specific](https://docs.pinecone.io/guides/inference/understanding-inference#reranking-models). RankFields *[]string `json:"rank_fields,omitempty"` // ReturnDocuments Whether to return the documents in the response. From 1ddb62699b92b77f7409549d1901fb06e35d59ad Mon Sep 17 00:00:00 2001 From: Austin DeNoble Date: Thu, 30 Jan 2025 00:11:14 -0500 Subject: [PATCH 7/9] update Vector type to allow optional Values --- internal/gen/db_data/rest/db_data_2025-01.oas.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/gen/db_data/rest/db_data_2025-01.oas.go b/internal/gen/db_data/rest/db_data_2025-01.oas.go index e7dd8f0..ea658dd 100644 --- a/internal/gen/db_data/rest/db_data_2025-01.oas.go +++ b/internal/gen/db_data/rest/db_data_2025-01.oas.go @@ -434,7 +434,7 @@ type Vector struct { SparseValues *SparseValues `json:"sparseValues,omitempty"` // Values This is the vector data included in the request. - Values []float32 `json:"values"` + Values *[]float32 `json:"values,omitempty"` } // VectorValues This is the vector data included in the request. 
From 4bf4e9e5c261673efde075aa01466660be02dcb2 Mon Sep 17 00:00:00 2001 From: Austin DeNoble Date: Sat, 1 Feb 2025 00:08:38 -0500 Subject: [PATCH 8/9] review feedback, make sure Values is a pointer on Vector --- README.md | 2 +- pinecone/client.go | 2 +- pinecone/client_test.go | 4 ++-- pinecone/index_connection.go | 14 +++++++++-- pinecone/index_connection_test.go | 39 ++++++++++++++++++++----------- pinecone/local_test.go | 4 ++-- pinecone/models.go | 2 +- pinecone/models_test.go | 6 +++-- pinecone/test_suite.go | 7 +++--- 9 files changed, 52 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index deade4b..4a03f70 100644 --- a/README.md +++ b/README.md @@ -195,7 +195,7 @@ func main() { } indexName := "my-serverless-index" - vectorType := "dense" + vectorType := "sparse" idx, err := pc.CreateServerlessIndex(ctx, &pinecone.CreateServerlessIndexRequest{ Name: indexName, diff --git a/pinecone/client.go b/pinecone/client.go index 68e43b9..e9cc1ff 100644 --- a/pinecone/client.go +++ b/pinecone/client.go @@ -541,7 +541,7 @@ func (req CreatePodIndexRequest) TotalCount() int { // } func (c *Client) CreatePodIndex(ctx context.Context, in *CreatePodIndexRequest) (*Index, error) { if in.Name == "" || in.Dimension <= 0 || in.Metric == "" || in.Environment == "" || in.PodType == "" { - return nil, fmt.Errorf("fields Name, Dimension, Metric, Environment, and Podtype must be included in CreatePodIndexRequest") + return nil, fmt.Errorf("fields Name, positive Dimension, Metric, Environment, and Podtype must be included in CreatePodIndexRequest") } deletionProtection := pointerOrNil(db_control.DeletionProtection(in.DeletionProtection)) diff --git a/pinecone/client_test.go b/pinecone/client_test.go index a589fd0..58285b5 100644 --- a/pinecone/client_test.go +++ b/pinecone/client_test.go @@ -810,7 +810,7 @@ func TestCreatePodIndexMissingReqdFieldsUnit(t *testing.T) { client := &Client{} _, err := client.CreatePodIndex(context.Background(), &CreatePodIndexRequest{}) 
require.Error(t, err) - require.ErrorContainsf(t, err, "fields Name, Dimension, Metric, Environment, and Podtype must be included in CreatePodIndexRequest", err.Error()) + require.ErrorContainsf(t, err, "fields Name, positive Dimension, Metric, Environment, and Podtype must be included in CreatePodIndexRequest", err.Error()) } func TestCreateServerlessIndexMissingReqdFieldsUnit(t *testing.T) { @@ -874,7 +874,7 @@ func TestCreatePodIndexInvalidDimensionUnit(t *testing.T) { PodType: "p1.x1", }) require.Error(t, err) - require.ErrorContains(t, err, "fields Name, Dimension, Metric, Environment, and Podtype must be included in CreatePodIndexRequest") + require.ErrorContains(t, err, "fields Name, positive Dimension, Metric, Environment, and Podtype must be included in CreatePodIndexRequest") } func TestCreateCollectionMissingReqdFieldsUnit(t *testing.T) { diff --git a/pinecone/index_connection.go b/pinecone/index_connection.go index e91eeb7..0519d4e 100644 --- a/pinecone/index_connection.go +++ b/pinecone/index_connection.go @@ -1333,9 +1333,14 @@ func toVector(vector *db_data_grpc.Vector) *Vector { if vector == nil { return nil } + var vectorValues *[]float32 + if vector.Values != nil { + vectorValues = &vector.Values + } + return &Vector{ Id: vector.Id, - Values: vector.Values, + Values: vectorValues, Metadata: vector.Metadata, SparseValues: toSparseValues(vector.SparseValues), } @@ -1435,9 +1440,14 @@ func vecToGrpc(v *Vector) *db_data_grpc.Vector { if v == nil { return nil } + var vecValues []float32 + if v.Values != nil { + vecValues = *v.Values + } + return &db_data_grpc.Vector{ Id: v.Id, - Values: v.Values, + Values: vecValues, Metadata: v.Metadata, SparseValues: sparseValToGrpc(v.SparseValues), } diff --git a/pinecone/index_connection_test.go b/pinecone/index_connection_test.go index 369d0c0..d4590e3 100644 --- a/pinecone/index_connection_test.go +++ b/pinecone/index_connection_test.go @@ -363,6 +363,9 @@ func TestNewIndexConnectionNamespace(t *testing.T) { } 
func TestMarshalFetchVectorsResponseUnit(t *testing.T) { + vec1Values := []float32{0.01, 0.01, 0.01} + vec2Values := []float32{0.02, 0.02, 0.02} + tests := []struct { name string input FetchVectorsResponse @@ -372,8 +375,8 @@ func TestMarshalFetchVectorsResponseUnit(t *testing.T) { name: "All fields present", input: FetchVectorsResponse{ Vectors: map[string]*Vector{ - "vec-1": {Id: "vec-1", Values: []float32{0.01, 0.01, 0.01}}, - "vec-2": {Id: "vec-2", Values: []float32{0.02, 0.02, 0.02}}, + "vec-1": {Id: "vec-1", Values: &vec1Values}, + "vec-2": {Id: "vec-2", Values: &vec2Values}, }, Usage: &Usage{ReadUnits: 5}, Namespace: "test-namespace", @@ -461,6 +464,8 @@ func TestMarshalListVectorsResponseUnit(t *testing.T) { } func TestMarshalQueryVectorsResponseUnit(t *testing.T) { + vec1Values := []float32{0.01, 0.01, 0.01} + vec2Values := []float32{0.02, 0.02, 0.02} tests := []struct { name string input QueryVectorsResponse @@ -470,8 +475,8 @@ func TestMarshalQueryVectorsResponseUnit(t *testing.T) { name: "All fields present", input: QueryVectorsResponse{ Matches: []*ScoredVector{ - {Vector: &Vector{Id: "vec-1", Values: []float32{0.01, 0.01, 0.01}}, Score: 0.1}, - {Vector: &Vector{Id: "vec-2", Values: []float32{0.02, 0.02, 0.02}}, Score: 0.2}, + {Vector: &Vector{Id: "vec-1", Values: &vec1Values}, Score: 0.1}, + {Vector: &Vector{Id: "vec-2", Values: &vec2Values}, Score: 0.2}, }, Usage: &Usage{ReadUnits: 5}, Namespace: "test-namespace", @@ -554,6 +559,8 @@ func TestMarshalDescribeIndexStatsResponseUnit(t *testing.T) { } func TestToVectorUnit(t *testing.T) { + vecValues := []float32{0.01, 0.02, 0.03} + tests := []struct { name string vector *db_data_grpc.Vector @@ -572,7 +579,7 @@ func TestToVectorUnit(t *testing.T) { }, expected: &Vector{ Id: "dense-1", - Values: []float32{0.01, 0.02, 0.03}, + Values: &vecValues, }, }, { @@ -607,7 +614,7 @@ func TestToVectorUnit(t *testing.T) { expected: &Vector{ Id: "hybrid-1", - Values: []float32{0.01, 0.02, 0.03}, + Values: &vecValues, 
SparseValues: &SparseValues{ Indices: []uint32{0, 2}, Values: []float32{0.01, 0.03}, @@ -630,7 +637,7 @@ func TestToVectorUnit(t *testing.T) { }, expected: &Vector{ Id: "hybrid-metadata-1", - Values: []float32{0.01, 0.02, 0.03}, + Values: &vecValues, SparseValues: &SparseValues{ Indices: []uint32{0, 2}, Values: []float32{0.01, 0.03}, @@ -683,6 +690,8 @@ func TestToSparseValuesUnit(t *testing.T) { } func TestToScoredVectorUnit(t *testing.T) { + vecValues := []float32{0.01, 0.02, 0.03} + tests := []struct { name string scoredVector *db_data_grpc.ScoredVector @@ -697,13 +706,13 @@ func TestToScoredVectorUnit(t *testing.T) { name: "Pass scored dense vector", scoredVector: &db_data_grpc.ScoredVector{ Id: "dense-1", - Values: []float32{0.01, 0.01, 0.01}, + Values: []float32{0.01, 0.02, 0.03}, Score: 0.1, }, expected: &ScoredVector{ Vector: &Vector{ Id: "dense-1", - Values: []float32{0.01, 0.01, 0.01}, + Values: &vecValues, }, Score: 0.1, }, @@ -743,7 +752,7 @@ func TestToScoredVectorUnit(t *testing.T) { expected: &ScoredVector{ Vector: &Vector{ Id: "hybrid-1", - Values: []float32{0.01, 0.02, 0.03}, + Values: &vecValues, SparseValues: &SparseValues{ Indices: []uint32{0, 2}, Values: []float32{0.01, 0.03}, @@ -771,7 +780,7 @@ func TestToScoredVectorUnit(t *testing.T) { expected: &ScoredVector{ Vector: &Vector{ Id: "hybrid-metadata-1", - Values: []float32{0.01, 0.02, 0.03}, + Values: &vecValues, SparseValues: &SparseValues{ Indices: []uint32{0, 2}, Values: []float32{0.01, 0.03}, @@ -795,6 +804,8 @@ func TestToScoredVectorUnit(t *testing.T) { } func TestVecToGrpcUnit(t *testing.T) { + vecValues := []float32{0.01, 0.02, 0.03} + tests := []struct { name string vector *Vector @@ -809,7 +820,7 @@ func TestVecToGrpcUnit(t *testing.T) { name: "Pass dense vector", vector: &Vector{ Id: "dense-1", - Values: []float32{0.01, 0.02, 0.03}, + Values: &vecValues, }, expected: &db_data_grpc.Vector{ Id: "dense-1", @@ -838,7 +849,7 @@ func TestVecToGrpcUnit(t *testing.T) { name: "Pass hybrid 
vector", vector: &Vector{ Id: "hybrid-1", - Values: []float32{0.01, 0.02, 0.03}, + Values: &vecValues, SparseValues: &SparseValues{ Indices: []uint32{0, 2}, Values: []float32{0.01, 0.03}, @@ -857,7 +868,7 @@ func TestVecToGrpcUnit(t *testing.T) { name: "Pass hybrid vector with metadata", vector: &Vector{ Id: "hybrid-metadata-1", - Values: []float32{0.01, 0.02, 0.03}, + Values: &vecValues, SparseValues: &SparseValues{ Indices: []uint32{0, 2}, Values: []float32{0.01, 0.03}, diff --git a/pinecone/local_test.go b/pinecone/local_test.go index 4619f27..3d05f6a 100644 --- a/pinecone/local_test.go +++ b/pinecone/local_test.go @@ -185,7 +185,7 @@ func (ts *LocalIntegrationTests) TestQueryVectors() { assert.Equal(ts.T(), queryVectorId, queryVectorsByIdResponse.Matches[0].Vector.Id, "Top QueryByVectorId result's vector id should match queryVectorId") queryByVectorValuesResponse, err := idxConn.QueryByVectorValues(context.Background(), &QueryByVectorValuesRequest{ - Vector: queryVectorsByIdResponse.Matches[0].Vector.Values, + Vector: *queryVectorsByIdResponse.Matches[0].Vector.Values, TopK: uint32(topK), MetadataFilter: ts.metadata, IncludeValues: true, @@ -210,7 +210,7 @@ func (ts *LocalIntegrationTests) TestUpdateVectors() { newValues := generateVectorValues(ts.dimension) for _, idxConn := range ts.idxConns { - err := idxConn.UpdateVector(context.Background(), &UpdateVectorRequest{Id: updateVectorId, Values: newValues}) + err := idxConn.UpdateVector(context.Background(), &UpdateVectorRequest{Id: updateVectorId, Values: *newValues}) require.NoError(ts.T(), err) fetchVectorsResponse, err := idxConn.FetchVectors(context.Background(), []string{updateVectorId}) diff --git a/pinecone/models.go b/pinecone/models.go index 172d554..4a140b8 100644 --- a/pinecone/models.go +++ b/pinecone/models.go @@ -154,7 +154,7 @@ type ServerlessSpec struct { // [dense or sparse vector object]: https://docs.pinecone.io/guides/get-started/key-concepts#dense-vector type Vector struct { Id string 
`json:"id"` - Values []float32 `json:"values,omitempty"` + Values *[]float32 `json:"values,omitempty"` SparseValues *SparseValues `json:"sparse_values,omitempty"` Metadata *Metadata `json:"metadata,omitempty"` } diff --git a/pinecone/models_test.go b/pinecone/models_test.go index d0bb812..b633b6e 100644 --- a/pinecone/models_test.go +++ b/pinecone/models_test.go @@ -353,6 +353,7 @@ func TestMarshalVectorUnit(t *testing.T) { if err != nil { t.Fatalf("Failed to create metadata: %v", err) } + vecValues := []float32{0.1, 0.2, 0.3} tests := []struct { name string @@ -363,7 +364,7 @@ func TestMarshalVectorUnit(t *testing.T) { name: "All fields present", input: Vector{ Id: "vector-1", - Values: []float32{0.1, 0.2, 0.3}, + Values: &vecValues, Metadata: metadata, SparseValues: &SparseValues{ Indices: []uint32{1, 2, 3}, @@ -403,6 +404,7 @@ func TestMarshalScoredVectorUnit(t *testing.T) { if err != nil { t.Fatalf("Failed to create metadata: %v", err) } + vecValues := []float32{0.1, 0.2, 0.3} tests := []struct { name string @@ -414,7 +416,7 @@ func TestMarshalScoredVectorUnit(t *testing.T) { input: ScoredVector{ Vector: &Vector{ Id: "vector-1", - Values: []float32{0.1, 0.2, 0.3}, + Values: &vecValues, Metadata: metadata, SparseValues: &SparseValues{ Indices: []uint32{1, 2, 3}, diff --git a/pinecone/test_suite.go b/pinecone/test_suite.go index a0af3fd..d11aac6 100644 --- a/pinecone/test_suite.go +++ b/pinecone/test_suite.go @@ -188,7 +188,8 @@ func GenerateVectors(numOfVectors int, dimension int32, isSparse bool, metadata for j := 0; j < int(dimension); j++ { sparseValues.Indices = append(sparseValues.Indices, uint32(j)) } - sparseValues.Values = generateVectorValues(dimension) + values := generateVectorValues(dimension) + sparseValues.Values = *values vectors[i].SparseValues = &sparseValues } @@ -200,7 +201,7 @@ func GenerateVectors(numOfVectors int, dimension int32, isSparse bool, metadata return vectors } -func generateVectorValues(dimension int32) []float32 { +func 
generateVectorValues(dimension int32) *[]float32 { maxInt := 1000000 // A large integer to normalize the float values values := make([]float32, dimension) @@ -209,7 +210,7 @@ func generateVectorValues(dimension int32) []float32 { values[i] = float32(rand.Intn(maxInt)) / float32(maxInt) } - return values + return &values } func BuildServerlessTestIndex(in *Client, idxName string, tags IndexTags) *Index { From 1149ebbf3fcc7d2fa4ba084c016bfa3d3cccda46 Mon Sep 17 00:00:00 2001 From: Austin DeNoble Date: Sat, 1 Feb 2025 00:46:48 -0500 Subject: [PATCH 9/9] fix assertions in vector update tests --- pinecone/index_connection_test.go | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/pinecone/index_connection_test.go b/pinecone/index_connection_test.go index d4590e3..d511711 100644 --- a/pinecone/index_connection_test.go +++ b/pinecone/index_connection_test.go @@ -206,7 +206,9 @@ func (ts *IntegrationTests) TestUpdateVectorValues() { } actualVals := vector.Vectors[ts.vectorIds[0]].Values - assert.ElementsMatch(ts.T(), expectedVals, actualVals, "Values do not match") + if actualVals != nil { + assert.ElementsMatch(ts.T(), expectedVals, *actualVals, "Values do not match") + } } func (ts *IntegrationTests) TestUpdateVectorMetadata() { @@ -228,17 +230,20 @@ func (ts *IntegrationTests) TestUpdateVectorMetadata() { time.Sleep(10 * time.Second) - vector, err := ts.idxConn.FetchVectors(ctx, []string{ts.vectorIds[0]}) + vectors, err := ts.idxConn.FetchVectors(ctx, []string{ts.vectorIds[0]}) if err != nil { ts.FailNow(fmt.Sprintf("Failed to fetch vector: %v", err)) } + vector := vectors.Vectors[ts.vectorIds[0]] - assert.NotNil(ts.T(), vector.Vectors[ts.vectorIds[0]].Metadata, "Metadata is nil after update") + if vector != nil { + assert.NotNil(ts.T(), vector.Metadata, "Metadata is nil after update") - expectedGenre := expectedMetadataMap.Fields["genre"].GetStringValue() - actualGenre := 
vector.Vectors[ts.vectorIds[0]].Metadata.Fields["genre"].GetStringValue() + expectedGenre := expectedMetadataMap.Fields["genre"].GetStringValue() + actualGenre := vector.Metadata.Fields["genre"].GetStringValue() - assert.Equal(ts.T(), expectedGenre, actualGenre, "Metadata does not match") + assert.Equal(ts.T(), expectedGenre, actualGenre, "Metadata does not match") + } } func (ts *IntegrationTests) TestUpdateVectorSparseValues() { @@ -263,13 +268,17 @@ func (ts *IntegrationTests) TestUpdateVectorSparseValues() { time.Sleep(5 * time.Second) // Fetch updated vector and verify sparse values - vector, err := ts.idxConn.FetchVectors(ctx, []string{ts.vectorIds[0]}) + vectors, err := ts.idxConn.FetchVectors(ctx, []string{ts.vectorIds[0]}) if err != nil { ts.FailNow(fmt.Sprintf("Failed to fetch vector: %v", err)) } - actualSparseValues := vector.Vectors[ts.vectorIds[0]].SparseValues.Values + vector := vectors.Vectors[ts.vectorIds[0]] + + if vector != nil { + actualSparseValues := vector.SparseValues.Values - assert.ElementsMatch(ts.T(), expectedSparseValues.Values, actualSparseValues, "Sparse values do not match") + assert.ElementsMatch(ts.T(), expectedSparseValues.Values, actualSparseValues, "Sparse values do not match") + } } func (ts *IntegrationTests) TestImportFlowHappyPath() {