diff --git a/chunker/json_parser_test.go b/chunker/json_parser_test.go index b928087561c..6752b8c150a 100644 --- a/chunker/json_parser_test.go +++ b/chunker/json_parser_test.go @@ -77,8 +77,8 @@ type Person struct { type Product struct { Uid string `json:"uid,omitempty"` Name string `json:"name"` - Discription string `json:"discription"` - Discription_v string `json:"discription_v"` + Description string `json:"description"` + Description_v string `json:"description_v"` } func Parse(b []byte, op int) ([]*api.NQuad, error) { @@ -1406,7 +1406,7 @@ func TestNquadsJsonEmptyStringVectorPred(t *testing.T) { p := Product{ Uid: "1", Name: "", - Discription_v: "", + Description_v: "", } b, err := json.Marshal([]Product{p}) @@ -1420,16 +1420,16 @@ func TestNquadsJsonEmptyStringVectorPred(t *testing.T) { require.NoError(t, err) require.Equal(t, 3, len(fastNQ)) - // predicate Name should be empty and edge for Discription_v should not be there + // predicate Name should be empty and edge for Description_v should not be there // we do not create edge for "" in float32vector. exp := &Experiment{ t: t, nqs: nq, schema: `name: string @index(exact) . - discription_v: float32vector .`, + description_v: float32vector .`, query: `{product(func: uid(1)) { name - discription_v + description_v }}`, expected: `{"product":[{ "name":""}]}`, @@ -1443,7 +1443,7 @@ func TestNquadsJsonEmptyStringVectorPred(t *testing.T) { func TestNquadsJsonEmptySquareBracketVectorPred(t *testing.T) { p := Product{ Name: "ipad", - Discription_v: "[]", + Description_v: "[]", } b, err := json.Marshal(p) @@ -1457,16 +1457,16 @@ func TestNquadsJsonEmptySquareBracketVectorPred(t *testing.T) { require.NoError(t, err) require.Equal(t, 3, len(fastNQ)) - // predicate Name should have value "ipad" and edge for Discription_v should not be there + // predicate Name should have value "ipad" and edge for Description_v should not be there // we do not create edge for [] in float32vector. exp := &Experiment{ t: t, nqs: nq, schema: `name: string @index(exact) . - discription_v: float32vector .`, + description_v: float32vector .`, query: `{product(func: eq(name, "ipad")) { name - discription_v + description_v }}`, expected: `{"product":[{ "name":"ipad"}]}`, @@ -1480,7 +1480,7 @@ func TestNquadsJsonEmptySquareBracketVectorPred(t *testing.T) { func TestNquadsJsonValidVector(t *testing.T) { p := Product{ Name: "ipad", - Discription_v: "[1.1, 2.2, 3.3]", + Description_v: "[1.1, 2.2, 3.3]", } b, err := json.Marshal(p) @@ -1498,14 +1498,14 @@ func TestNquadsJsonValidVector(t *testing.T) { t: t, nqs: nq, schema: `name: string @index(exact) . - discription_v: float32vector .`, + description_v: float32vector .`, query: `{product(func: eq(name, "ipad")) { name - discription_v + description_v }}`, expected: `{"product":[{ "name":"ipad", - "discription_v":[1.1, 2.2, 3.3]}]}`, + "description_v":[1.1, 2.2, 3.3]}]}`, } exp.verify() diff --git a/dgraphapi/vector.go b/dgraphapi/vector.go index b2261b96db2..38762f130f7 100644 --- a/dgraphapi/vector.go +++ b/dgraphapi/vector.go @@ -90,7 +90,7 @@ func UnmarshalVectorResp(resp *api.Response) ([][]float32, error) { type Data struct { Vector []struct { UID string `json:"uid"` - ProjectDescriptionV []float32 `json:"project_discription_v"` + ProjectDescriptionV []float32 `json:"project_description_v"` } `json:"vector"` } var data Data diff --git a/posting/index.go b/posting/index.go index 22c5dc7b38d..dfe6e7c7f3c 100644 --- a/posting/index.go +++ b/posting/index.go @@ -168,8 +168,7 @@ func (txn *Txn) addIndexMutations(ctx context.Context, info *indexMutationInfo) // Similarly, the current assumption is that we have at most one // Vector Index, but this assumption may break later. if info.op != pb.DirectedEdge_DEL && - len(data) > 0 && data[0].Tid == types.VFloatID && - len(info.factorySpecs) > 0 { + len(data) > 0 && len(info.factorySpecs) > 0 { // retrieve vector from inUuid save as inVec inVec := types.BytesAsFloatArray(data[0].Value.([]byte)) tc := hnsw.NewTxnCache(NewViTxn(txn), txn.StartTs) @@ -1387,6 +1386,37 @@ func rebuildTokIndex(ctx context.Context, rb *IndexRebuild) error { builder.fn = func(uid uint64, pl *List, txn *Txn) ([]*pb.DirectedEdge, error) { edge := pb.DirectedEdge{Attr: rb.Attr, Entity: uid} edges := []*pb.DirectedEdge{} + + if runForVectors { + val, err := pl.Value(txn.StartTs) + if err != nil { + return []*pb.DirectedEdge{}, err + } + + if val.Tid != types.VFloatID { + sv, err := types.Convert(val, types.VFloatID) + if err != nil { + return []*pb.DirectedEdge{}, err + } + b := types.ValueForType(types.BinaryID) + if err = types.Marshal(sv, &b); err != nil { + return []*pb.DirectedEdge{}, err + } + edge.Value = b.Value.([]byte) + edge.ValueType = types.VFloatID.Enum() + + inKey := x.DataKey(edge.Attr, uid) + p, err := txn.Get(inKey) + if err != nil { + return []*pb.DirectedEdge{}, err + } + + if err := p.addMutation(ctx, txn, &edge); err != nil { + return []*pb.DirectedEdge{}, err + } + } + } + err := pl.Iterate(txn.StartTs, 0, func(p *pb.Posting) error { // Add index entries based on p. val := types.Val{ @@ -1419,7 +1449,7 @@ func rebuildTokIndex(ctx context.Context, rb *IndexRebuild) error { } return edges, err } - if len(factorySpecs) != 0 { + if runForVectors { return builder.RunWithoutTemp(ctx) } return builder.Run(ctx) diff --git a/query/vector/vector_graphql_test.go b/query/vector/vector_graphql_test.go index 4f496ea1db1..73241ff0b70 100644 --- a/query/vector/vector_graphql_test.go +++ b/query/vector/vector_graphql_test.go @@ -19,12 +19,15 @@ package query import ( + "context" "encoding/json" "fmt" "math/rand" "testing" + "github.com/dgraph-io/dgo/v240/protos/api" "github.com/dgraph-io/dgraph/v24/dgraphapi" + "github.com/dgraph-io/dgraph/v24/x" "github.com/stretchr/testify/require" ) @@ -256,3 +259,56 @@ func testVectorGraphQlMutationAndQuery(t *testing.T, hc *dgraphapi.HTTPClient) { } } } + +func TestVectorIndexDropPredicate(t *testing.T) { + gc, cleanup, err := dc.Client() + require.NoError(t, err) + defer cleanup() + require.NoError(t, gc.LoginIntoNamespace(context.Background(), + dgraphapi.DefaultUser, dgraphapi.DefaultPassword, x.GalaxyNamespace)) + + hc, err := dc.HTTPClient() + require.NoError(t, err) + require.NoError(t, hc.LoginIntoNamespace(dgraphapi.DefaultUser, + dgraphapi.DefaultPassword, x.GalaxyNamespace)) + + require.NoError(t, hc.UpdateGQLSchema(fmt.Sprintf(graphQLVectorSchema, "euclidean"))) + + var vectors [][]float32 + numProjects := 100 + projects := generateProjects(numProjects) + for _, project := range projects { + vectors = append(vectors, project.TitleV) + addProject(t, hc, project) + } + + schemaWithoutIndex := `type Project { + id: ID! + title: String! @search(by: [exact]) + title_v: [Float!] @embedding + } ` + + require.NoError(t, hc.UpdateGQLSchema(schemaWithoutIndex)) + + op := &api.Operation{ + DropAttr: "title_v", + } + require.NoError(t, gc.Alter(context.Background(), op)) + + numProjects = 100 + projects = generateProjects(numProjects) + for _, project := range projects { + vectors = append(vectors, project.TitleV) + addProject(t, hc, project) + } + + require.NoError(t, hc.UpdateGQLSchema(fmt.Sprintf(graphQLVectorSchema, "euclidean"))) + + // similar to query + for _, project := range projects { + similarProjects := queryProjectsSimilarByEmbedding(t, hc, project.TitleV, numProjects) + for _, similarVec := range similarProjects { + require.Contains(t, vectors, similarVec.TitleV) + } + } +} diff --git a/systest/vector/backup_test.go b/systest/vector/backup_test.go index ff401a5f52e..2ba3caccc1a 100644 --- a/systest/vector/backup_test.go +++ b/systest/vector/backup_test.go @@ -54,7 +54,7 @@ func TestVectorIncrBackupRestore(t *testing.T) { require.NoError(t, gc.SetupSchema(testSchema)) numVectors := 500 - pred := "project_discription_v" + pred := "project_description_v" allVectors := make([][][]float32, 0, 5) allRdfs := make([]string, 0, 5) for i := 1; i <= 5; i++ { @@ -78,7 +78,7 @@ func TestVectorIncrBackupRestore(t *testing.T) { require.NoError(t, hc.Restore(c, dgraphtest.DefaultBackupDir, "", incrFrom, i)) require.NoError(t, dgraphapi.WaitForRestore(c)) query := `{ - vector(func: has(project_discription_v)) { + vector(func: has(project_description_v)) { count(uid) } }` @@ -131,7 +131,7 @@ func TestVectorBackupRestore(t *testing.T) { require.NoError(t, gc.SetupSchema(testSchema)) numVectors := 1000 - pred := "project_discription_v" + pred := "project_description_v" rdfs, vectors := dgraphapi.GenerateRandomVectors(0, numVectors, 10, pred) mu := &api.Mutation{SetNquads: []byte(rdfs), CommitNow: true} @@ -171,7 +171,7 @@ func TestVectorBackupRestoreDropIndex(t *testing.T) { require.NoError(t, gc.SetupSchema(testSchema)) // add data to the vector predicate numVectors := 3 - pred := "project_discription_v" + pred := "project_description_v" rdfs, vectors := dgraphapi.GenerateRandomVectors(0, numVectors, 1, pred) mu := &api.Mutation{SetNquads: []byte(rdfs), CommitNow: true} _, err = gc.Mutate(mu) @@ -216,7 +216,7 @@ func TestVectorBackupRestoreDropIndex(t *testing.T) { require.NoError(t, dgraphapi.WaitForRestore(c)) query := ` { - vectors(func: has(project_discription_v)) { + vectors(func: has(project_description_v)) { count(uid) } }` @@ -258,7 +258,7 @@ func TestVectorBackupRestoreReIndexing(t *testing.T) { require.NoError(t, gc.SetupSchema(testSchema)) numVectors := 1000 - pred := "project_discription_v" + pred := "project_description_v" rdfs, vectors := dgraphapi.GenerateRandomVectors(0, numVectors, 10, pred) mu := &api.Mutation{SetNquads: []byte(rdfs), CommitNow: true} diff --git a/systest/vector/load_test.go b/systest/vector/load_test.go index 9f8eed39123..b6e31673eeb 100644 --- a/systest/vector/load_test.go +++ b/systest/vector/load_test.go @@ -62,7 +62,7 @@ func testExportAndLiveLoad(t *testing.T, c *dgraphtest.LocalCluster, exportForma require.NoError(t, gc.SetupSchema(testSchema)) numVectors := 100 - pred := "project_discription_v" + pred := "project_description_v" rdfs, vectors := dgraphapi.GenerateRandomVectors(0, numVectors, 10, pred) mu := &api.Mutation{SetNquads: []byte(rdfs), CommitNow: true} @@ -76,7 +76,7 @@ func testExportAndLiveLoad(t *testing.T, c *dgraphtest.LocalCluster, exportForma require.NoError(t, gc.DropAll()) query := `{ - vector(func: has(project_discription_v)) { + vector(func: has(project_description_v)) { count(uid) } }` diff --git a/systest/vector/vector_test.go b/systest/vector/vector_test.go index 87e7b661a87..345f1798f64 100644 --- a/systest/vector/vector_test.go +++ b/systest/vector/vector_test.go @@ -33,10 +33,9 @@ import ( ) const ( - testSchema = ` - project_discription_v: float32vector @index(hnsw(exponent: "5", metric: "euclidean")) .` - - testSchemaWithoutIndex = `project_discription_v: float32vector .` + testSchema = `project_description_v: float32vector @index(hnsw(exponent: "5", metric: "euclidean")) .` + testSchemaWithoutIndex = `project_description_v: float32vector .` + pred = "project_description_v" ) func testVectorQuery(t *testing.T, gc *dgraphapi.GrpcClient, vectors [][]float32, rdfs, pred string, topk int) { @@ -74,9 +73,8 @@ func TestVectorDropAll(t *testing.T) { dgraphapi.DefaultPassword, x.GalaxyNamespace)) numVectors := 100 - pred := "project_discription_v" - testVectorSimilarTo := func(vectors [][]float32, dropAll bool) { + testVectorSimilarTo := func(vectors [][]float32) { for _, vector := range vectors { _, err := gc.QueryMultipleVectorsUsingSimilarTo(vector, pred, 100) require.ErrorContains(t, err, "is not indexed") @@ -92,7 +90,7 @@ func TestVectorDropAll(t *testing.T) { require.NoError(t, err) query := `{ - vector(func: has(project_discription_v)) { + vector(func: has(project_description_v)) { count(uid) } }` @@ -108,7 +106,7 @@ func TestVectorDropAll(t *testing.T) { result, err = gc.Query(query) require.NoError(t, err) require.JSONEq(t, fmt.Sprintf(`{"vector":[{"count":%v}]}`, 0), string(result.GetJson())) - testVectorSimilarTo(vectors, true) + testVectorSimilarTo(vectors) } } @@ -149,14 +147,13 @@ func TestVectorSnapshot(t *testing.T) { require.NoError(t, err) numVectors := 500 - pred := "project_discription_v" rdfs, vectors := dgraphapi.GenerateRandomVectors(0, numVectors, 100, pred) mu := &api.Mutation{SetNquads: []byte(rdfs), CommitNow: true} _, err = gc.Mutate(mu) require.NoError(t, err) query := `{ - vector(func: has(project_discription_v)) { + vector(func: has(project_description_v)) { count(uid) } }` @@ -205,7 +202,6 @@ func TestVectorDropNamespace(t *testing.T) { dgraphapi.DefaultPassword, x.GalaxyNamespace)) numVectors := 500 - pred := "project_discription_v" for i := 0; i < 6; i++ { ns, err := hc.AddNamespace() require.NoError(t, err) @@ -216,7 +212,7 @@ func TestVectorDropNamespace(t *testing.T) { require.NoError(t, err) query := `{ - vector(func: has(project_discription_v)) { + vector(func: has(project_description_v)) { count(uid) } }` @@ -257,14 +253,13 @@ func TestVectorIndexRebuilding(t *testing.T) { require.NoError(t, gc.SetupSchema(testSchema)) - pred := "project_discription_v" numVectors := 1000 rdfs, vectors := dgraphapi.GenerateRandomVectors(0, numVectors, 100, pred) mu := &api.Mutation{SetNquads: []byte(rdfs), CommitNow: true} _, err = gc.Mutate(mu) require.NoError(t, err) query := `{ - vector(func: has(project_discription_v)) { + vector(func: has(project_description_v)) { count(uid) } }` @@ -309,9 +304,170 @@ func TestVectorIndexOnVectorPredWithoutData(t *testing.T) { dgraphapi.DefaultPassword, x.GalaxyNamespace)) require.NoError(t, gc.SetupSchema(testSchema)) - pred := "project_discription_v" vector := []float32{1.0, 2.0, 3.0} _, err = gc.QueryMultipleVectorsUsingSimilarTo(vector, pred, 10) require.NoError(t, err) } + +func TestVectorIndexDropPredicate(t *testing.T) { + conf := dgraphtest.NewClusterConfig().WithNumAlphas(1).WithNumZeros(1).WithReplicas(1).WithACL(time.Hour) + c, err := dgraphtest.NewLocalCluster(conf) + + require.NoError(t, err) + defer func() { c.Cleanup(t.Failed()) }() + require.NoError(t, c.Start()) + + gc, cleanup, err := c.Client() + defer cleanup() + require.NoError(t, err) + + require.NoError(t, gc.LoginIntoNamespace(context.Background(), + dgraphapi.DefaultUser, dgraphapi.DefaultPassword, x.GalaxyNamespace)) + + hc, err := c.HTTPClient() + require.NoError(t, err) + require.NoError(t, hc.LoginIntoNamespace(dgraphapi.DefaultUser, + dgraphapi.DefaultPassword, x.GalaxyNamespace)) + + require.NoError(t, gc.SetupSchema(testSchema)) + numVectors := 1000 + + // add vectors + rdfs, vectors := dgraphapi.GenerateRandomVectors(0, numVectors, 100, pred) + mu := &api.Mutation{SetNquads: []byte(rdfs), CommitNow: true} + _, err = gc.Mutate(mu) + require.NoError(t, err) + + require.NoError(t, gc.SetupSchema(testSchema)) + + for _, vect := range vectors { + similarVects, err := gc.QueryMultipleVectorsUsingSimilarTo(vect, pred, 2) + require.NoError(t, err) + require.Equal(t, 2, len(similarVects)) + } + + query := `{ + vector(func: has(project_description_v)) { + count(uid) + } + }` + + result, err := gc.Query(query) + require.NoError(t, err) + require.JSONEq(t, fmt.Sprintf(`{"vector":[{"count":%v}]}`, numVectors), string(result.GetJson())) + + // remove index from vector predicate + require.NoError(t, gc.SetupSchema(testSchemaWithoutIndex)) + + // drop predicate + op := &api.Operation{ + DropAttr: pred, + } + require.NoError(t, gc.Alter(context.Background(), op)) + + // generate random vectors + rdfs, vectors = dgraphapi.GenerateRandomVectors(0, numVectors, 100, pred) + mu = &api.Mutation{SetNquads: []byte(rdfs), CommitNow: true} + _, err = gc.Mutate(mu) + require.NoError(t, err) + + // add index back + require.NoError(t, gc.SetupSchema(testSchema)) + + result, err = gc.Query(query) + require.NoError(t, err) + require.JSONEq(t, fmt.Sprintf(`{"vector":[{"count":%v}]}`, numVectors), string(result.GetJson())) + + for _, vect := range vectors { + similarVects, err := gc.QueryMultipleVectorsUsingSimilarTo(vect, pred, 2) + require.NoError(t, err) + require.Equal(t, 2, len(similarVects)) + } +} + +func TestVectorIndexWithoutSchema(t *testing.T) { + conf := dgraphtest.NewClusterConfig().WithNumAlphas(1).WithNumZeros(1).WithReplicas(1).WithACL(time.Hour) + c, err := dgraphtest.NewLocalCluster(conf) + + require.NoError(t, err) + defer func() { c.Cleanup(t.Failed()) }() + require.NoError(t, c.Start()) + + gc, cleanup, err := c.Client() + defer cleanup() + require.NoError(t, err) + + require.NoError(t, gc.LoginIntoNamespace(context.Background(), + dgraphapi.DefaultUser, dgraphapi.DefaultPassword, x.GalaxyNamespace)) + + numVectors := 1000 + + // add vectors + rdfs, vectors := dgraphapi.GenerateRandomVectors(0, numVectors, 100, pred) + mu := &api.Mutation{SetNquads: []byte(rdfs), CommitNow: true} + _, err = gc.Mutate(mu) + require.NoError(t, err) + + require.NoError(t, gc.SetupSchema(testSchema)) + + for _, vect := range vectors { + similarVects, err := gc.QueryMultipleVectorsUsingSimilarTo(vect, pred, 2) + require.NoError(t, err) + require.Equal(t, 2, len(similarVects)) + } + + query := `{ + vector(func: has(project_description_v)) { + count(uid) + } + }` + + result, err := gc.Query(query) + require.NoError(t, err) + require.JSONEq(t, fmt.Sprintf(`{"vector":[{"count":%v}]}`, numVectors), string(result.GetJson())) +} + +func TestVectorIndexWithoutSchemaWithoutIndex(t *testing.T) { + conf := dgraphtest.NewClusterConfig().WithNumAlphas(1).WithNumZeros(1).WithReplicas(1).WithACL(time.Hour) + c, err := dgraphtest.NewLocalCluster(conf) + + require.NoError(t, err) + defer func() { c.Cleanup(t.Failed()) }() + require.NoError(t, c.Start()) + + gc, cleanup, err := c.Client() + defer cleanup() + require.NoError(t, err) + + require.NoError(t, gc.LoginIntoNamespace(context.Background(), + dgraphapi.DefaultUser, dgraphapi.DefaultPassword, x.GalaxyNamespace)) + + numVectors := 1000 + + // add vectors + rdfs, vectors := dgraphapi.GenerateRandomVectors(0, numVectors, 100, pred) + mu := &api.Mutation{SetNquads: []byte(rdfs), CommitNow: true} + _, err = gc.Mutate(mu) + require.NoError(t, err) + + require.NoError(t, gc.SetupSchema(testSchemaWithoutIndex)) + + for i, vect := range vectors { + triple := strings.Split(rdfs, "\n")[i] + uid := strings.Split(triple, " ")[0] + queriedVector, err := gc.QuerySingleVectorsUsingUid(uid, pred) + require.NoError(t, err) + require.Equal(t, vect, queriedVector[0]) + } + + query := `{ + vector(func: has(project_description_v)) { + count(uid) + } + }` + + result, err := gc.Query(query) + require.NoError(t, err) + require.JSONEq(t, fmt.Sprintf(`{"vector":[{"count":%v}]}`, numVectors), string(result.GetJson())) +}