Skip to content

Commit

Permalink
add index after adding data without vector schema for vector predicate
Browse files Browse the repository at this point in the history
  • Loading branch information
shivaji-kharse committed Dec 6, 2024
1 parent 889008f commit 9b559ed
Show file tree
Hide file tree
Showing 7 changed files with 283 additions and 41 deletions.
28 changes: 14 additions & 14 deletions chunker/json_parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ type Person struct {
type Product struct {
Uid string `json:"uid,omitempty"`
Name string `json:"name"`
Discription string `json:"discription"`
Discription_v string `json:"discription_v"`
Description string `json:"description"`
Description_v string `json:"description_v"`
}

func Parse(b []byte, op int) ([]*api.NQuad, error) {
Expand Down Expand Up @@ -1406,7 +1406,7 @@ func TestNquadsJsonEmptyStringVectorPred(t *testing.T) {
p := Product{
Uid: "1",
Name: "",
Discription_v: "",
Description_v: "",
}

b, err := json.Marshal([]Product{p})
Expand All @@ -1420,16 +1420,16 @@ func TestNquadsJsonEmptyStringVectorPred(t *testing.T) {
require.NoError(t, err)
require.Equal(t, 3, len(fastNQ))

// predicate Name should be empty and edge for Discription_v should not be there
// predicate Name should be empty and edge for Description_v should not be there
// we do not create edge for "" in float32vector.
exp := &Experiment{
t: t,
nqs: nq,
schema: `name: string @index(exact) .
discription_v: float32vector .`,
description_v: float32vector .`,
query: `{product(func: uid(1)) {
name
discription_v
description_v
}}`,
expected: `{"product":[{
"name":""}]}`,
Expand All @@ -1443,7 +1443,7 @@ func TestNquadsJsonEmptyStringVectorPred(t *testing.T) {
func TestNquadsJsonEmptySquareBracketVectorPred(t *testing.T) {
p := Product{
Name: "ipad",
Discription_v: "[]",
Description_v: "[]",
}

b, err := json.Marshal(p)
Expand All @@ -1457,16 +1457,16 @@ func TestNquadsJsonEmptySquareBracketVectorPred(t *testing.T) {
require.NoError(t, err)
require.Equal(t, 3, len(fastNQ))

// predicate Name should have value "ipad" and edge for Discription_v should not be there
// predicate Name should have value "ipad" and edge for Description_v should not be there
// we do not create edge for [] in float32vector.
exp := &Experiment{
t: t,
nqs: nq,
schema: `name: string @index(exact) .
discription_v: float32vector .`,
description_v: float32vector .`,
query: `{product(func: eq(name, "ipad")) {
name
discription_v
description_v
}}`,
expected: `{"product":[{
"name":"ipad"}]}`,
Expand All @@ -1480,7 +1480,7 @@ func TestNquadsJsonEmptySquareBracketVectorPred(t *testing.T) {
func TestNquadsJsonValidVector(t *testing.T) {
p := Product{
Name: "ipad",
Discription_v: "[1.1, 2.2, 3.3]",
Description_v: "[1.1, 2.2, 3.3]",
}

b, err := json.Marshal(p)
Expand All @@ -1498,14 +1498,14 @@ func TestNquadsJsonValidVector(t *testing.T) {
t: t,
nqs: nq,
schema: `name: string @index(exact) .
discription_v: float32vector .`,
description_v: float32vector .`,
query: `{product(func: eq(name, "ipad")) {
name
discription_v
description_v
}}`,
expected: `{"product":[{
"name":"ipad",
"discription_v":[1.1, 2.2, 3.3]}]}`,
"description_v":[1.1, 2.2, 3.3]}]}`,
}
exp.verify()

Expand Down
2 changes: 1 addition & 1 deletion dgraphapi/vector.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ func UnmarshalVectorResp(resp *api.Response) ([][]float32, error) {
type Data struct {
Vector []struct {
UID string `json:"uid"`
ProjectDescriptionV []float32 `json:"project_discription_v"`
ProjectDescriptionV []float32 `json:"project_description_v"`
} `json:"vector"`
}
var data Data
Expand Down
36 changes: 33 additions & 3 deletions posting/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,7 @@ func (txn *Txn) addIndexMutations(ctx context.Context, info *indexMutationInfo)
// Similarly, the current assumption is that we have at most one
// Vector Index, but this assumption may break later.
if info.op != pb.DirectedEdge_DEL &&
len(data) > 0 && data[0].Tid == types.VFloatID &&
len(info.factorySpecs) > 0 {
len(data) > 0 && len(info.factorySpecs) > 0 {
// retrieve vector from inUuid save as inVec
inVec := types.BytesAsFloatArray(data[0].Value.([]byte))
tc := hnsw.NewTxnCache(NewViTxn(txn), txn.StartTs)
Expand Down Expand Up @@ -1387,6 +1386,37 @@ func rebuildTokIndex(ctx context.Context, rb *IndexRebuild) error {
builder.fn = func(uid uint64, pl *List, txn *Txn) ([]*pb.DirectedEdge, error) {
edge := pb.DirectedEdge{Attr: rb.Attr, Entity: uid}
edges := []*pb.DirectedEdge{}

if runForVectors {
val, err := pl.Value(txn.StartTs)
if err != nil {
return []*pb.DirectedEdge{}, err
}

if val.Tid != types.VFloatID {
sv, err := types.Convert(val, types.VFloatID)
if err != nil {
return []*pb.DirectedEdge{}, err
}
b := types.ValueForType(types.BinaryID)
if err = types.Marshal(sv, &b); err != nil {
return []*pb.DirectedEdge{}, err
}
edge.Value = b.Value.([]byte)
edge.ValueType = types.VFloatID.Enum()

inKey := x.DataKey(edge.Attr, uid)
p, err := txn.Get(inKey)
if err != nil {
return []*pb.DirectedEdge{}, err
}

if err := p.addMutation(ctx, txn, &edge); err != nil {
return []*pb.DirectedEdge{}, err
}
}
}

err := pl.Iterate(txn.StartTs, 0, func(p *pb.Posting) error {
// Add index entries based on p.
val := types.Val{
Expand Down Expand Up @@ -1419,7 +1449,7 @@ func rebuildTokIndex(ctx context.Context, rb *IndexRebuild) error {
}
return edges, err
}
if len(factorySpecs) != 0 {
if runForVectors {
return builder.RunWithoutTemp(ctx)
}
return builder.Run(ctx)
Expand Down
56 changes: 56 additions & 0 deletions query/vector/vector_graphql_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,15 @@
package query

import (
"context"
"encoding/json"
"fmt"
"math/rand"
"testing"

"github.com/dgraph-io/dgo/v240/protos/api"
"github.com/dgraph-io/dgraph/v24/dgraphapi"
"github.com/dgraph-io/dgraph/v24/x"
"github.com/stretchr/testify/require"
)

Expand Down Expand Up @@ -256,3 +259,56 @@ func testVectorGraphQlMutationAndQuery(t *testing.T, hc *dgraphapi.HTTPClient) {
}
}
}

// TestVectorIndexDropPredicate exercises re-applying the vector GraphQL
// schema after the "title_v" attribute has been dropped and repopulated.
//
// Steps, in order:
//  1. log in over gRPC and HTTP to the galaxy namespace;
//  2. apply graphQLVectorSchema formatted with "euclidean" and insert a
//     first batch of generated projects;
//  3. apply a schema in which title_v carries only @embedding, then drop
//     the "title_v" attribute through Alter;
//  4. insert a second batch and re-apply the schema from step 2;
//  5. run a similarity query for each second-batch embedding and require
//     that every returned vector is one this test inserted (either batch).
//
// NOTE(review): graphQLVectorSchema is defined elsewhere; presumably it
// declares a vector index on title_v — confirm against its definition.
func TestVectorIndexDropPredicate(t *testing.T) {
	const batchSize = 100
	ctx := context.Background()

	gc, cleanup, err := dc.Client()
	require.NoError(t, err)
	defer cleanup()
	err = gc.LoginIntoNamespace(ctx,
		dgraphapi.DefaultUser, dgraphapi.DefaultPassword, x.GalaxyNamespace)
	require.NoError(t, err)

	hc, err := dc.HTTPClient()
	require.NoError(t, err)
	err = hc.LoginIntoNamespace(dgraphapi.DefaultUser,
		dgraphapi.DefaultPassword, x.GalaxyNamespace)
	require.NoError(t, err)

	require.NoError(t, hc.UpdateGQLSchema(fmt.Sprintf(graphQLVectorSchema, "euclidean")))

	// inserted accumulates the embeddings of every project added by this
	// test, across both batches; it is the reference set for the final check.
	var inserted [][]float32

	firstBatch := generateProjects(batchSize)
	for _, p := range firstBatch {
		inserted = append(inserted, p.TitleV)
		addProject(t, hc, p)
	}

	schemaWithoutIndex := `type Project {
id: ID!
title: String! @search(by: [exact])
title_v: [Float!] @embedding
} `

	require.NoError(t, hc.UpdateGQLSchema(schemaWithoutIndex))

	// Drop the attribute while the schema without the index is in effect.
	dropOp := &api.Operation{DropAttr: "title_v"}
	require.NoError(t, gc.Alter(ctx, dropOp))

	secondBatch := generateProjects(batchSize)
	for _, p := range secondBatch {
		inserted = append(inserted, p.TitleV)
		addProject(t, hc, p)
	}

	// Re-apply the original schema now that data exists for the predicate.
	require.NoError(t, hc.UpdateGQLSchema(fmt.Sprintf(graphQLVectorSchema, "euclidean")))

	// Similarity queries are issued only for the second batch; each result
	// must be a vector recorded in inserted.
	for _, p := range secondBatch {
		matches := queryProjectsSimilarByEmbedding(t, hc, p.TitleV, batchSize)
		for _, m := range matches {
			require.Contains(t, inserted, m.TitleV)
		}
	}
}
12 changes: 6 additions & 6 deletions systest/vector/backup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ func TestVectorIncrBackupRestore(t *testing.T) {
require.NoError(t, gc.SetupSchema(testSchema))

numVectors := 500
pred := "project_discription_v"
pred := "project_description_v"
allVectors := make([][][]float32, 0, 5)
allRdfs := make([]string, 0, 5)
for i := 1; i <= 5; i++ {
Expand All @@ -78,7 +78,7 @@ func TestVectorIncrBackupRestore(t *testing.T) {
require.NoError(t, hc.Restore(c, dgraphtest.DefaultBackupDir, "", incrFrom, i))
require.NoError(t, dgraphapi.WaitForRestore(c))
query := `{
vector(func: has(project_discription_v)) {
vector(func: has(project_description_v)) {
count(uid)
}
}`
Expand Down Expand Up @@ -131,7 +131,7 @@ func TestVectorBackupRestore(t *testing.T) {
require.NoError(t, gc.SetupSchema(testSchema))

numVectors := 1000
pred := "project_discription_v"
pred := "project_description_v"
rdfs, vectors := dgraphapi.GenerateRandomVectors(0, numVectors, 10, pred)

mu := &api.Mutation{SetNquads: []byte(rdfs), CommitNow: true}
Expand Down Expand Up @@ -171,7 +171,7 @@ func TestVectorBackupRestoreDropIndex(t *testing.T) {
require.NoError(t, gc.SetupSchema(testSchema))
// add data to the vector predicate
numVectors := 3
pred := "project_discription_v"
pred := "project_description_v"
rdfs, vectors := dgraphapi.GenerateRandomVectors(0, numVectors, 1, pred)
mu := &api.Mutation{SetNquads: []byte(rdfs), CommitNow: true}
_, err = gc.Mutate(mu)
Expand Down Expand Up @@ -216,7 +216,7 @@ func TestVectorBackupRestoreDropIndex(t *testing.T) {
require.NoError(t, dgraphapi.WaitForRestore(c))

query := ` {
vectors(func: has(project_discription_v)) {
vectors(func: has(project_description_v)) {
count(uid)
}
}`
Expand Down Expand Up @@ -258,7 +258,7 @@ func TestVectorBackupRestoreReIndexing(t *testing.T) {
require.NoError(t, gc.SetupSchema(testSchema))

numVectors := 1000
pred := "project_discription_v"
pred := "project_description_v"
rdfs, vectors := dgraphapi.GenerateRandomVectors(0, numVectors, 10, pred)

mu := &api.Mutation{SetNquads: []byte(rdfs), CommitNow: true}
Expand Down
4 changes: 2 additions & 2 deletions systest/vector/load_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ func testExportAndLiveLoad(t *testing.T, c *dgraphtest.LocalCluster, exportForma
require.NoError(t, gc.SetupSchema(testSchema))

numVectors := 100
pred := "project_discription_v"
pred := "project_description_v"
rdfs, vectors := dgraphapi.GenerateRandomVectors(0, numVectors, 10, pred)

mu := &api.Mutation{SetNquads: []byte(rdfs), CommitNow: true}
Expand All @@ -76,7 +76,7 @@ func testExportAndLiveLoad(t *testing.T, c *dgraphtest.LocalCluster, exportForma
require.NoError(t, gc.DropAll())

query := `{
vector(func: has(project_discription_v)) {
vector(func: has(project_description_v)) {
count(uid)
}
}`
Expand Down
Loading

0 comments on commit 9b559ed

Please sign in to comment.