diff --git a/docs/commands.adoc b/docs/commands.adoc index 83a25dd8..d63b9b57 100644 --- a/docs/commands.adoc +++ b/docs/commands.adoc @@ -635,7 +635,7 @@ Subcommands: vector search - Performs a vector search query Flags: - -h, --help - Display the help message for this comma + -h, --help - Display the help message for this command ``` The vector commands make it easy to explore the value that vector search can add to your data. @@ -669,52 +669,52 @@ Requires a vector to be used as the source of the search which can be supplied i For example imagine we have the following document stored in our cluster: [options="nowrap"] -``` -> doc get 10019 -╭───┬───────┬───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─────╮ -│ # │ id │ content │ ... │ -├───┼───────┼───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┼─────┤ -│ 0 │ 10019 │ ╭───────────────┬───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ │ ... 
│ -│ │ │ │ title │ Gillingham (Kent) │ │ │ -│ │ │ │ name │ Royal Engineers Museum │ │ │ -│ │ │ │ alt │ │ │ │ -│ │ │ │ address │ Prince Arthur Road, ME4 4UG │ │ │ -│ │ │ │ directions │ │ │ │ -│ │ │ │ phone │ +44 1634 822839 │ │ │ -│ │ │ │ tollfree │ │ │ │ -│ │ │ │ email │ │ │ │ -│ │ │ │ url │ http://www.remuseum.org.uk │ │ │ -│ │ │ │ hours │ Tues - Fri 9.00am to 5.00pm, Sat - Sun 11.30am - 5.00pm │ │ │ -│ │ │ │ image │ │ │ │ -│ │ │ │ price │ │ │ │ -│ │ │ │ content │ Adult - £6.99 for an Adult ticket that allows you to come back for further visits within a year (children's and concessionary tickets also available). Museum on military engineering and the history of │ │ │ -│ │ │ │ │ the British Empire. A quite extensive collection that takes about half a day to see. Of most interest to fans of British and military history or civil engineering. The outside collection of tank │ │ │ -│ │ │ │ │ mounted bridges etc can be seen for free. There is also an extensive series of themed special event weekends, admission to which is included in the cost of the annual ticket. │ │ │ -│ │ │ │ │ ╭──────────┬────────────────────╮ │ │ │ -│ │ │ │ geo │ │ lat │ 51.39 │ │ │ │ -│ │ │ │ │ │ lon │ 0.54 │ │ │ │ -│ │ │ │ │ │ accuracy │ RANGE_INTERPOLATED │ │ │ │ -│ │ │ │ │ ╰──────────┴────────────────────╯ │ │ │ -│ │ │ │ activity │ see │ │ │ -│ │ │ │ type │ landmark │ │ │ -│ │ │ │ id │ 10019 │ │ │ -│ │ │ │ country │ United Kingdom │ │ │ -│ │ │ │ city │ Gillingham │ │ │ -│ │ │ │ state │ │ │ │ -│ │ │ │ │ ╭──────┬───────╮ │ │ │ -│ │ │ │ contentVector │ │ 0 │ 0.03 │ │ │ │ -│ │ │ │ │ │ ... │ ... 
│ │ │ │ -│ │ │ │ │ │ 1023 │ -0.00 │ │ │ │ -│ │ │ │ │ ╰──────┴───────╯ │ │ │ -│ │ │ ╰───────────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ │ │ -╰───┴───────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴─────╯ +> doc get landmark_10019 +╭───┬────────────────┬───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─────╮ +│ # │ id │ content │ ... │ +├───┼────────────────┼───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┼─────┤ +│ 0 │ landmark_10019 │ ╭───────────────┬───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ │ ... │ +│ │ │ │ title │ Gillingham (Kent) │ │ │ +│ │ │ │ name │ Royal Engineers Museum │ │ │ +│ │ │ │ alt │ │ │ │ +│ │ │ │ address │ Prince Arthur Road, ME4 4UG │ │ │ +│ │ │ │ directions │ │ │ │ +│ │ │ │ phone │ +44 1634 822839 │ │ │ +│ │ │ │ tollfree │ │ │ │ +│ │ │ │ email │ │ │ │ +│ │ │ │ url │ http://www.remuseum.org.uk │ │ │ +│ │ │ │ hours │ Tues - Fri 9.00am to 5.00pm, Sat - Sun 11.30am - 5.00pm │ │ │ +│ │ │ │ image │ │ │ │ +│ │ │ │ price │ │ │ │ +│ │ │ │ content │ Adult - £6.99 for an Adult ticket that allows you to come back for further visits within a year (children's and concessionary tickets also available). 
Museum on military engineering and the │ │ │ +│ │ │ │ │ history of the British Empire. A quite extensive collection that takes about half a day to see. Of most interest to fans of British and military history or civil engineering. The outside │ │ │ +│ │ │ │ │ collection of tank mounted bridges etc can be seen for free. There is also an extensive series of themed special event weekends, admission to which is included in the cost of the annual │ │ │ +│ │ │ │ │ ticket. │ │ │ +│ │ │ │ │ ╭──────────┬────────────────────╮ │ │ │ +│ │ │ │ geo │ │ lat │ 51.39 │ │ │ │ +│ │ │ │ │ │ lon │ 0.54 │ │ │ │ +│ │ │ │ │ │ accuracy │ RANGE_INTERPOLATED │ │ │ │ +│ │ │ │ │ ╰──────────┴────────────────────╯ │ │ │ +│ │ │ │ activity │ see │ │ │ +│ │ │ │ type │ landmark │ │ │ +│ │ │ │ id │ 10019 │ │ │ +│ │ │ │ country │ United Kingdom │ │ │ +│ │ │ │ city │ Gillingham │ │ │ +│ │ │ │ state │ │ │ │ +│ │ │ │ │ ╭──────┬───────╮ │ │ │ +│ │ │ │ contentVector │ │ 0 │ 0.02 │ │ │ │ +│ │ │ │ │ │ ... │ ... │ │ │ │ +│ │ │ │ │ │ 1023 │ -0.00 │ │ │ │ +│ │ │ │ │ ╰──────┴───────╯ │ │ │ +│ │ │ ╰───────────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ │ │ +╰───┴────────────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴─────╯ ``` The field `contentVector` contains a vector of dimension 1024 that could be used as the input to a vector search. 
Due to the format of the data returned by <<_reading, doc get>> we need to https://www.nushell.sh/commands/docs/flatten.html[flatten] to remove the nesting then https://www.nushell.sh/commands/docs/select.html[select] the `contentVector` field: ``` -> doc get 10019 | flatten | select contentVector +> doc get landmark_10019 | flatten | select contentVector ╭───┬──────────────────╮ │ # │ contentVector │ ├───┼──────────────────┤ @@ -729,14 +729,14 @@ Due to the format of the data returned by <<_reading, doc get>> we need to https This can then be piped directly into `vector search`: ``` -> doc get 10019 | flatten | select contentVector | vector search landmark-content-index contentVector -╭───┬───────┬─────────────────────────────────────────┬─────────╮ -│ # │ id │ score │ cluster │ -├───┼───────┼─────────────────────────────────────────┼─────────┤ -│ 0 │ 10019 │ 340282350000000000000000000000000000000 │ local │ -│ 1 │ 16379 │ 1.0082568 │ local │ -│ 2 │ 33857 │ 0.9897698 │ local │ -╰───┴───────┴─────────────────────────────────────────┴─────────╯ +> doc get landmark_10019 | flatten | select contentVector | vector search landmark-content-index contentVector +╭───┬────────────────┬─────────────────────────────────────────┬─────────╮ +│ # │ id │ score │ cluster │ +├───┼────────────────┼─────────────────────────────────────────┼─────────┤ +│ 0 │ landmark_10019 │ 340282350000000000000000000000000000000 │ local │ +│ 1 │ landmark_28965 │ 1.0286634 │ local │ +│ 2 │ landmark_3547 │ 1.0150017 │ local │ +╰───┴────────────────┴─────────────────────────────────────────┴─────────╯ ``` We specify `contentVector` as the final positional parameter to `vector search` NOT because this is the name of the field that we got our vector from, but because this is name of the field on which the index was created. @@ -745,14 +745,14 @@ See <<_vector_create_index,vector create-index>> for further explanation. 
<<_subdoc_get, Subdoc get>> can also be used to fetch the source vector: ``` -> subdoc get contentVector 10019 | select content | vector search landmark-content-index contentVector -╭───┬───────┬─────────────────────────────────────────┬─────────╮ -│ # │ id │ score │ cluster │ -├───┼───────┼─────────────────────────────────────────┼─────────┤ -│ 0 │ 10019 │ 340282350000000000000000000000000000000 │ local │ -│ 1 │ 16379 │ 1.0082568 │ local │ -│ 2 │ 33857 │ 0.9897698 │ local │ -╰───┴───────┴─────────────────────────────────────────┴─────────╯ +> subdoc get contentVector landmark_10019 | select content | vector search landmark-content-index contentVector +╭───┬────────────────┬─────────────────────────────────────────┬─────────╮ +│ # │ id │ score │ cluster │ +├───┼────────────────┼─────────────────────────────────────────┼─────────┤ +│ 0 │ landmark_10019 │ 340282350000000000000000000000000000000 │ local │ +│ 1 │ landmark_28965 │ 1.0286634 │ local │ +│ 2 │ landmark_3547 │ 1.0150017 │ local │ +╰───┴────────────────┴─────────────────────────────────────────┴─────────╯ ``` Here we don't need to flatten and specify the field since the content of the `sub doc` output already only holds the `contentVector` field. 
@@ -762,13 +762,13 @@ Another format that is accepted is the output of <<_vector_enrich_text,vector e ``` > vector enrich-text "some string" --dimension 1024 | vector search landmark-content-index contentVector Embedding batch 1/1 -╭───┬───────┬────────────┬─────────╮ -│ # │ id │ score │ cluster │ -├───┼───────┼────────────┼─────────┤ -│ 0 │ 21681 │ 0.7402005 │ local │ -│ 1 │ 21682 │ 0.73517126 │ local │ -│ 2 │ 6073 │ 0.70910853 │ local │ -╰───┴───────┴────────────┴─────────╯ +╭───┬────────────────┬────────────┬─────────╮ +│ # │ id │ score │ cluster │ +├───┼────────────────┼────────────┼─────────┤ +│ 0 │ landmark_21681 │ 0.70561004 │ local │ +│ 1 │ landmark_20732 │ 0.7003826 │ local │ +│ 2 │ landmark_21682 │ 0.6987926 │ local │ +╰───┴────────────────┴────────────┴─────────╯ ``` Note the `--dimension` flag used here, the vector provided as the source of the search must match the dimension of the vectors on which the index was <<_vector_create_index,created>> else you will get no results. 
@@ -791,13 +791,13 @@ If only interested in a particular field then the <<_subdoc_get,subdoc get>> com ``` 👤 Administrator 🏠 local in ☁️ default._default._default > vector search landmark-content-index contentVector $vector | subdoc get address -╭───┬───────┬───────────────────────────────────────┬─────────────────────┬───────┬─────────╮ -│ # │ id │ content │ cas │ error │ cluster │ -├───┼───────┼───────────────────────────────────────┼─────────────────────┼───────┼─────────┤ -│ 0 │ 11956 │ Hornchurch Road, Hornchurch, RM11 1JU │ 1722871671832641536 │ │ local │ -│ 1 │ 25284 │ 4040 Twiggs St │ 1722871685304025088 │ │ local │ -│ 2 │ 7744 │ Grandstand Road │ 1722871709487202304 │ │ local │ -╰───┴───────┴───────────────────────────────────────┴─────────────────────┴───────┴─────────╯ +╭───┬────────────────┬───────────────────────────────────────┬─────────────────────┬───────┬─────────╮ +│ # │ id │ content │ cas │ error │ cluster │ +├───┼────────────────┼───────────────────────────────────────┼─────────────────────┼───────┼─────────┤ +│ 0 │ landmark_11956 │ Hornchurch Road, Hornchurch, RM11 1JU │ 1722871671832641536 │ │ local │ +│ 1 │ landmark_25284 │ 4040 Twiggs St │ 1722871685304025088 │ │ local │ +│ 2 │ landmark_7744 │ Grandstand Road │ 1722871709487202304 │ │ local │ +╰───┴────────────────┴───────────────────────────────────────┴─────────────────────┴───────┴─────────╯ ``` The environment is important when using the `vector search` command as the fully qualified index name is constructed from the active bucket and scope. 
@@ -823,13 +823,13 @@ If you want to use an index that was created against a different bucket/scope to ``` 👤 Charlie 🏠 local in 🗄 travel-sample.inventory._default > vector search landmark-content-index contentVector $vector --bucket default --scope _default -╭───┬───────┬─────────────────────────────────────────┬─────────╮ -│ # │ id │ score │ cluster │ -├───┼───────┼─────────────────────────────────────────┼─────────┤ -│ 0 │ 10019 │ 340282350000000000000000000000000000000 │ local │ -│ 1 │ 16379 │ 1.0082568 │ local │ -│ 2 │ 33857 │ 0.9897698 │ local │ -╰───┴───────┴─────────────────────────────────────────┴─────────╯ +╭───┬────────────────┬─────────────────────────────────────────┬─────────╮ +│ # │ id │ score │ cluster │ +├───┼────────────────┼─────────────────────────────────────────┼─────────┤ +│ 0 │ landmark_10019 │ 340282350000000000000000000000000000000 │ local │ +│ 1 │ landmark_28965 │ 1.0286634 │ local │ +│ 2 │ landmark_3547 │ 1.0150017 │ local │ +╰───┴────────────────┴─────────────────────────────────────────┴─────────╯ ``` ==== `vector create-index` @@ -910,7 +910,7 @@ Flags: --dimension - dimension of the resulting embeddings --maxTokens - the token per minute limit for the provider/model --id-column - the name of the id column if used with an input stream - --vectorField - the name of the field into which the embedding is written, defaults to fieldVector + --vectorField - the name of the field into which the embedding is written Parameters: field : the field from which the vector is generated @@ -961,45 +961,45 @@ We can use the content field to generate an embedding, and the result will be a [options="nowrap"] ``` -> doc get landmark_10019 | select content | vector enrich-doc content +> doc get landmark_10019 | vector enrich-doc content Embedding batch 1/1 
-╭───┬───────┬───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ -│ # │ id │ content │ -├───┼───────┼───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┤ -│ 0 │ 10019 │ ╭───────────────┬───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ │ -│ │ │ │ title │ Gillingham (Kent) │ │ -│ │ │ │ name │ Royal Engineers Museum │ │ -│ │ │ │ alt │ │ │ -│ │ │ │ address │ Prince Arthur Road, ME4 4UG │ │ -│ │ │ │ directions │ │ │ -│ │ │ │ phone │ +44 1634 822839 │ │ -│ │ │ │ tollfree │ │ │ -│ │ │ │ email │ │ │ -│ │ │ │ url │ http://www.remuseum.org.uk │ │ -│ │ │ │ hours │ Tues - Fri 9.00am to 5.00pm, Sat - Sun 11.30am - 5.00pm │ │ -│ │ │ │ image │ │ │ -│ │ │ │ price │ │ │ -│ │ │ │ content │ Adult - £6.99 for an Adult ticket that allows you to come back for further visits within a year (children's and concessionary tickets also available). Museum on military engineering and the history of the │ │ -│ │ │ │ │ British Empire. A quite extensive collection that takes about half a day to see. Of most interest to fans of British and military history or civil engineering. The outside collection of tank mounted bridges │ │ -│ │ │ │ │ etc can be seen for free. There is also an extensive series of themed special event weekends, admission to which is included in the cost of the annual ticket. 
│ │ -│ │ │ │ │ ╭──────────┬────────────────────╮ │ │ -│ │ │ │ geo │ │ lat │ 51.39 │ │ │ -│ │ │ │ │ │ lon │ 0.54 │ │ │ -│ │ │ │ │ │ accuracy │ RANGE_INTERPOLATED │ │ │ -│ │ │ │ │ ╰──────────┴────────────────────╯ │ │ -│ │ │ │ activity │ see │ │ -│ │ │ │ type │ landmark │ │ -│ │ │ │ id │ 10019 │ │ -│ │ │ │ country │ United Kingdom │ │ -│ │ │ │ city │ Gillingham │ │ -│ │ │ │ state │ │ │ -│ │ │ │ │ ╭──────┬───────╮ │ │ -│ │ │ │ contentVector │ │ 0 │ 0.02 │ │ │ -│ │ │ │ │ │ ... │ ... │ │ │ -│ │ │ │ │ │ 1535 │ -0.01 │ │ │ -│ │ │ │ │ ╰──────┴───────╯ │ │ -│ │ │ ╰───────────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ │ -╰───┴───────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭───┬────────────────┬─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ # │ id │ content │ +├───┼────────────────┼─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┤ +│ 0 │ landmark_10019 │ ╭───────────────┬─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ │ +│ │ │ │ title │ Gillingham (Kent) │ │ +│ │ │ │ name │ Royal Engineers Museum │ │ +│ │ │ │ alt │ │ │ +│ │ │ │ address │ Prince Arthur Road, ME4 4UG │ │ +│ │ │ │ directions │ │ │ +│ │ │ │ phone │ +44 1634 822839 
│ │ +│ │ │ │ tollfree │ │ │ +│ │ │ │ email │ │ │ +│ │ │ │ url │ http://www.remuseum.org.uk │ │ +│ │ │ │ hours │ Tues - Fri 9.00am to 5.00pm, Sat - Sun 11.30am - 5.00pm │ │ +│ │ │ │ image │ │ │ +│ │ │ │ price │ │ │ +│ │ │ │ content │ Adult - £6.99 for an Adult ticket that allows you to come back for further visits within a year (children's and concessionary tickets also available). Museum on military engineering and the │ │ +│ │ │ │ │ history of the British Empire. A quite extensive collection that takes about half a day to see. Of most interest to fans of British and military history or civil engineering. The outside │ │ +│ │ │ │ │ collection of tank mounted bridges etc can be seen for free. There is also an extensive series of themed special event weekends, admission to which is included in the cost of the annual ticket. │ │ +│ │ │ │ │ ╭──────────┬────────────────────╮ │ │ +│ │ │ │ geo │ │ lat │ 51.39 │ │ │ +│ │ │ │ │ │ lon │ 0.54 │ │ │ +│ │ │ │ │ │ accuracy │ RANGE_INTERPOLATED │ │ │ +│ │ │ │ │ ╰──────────┴────────────────────╯ │ │ +│ │ │ │ activity │ see │ │ +│ │ │ │ type │ landmark │ │ +│ │ │ │ id │ 10019 │ │ +│ │ │ │ country │ United Kingdom │ │ +│ │ │ │ city │ Gillingham │ │ +│ │ │ │ state │ │ │ +│ │ │ │ │ ╭──────┬───────╮ │ │ +│ │ │ │ contentVector │ │ 0 │ 0.02 │ │ │ +│ │ │ │ │ │ ... │ ... 
│ │ │ +│ │ │ │ │ │ 1535 │ -0.01 │ │ │ +│ │ │ │ │ ╰──────┴───────╯ │ │ +│ │ │ ╰───────────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ │ +╰───┴────────────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ``` The resulting document is the same as the original, but with a new field `contentVector` which contains the result of embedding the content field with the <<_cb_env_llm,active llm>>. @@ -1008,7 +1008,7 @@ This default behaviour can be overwritten with the `vectorField` flag. The resulting document is formatted with an id and content column which allows it to be piped into a `doc upsert` command to store it in the connected couchbase cluster. ``` -> doc get landmark_10019 | select content | vector enrich-doc content | doc upsert +> doc get landmark_10019 | vector enrich-doc content | doc upsert Embedding batch 1/1 ╭───┬───────────┬─────────┬────────┬──────────┬─────────╮ │ # │ processed │ success │ failed │ failures │ cluster │ @@ -1021,7 +1021,16 @@ Embedding batch 1/1 To repeat what we have done above on all the landmark documents we can get all the docs using a query and pipe the result directly into the `enrich-doc` command: ``` -> query "SELECT * FROM `travel-sample` WHERE type = 'landmark'" | vector enrich-doc content +> query "SELECT meta().id, * FROM `travel-sample` WHERE type = 'landmark'" | vector enrich-doc content +``` + +When using the result of a query as the input to `enrich-doc` you need to query for the document id as well as the contents, hence "SELECT meta().id, *". +This allows the new document output by the command to have the same id as the original. 
+If you would rather use a different field of the doc as the id of the resulting documents, select that field in the query and pass its name with the `--id-column` flag. +For example, to use the "name" field as the id of the resulting documents: + +``` +> query "SELECT name, * FROM `travel-sample` WHERE type = 'landmark'" | vector enrich-doc content --id-column name ``` ==== `vector enrich-text` diff --git a/src/cli/vector_enrich_doc.rs b/src/cli/vector_enrich_doc.rs index 227b0932..66e594da 100644 --- a/src/cli/vector_enrich_doc.rs +++ b/src/cli/vector_enrich_doc.rs @@ -102,12 +102,12 @@ impl Command for VectorEnrichDoc { Example { description: "Fetch a single doc with id '12345' and enrich the field named 'description'", - example: "doc get 12345 | select content | vector enrich-doc description --model models/text-embedding-004", + example: "doc get 12345 | vector enrich-doc description --model models/text-embedding-004", result: None, }, Example { description: "Fetch and enrich all landmark documents from travel sample and upload the results to couchabase", - example: "query 'SELECT * FROM `travel-sample` WHERE type = \"landmark\"' | select content | vector enrich-doc content --model amazon.titan-embed-text-v1 | doc upsert", + example: "query 'SELECT meta().id, * FROM `travel-sample` WHERE type = \"landmark\"' | vector enrich-doc content --model amazon.titan-embed-text-v1 | doc upsert", result: None, }, ] @@ -127,6 +127,7 @@ fn vector_enrich_doc( let mut field_contents: Vec = vec![]; let mut input_records: Vec = vec![]; + let mut input_ids: Vec = vec![]; let max_tokens: Option = call.get_flag::(engine_state, stack, "maxTokens")?; @@ -164,11 +165,54 @@ fn vector_enrich_doc( } }; - let doc_json = match rec.get_index(0).unwrap().1.as_record() { - Ok(r) => r, - Err(_) => { - return Err(could_not_parse_input_error(span)); + // Check if the input is from a doc get + let (doc_json, id) = if rec.contains("id") + && rec.contains("content") + && rec.contains("cas") + && 
rec.contains("error") + && rec.contains("cluster") + { + // error is either Nothing which will result in an empty string or an error string + // in either case the double unwrap here is safe + let err = rec.get("error").unwrap().as_str().unwrap(); + if !err.is_empty() { + return Err(generic_error( + format!("error from doc get input: {}", err), + None, + None, + )); } + + ( + //Safe to unwrap as we have validated the presence of these cols in the record + rec.get("content").unwrap().as_record()?, + rec.get("id").unwrap().as_str()?.to_string(), + ) + } else { + // Else piped input is from a query, which needs to contain 3 columns, one to be used as the ID, one holding the json doc and finally one with the cluster + if rec.len() != 3 { + return Err(generic_error( + "input incorrectly formatted", + "Run 'vector enrich-doc --help' for examples with input from 'doc get' and 'query'".to_string(), + None + )); + } + + let id = read_id(rec, id_column.clone())?; + + // No need to check this is set after loop, since we know there are 3 columns one will not be id or cluster + let mut content_column = "".to_string(); + for column in rec.columns() { + if column != "cluster" && *column != id_column { + content_column = column.clone(); + } + } + + let res = match rec.get(content_column).unwrap().as_record() { + Ok(r) => Ok(r), + Err(_) => Err(could_not_parse_input_error(span)), + }?; + (res, id) }; let content = read_from_field(doc_json, field.clone(), span)?; @@ -177,13 +221,17 @@ fn vector_enrich_doc( if !content.is_empty() { field_contents.push(content); input_records.push(doc_json.clone()); + input_ids.push(id); } } } Value::Record { val, .. 
} => { let content = read_from_field(&val.clone().into_owned(), field.clone(), span)?; + let id = read_id(&val, id_column)?; + field_contents.push(content); input_records.push(val.into_owned()); + input_ids.push(id); } _ => { return Err(could_not_parse_input_error(span)); @@ -229,31 +277,10 @@ fn vector_enrich_doc( }, ); - let id = match input_records[count].get(id_column.clone()) { - Some(id) => match id { - Value::String { val, .. } => val.clone(), - Value::Int { val, .. } => val.to_string(), - _ => { - return Err(generic_error( - "Contents of 'id' column must be Int or String", - "A different column can be used as the id of the resulting docs with the '--id-column' flag".to_string(), - None - )); - } - }, - None => { - return Err(generic_error( - "No 'id' field in docs", - "An 'id' field is required to use as the IDs for the created docs, if not called 'id' specify using --id-column".to_string(), - None - )); - } - }; - let cols = vec!["id".to_string(), "content".to_string()]; let vals = vec![ Value::String { - val: id, + val: input_ids[count].clone(), internal_span: span, }, Value::Record { @@ -296,6 +323,29 @@ fn read_from_field(doc: &Record, field: String, span: Span) -> Result Result { + match rec.get(id_column.clone()) { + Some(id) => match id { + Value::String { val, .. } => Ok(val.clone()), + Value::Int { val, .. } => Ok(val.to_string()), + _ => { + Err(generic_error( + "Contents of 'id' column must be Int or String", + "A different column can be used as the id of the resulting docs with the '--id-column' flag".to_string(), + None + )) + } + }, + None => { + Err(generic_error( + "No 'id' field in input", + "An 'id' field is required to use as the IDs for the created docs, if not called 'id' specify using --id-column".to_string(), + None + )) + } + } +} + fn could_not_parse_input_error(span: Span) -> ShellError { generic_error( "Could not parse piped input",