From e5eb570efdf375fc24bb7b989b7c3e35722c1189 Mon Sep 17 00:00:00 2001 From: Jon Meredith Date: Tue, 19 Jan 2016 10:46:48 -0700 Subject: [PATCH 001/122] Proof-of-concept branch for relaxing local key constraints. Queries still require equality on family and series with a range of timestamps, but will now retrieve records with local keys defined after the partition key. --- src/riak_kv_eleveldb_backend.erl | 24 +++++++++++++++++++----- src/riak_kv_qry_compiler.erl | 7 ++++--- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/src/riak_kv_eleveldb_backend.erl b/src/riak_kv_eleveldb_backend.erl index cfd3f2bb2b..f5b4cbd718 100644 --- a/src/riak_kv_eleveldb_backend.erl +++ b/src/riak_kv_eleveldb_backend.erl @@ -454,12 +454,24 @@ fold_indexes_fun(FoldIndexFun) -> end end. +build_list({_K, _V}=KV, Acc) -> + [KV | Acc]. + range_scan(FoldIndexFun, Buffer, Opts, #state{fold_opts=_FoldOpts, ref=Ref}) -> {_, Bucket, Qry} = proplists:lookup(index, Opts), ?SQL_SELECT{'WHERE' = W, helper_mod = Mod, local_key = LK} = Qry, + %% Work out what the elements of the local key are after the partitioning key. + %% Used below to add dummy fields to pad out the StartK2/EndK2 fields. + LKAST = LK#key_v1.ast, + ExtraLK = case length(LKAST) > 3 of + true -> + lists:nthtail(3, LKAST); + false -> + [] + end, %% this is all super-fugly {startkey, StartK} = proplists:lookup(startkey, W), {endkey, EndK} = proplists:lookup(endkey, W), @@ -478,17 +490,19 @@ range_scan(FoldIndexFun, Buffer, Opts, #state{fold_opts=_FoldOpts, [] -> AdditionalOptions; _ -> [{range_filter, Filter} | AdditionalOptions] end, - StartK2 = [{Field, Val} || {Field, _Type, Val} <- StartK], + %% Pad with missing local keys after the timestamp - use minimum erlang term value of '0' to make + %% sure we start the search at the beginning. + StartK2 = [{Field, Val} || {Field, _Type, Val} <- StartK] ++ [{N, 0} || #param_v1{name = [N]} <- ExtraLK], StartK3 = riak_ql_ddl:make_key(Mod, LK, StartK2), StartK4 = riak_kv_ts_util:encode_typeval_key(StartK3), %% TODO: Avoid adding/removing type info StartKey = to_object_key(Bucket, StartK4), - EndK2 = [{Field, Val} || {Field, _Type, Val} <- EndK], + %% Pad the missing end - not perfect because there could be longer bitstrings, but will have to do for prototype + %% TODO: FIX END KEY + EndK2 = [{Field, Val} || {Field, _Type, Val} <- EndK] ++ [{N, <<16#ffffffff:64>>} || #param_v1{name = [N]} <- ExtraLK], EndK3 = riak_ql_ddl:make_key(Mod, LK, EndK2), EndK4 = riak_kv_ts_util:encode_typeval_key(EndK3), EndKey = to_object_key(Bucket, EndK4), - FoldFun = fun({K, V}, Acc) -> - [{K, V} | Acc] - end, + FoldFun = fun build_list/2, Options = [ {start_key, StartKey}, {end_key, EndKey}, diff --git a/src/riak_kv_qry_compiler.erl b/src/riak_kv_qry_compiler.erl index 8f2b6b1b0a..6d48f3749f 100644 --- a/src/riak_kv_qry_compiler.erl +++ b/src/riak_kv_qry_compiler.erl @@ -524,11 +524,12 @@ quantum_field_name(#ddl_v1{ partition_key = PK }) -> #hash_fn_v1{args = [#param_v1{name = QFieldName} | _]} = Quantum, QFieldName. 
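%% [Editor's sketch, not part of the original patch] A self-contained
%% illustration of the local-key padding introduced in range_scan/4 above,
%% using hypothetical field names. Any local-key fields after the
%% three-element partition key are padded with 0 on the start key (the
%% patch's stand-in for a minimal value) and a large bitstring on the end
%% key, so the scan still covers every value of the trailing fields:

pad_bounds_example() ->
    LKAST = [family, series, timestamp, weather],
    ExtraLK = case length(LKAST) > 3 of
                  true  -> lists:nthtail(3, LKAST);
                  false -> []
              end,
    {[{N, 0} || N <- ExtraLK],                    %% appended to StartK2
     [{N, <<16#ffffffff:64>>} || N <- ExtraLK]}.  %% appended to EndK2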
-check_if_timeseries(#ddl_v1{table = T, partition_key = PK, local_key = LK} = DDL, +check_if_timeseries(#ddl_v1{table = T, partition_key = PK, local_key = LK0} = DDL, [W]) -> try + LK = LK0#key_v1{ast = lists:sublist(LK0#key_v1.ast, 3)}, #key_v1{ast = PartitionKeyAST} = PK, - LocalFields = [X || #param_v1{name = X} <- LK#key_v1.ast], + LocalFields = [X || #param_v1{name = X} <- lists:sublist(LK#key_v1.ast, 3)], PartitionFields = [X || #param_v1{name = X} <- PartitionKeyAST], [QuantumFieldName] = quantum_field_name(DDL), StrippedW = strip(W, []), @@ -655,7 +656,7 @@ break_out_timeseries(Filters1, LocalFields1, [QuantumFields]) -> {Filters2, {Starts, Ends}} -> %% remove the quanta from the local fields, this has alreadfy been %% removed from the fields - [F1, F2, _] = LocalFields1, + [F1, F2, _] = lists:sublist(LocalFields1, 3), LocalFields2 = [F1,F2], %% create the keys by splitting the key filters and prepending it %% with the time bound. From b966148743740e0383a648cfde68077a60e82c64 Mon Sep 17 00:00:00 2001 From: andytill Date: Wed, 27 Jan 2016 13:57:45 +0000 Subject: [PATCH 002/122] Flexible keys, allow more or less fields in the partition and local keys. --- src/riak_kv_eleveldb_backend.erl | 17 +++++++++------- src/riak_kv_qry_compiler.erl | 35 +++++++++++++++++++++++--------- 2 files changed, 35 insertions(+), 17 deletions(-) diff --git a/src/riak_kv_eleveldb_backend.erl b/src/riak_kv_eleveldb_backend.erl index f5b4cbd718..69cdad802f 100644 --- a/src/riak_kv_eleveldb_backend.erl +++ b/src/riak_kv_eleveldb_backend.erl @@ -462,16 +462,19 @@ range_scan(FoldIndexFun, Buffer, Opts, #state{fold_opts=_FoldOpts, {_, Bucket, Qry} = proplists:lookup(index, Opts), ?SQL_SELECT{'WHERE' = W, helper_mod = Mod, - local_key = LK} = Qry, + local_key = LK, + partition_key = #key_v1{ast = PKAST}} = Qry, %% Work out what the elements of the local key are after the partitioning key. %% Used below to add dummy fields to pad out the StartK2/EndK2 fields. LKAST = LK#key_v1.ast, - ExtraLK = case length(LKAST) > 3 of - true -> - lists:nthtail(3, LKAST); - false -> - [] - end, + PKASTLen = length(PKAST), + ExtraLK = + case length(LKAST) > PKASTLen of + true -> + lists:nthtail(PKASTLen, LKAST); + false -> + [] + end, %% this is all super-fugly {startkey, StartK} = proplists:lookup(startkey, W), {endkey, EndK} = proplists:lookup(endkey, W), diff --git a/src/riak_kv_qry_compiler.erl b/src/riak_kv_qry_compiler.erl index 6d48f3749f..d5928d2f6d 100644 --- a/src/riak_kv_qry_compiler.erl +++ b/src/riak_kv_qry_compiler.erl @@ -520,20 +520,19 @@ compile_where(DDL, Where) -> quantum_field_name(#ddl_v1{ partition_key = PK }) -> #key_v1{ ast = PartitionKeyAST } = PK, - [_, _, Quantum] = PartitionKeyAST, + Quantum = lists:last(PartitionKeyAST), #hash_fn_v1{args = [#param_v1{name = QFieldName} | _]} = Quantum, QFieldName. 
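%% [Editor's note, not part of the original patch] With flexible key lengths
%% the quantum can no longer be pattern-matched as the third element of the
%% partition key, so lists:last/1 picks out the trailing quantum for a key
%% of any length, e.g. for a hypothetical two-field key:
%%
%%   lists:last([#param_v1{name = [<<"a">>]}, HashFn]) =:= HashFn
%%
%% where HashFn is the #hash_fn_v1{} quantum record.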
check_if_timeseries(#ddl_v1{table = T, partition_key = PK, local_key = LK0} = DDL, [W]) -> try - LK = LK0#key_v1{ast = lists:sublist(LK0#key_v1.ast, 3)}, #key_v1{ast = PartitionKeyAST} = PK, - LocalFields = [X || #param_v1{name = X} <- lists:sublist(LK#key_v1.ast, 3)], PartitionFields = [X || #param_v1{name = X} <- PartitionKeyAST], + LK = LK0#key_v1{ast = lists:sublist(LK0#key_v1.ast, length(PartitionKeyAST))}, [QuantumFieldName] = quantum_field_name(DDL), StrippedW = strip(W, []), - {StartW, EndW, Filter} = break_out_timeseries(StrippedW, LocalFields, [QuantumFieldName]), + {StartW, EndW, Filter} = break_out_timeseries(StrippedW, PartitionFields, [QuantumFieldName]), Mod = riak_ql_ddl:make_module_name(T), StartKey = rewrite(LK, StartW, Mod), EndKey = rewrite(LK, EndW, Mod), @@ -632,7 +631,7 @@ acc_upper_bounds(_Filter, {_, _U}) -> error({upper_bound_specified_more_than_once, ?E_TSMSG_DUPLICATE_UPPER_BOUND}). %% -break_out_timeseries(Filters1, LocalFields1, [QuantumFields]) -> +break_out_timeseries(Filters1, PartitionFields1, [QuantumFields]) -> case find_timestamp_bounds(QuantumFields, Filters1) of {_, {undefined, undefined}} -> error({incomplete_where_clause, ?E_TSMSG_NO_BOUNDS_SPECIFIED}); @@ -654,13 +653,12 @@ break_out_timeseries(Filters1, LocalFields1, [QuantumFields]) -> error({lower_and_upper_bounds_are_equal_when_no_equals_operator, ?E_TSMSG_LOWER_AND_UPPER_BOUNDS_ARE_EQUAL_WHEN_NO_EQUALS_OPERATOR}); {Filters2, {Starts, Ends}} -> - %% remove the quanta from the local fields, this has alreadfy been + %% remove the quanta from the local fields, this has already been %% removed from the fields - [F1, F2, _] = lists:sublist(LocalFields1, 3), - LocalFields2 = [F1,F2], + % PartitionFields2 = lists:sublist(PartitionFields1, length(PartitionFields1)), % TODO DUUUUDE %% create the keys by splitting the key filters and prepending it %% with the time bound. - {Body, Filters3} = split_key_from_filters(LocalFields2, Filters2), + {Body, Filters3} = split_key_from_filters(PartitionFields1, Filters2), {[Starts | Body], [Ends | Body], Filters3} end. @@ -673,7 +671,7 @@ split_key_from_filters2([FieldName], Filters) when is_binary(FieldName) -> take_key_field(FieldName, Filters, []). %% -take_key_field(FieldName, [], Acc) -> +take_key_field(FieldName, [], Acc) when is_binary(FieldName) -> %% check if the field exists in the clause but used the wrong operator or %% it never existed at all. Give a more helpful message if the wrong op was %% used. @@ -1987,4 +1985,21 @@ compile_query_with_arithmetic_type_error_2_test() -> compile(get_standard_ddl(), Q, 100) ). +%% TODO MOAR tests +flexible_keys_1_test() -> + DDL = get_ddl( + "CREATE TABLE tab4(" + "a1 SINT64 NOT NULL, " + "a TIMESTAMP NOT NULL, " + "b VARCHAR NOT NULL, " + "c VARCHAR NOT NULL, " + "d SINT64 NOT NULL, " + "PRIMARY KEY ((a1, quantum(a, 15, 's')), a1, a, b, c, d))"), + {ok, Q} = get_query( + "SELECT * FROM tab4 WHERE a > 0 AND a < 1000 AND a1 = 1"), + ?assertEqual( + {ok, [#riak_select_v1{}]}, + compile(DDL, Q, 100) + ). + -endif. From 395fc90b27cc3189d4ba3277e8ee3c81da804b64 Mon Sep 17 00:00:00 2001 From: "John R. Daily" Date: Thu, 28 Jan 2016 14:11:17 -0500 Subject: [PATCH 003/122] First pass at extending TS batch puts all the way through to the leveldb backend. 
Depends on same branch in eleveldb --- include/riak_kv_vnode.hrl | 13 +++++ src/riak_kv_eleveldb_backend.erl | 45 +++++++++++++---- src/riak_kv_pb_timeseries.erl | 86 ++++++++++++++++++++++++++------ src/riak_kv_vnode.erl | 26 ++++++++-- src/riak_kv_w1c_worker.erl | 56 +++++++++++++++++++-- 5 files changed, 193 insertions(+), 33 deletions(-) diff --git a/include/riak_kv_vnode.hrl b/include/riak_kv_vnode.hrl index 296271b7e5..b9110d271c 100644 --- a/include/riak_kv_vnode.hrl +++ b/include/riak_kv_vnode.hrl @@ -19,6 +19,17 @@ type :: primary | fallback }). +%% Currently only for timeseries batches +-record(riak_kv_w1c_batch_put_req_v1, { + objs :: list({{binary(), binary()}, binary()}), + type :: primary | fallback +}). + +-record(riak_kv_w1c_batch_put_reply_v1, { + reply :: ok | {error, term()}, + type :: primary | fallback +}). + -record(riak_kv_get_req_v1, { bkey :: {binary(), binary()}, req_id :: non_neg_integer()}). @@ -75,6 +86,8 @@ -define(KV_PUT_REQ, #riak_kv_put_req_v1). -define(KV_W1C_PUT_REQ, #riak_kv_w1c_put_req_v1). -define(KV_W1C_PUT_REPLY, #riak_kv_w1c_put_reply_v1). +-define(KV_W1C_BATCH_PUT_REQ, #riak_kv_w1c_batch_put_req_v1). +-define(KV_W1C_BATCH_PUT_REPLY, #riak_kv_w1c_batch_put_reply_v1). -define(KV_GET_REQ, #riak_kv_get_req_v1). -define(KV_LISTBUCKETS_REQ, #riak_kv_listbuckets_req_v1). -define(KV_LISTKEYS_REQ, #riak_kv_listkeys_req_v4). diff --git a/src/riak_kv_eleveldb_backend.erl b/src/riak_kv_eleveldb_backend.erl index cfd3f2bb2b..befe519618 100644 --- a/src/riak_kv_eleveldb_backend.erl +++ b/src/riak_kv_eleveldb_backend.erl @@ -30,6 +30,7 @@ start/2, stop/1, get/3, + batch_put/4, put/5, async_put/5, sync_put/5, @@ -179,15 +180,10 @@ get(Bucket, Key, #state{read_opts=ReadOpts, {error, Reason, State} end. -%% @doc Insert an object into the eleveldb backend. --type index_spec() :: {add, Index, SecondaryKey} | {remove, Index, SecondaryKey}. --spec put(riak_object:bucket(), riak_object:key(), [index_spec()], binary(), state()) -> - {ok, state()} | - {error, term(), state()}. -put(Bucket, PrimaryKey, IndexSpecs, Val, #state{ref=Ref, - write_opts=WriteOpts, - legacy_indexes=WriteLegacy, - fixed_indexes=FixedIndexes}=State) -> + +%% Create a list of backend put-related updates for this object +put_operations(Bucket, PrimaryKey, IndexSpecs, Val, #state{legacy_indexes=WriteLegacy, + fixed_indexes=FixedIndexes}) -> %% Create the KV update... StorageKey = to_object_key(Bucket, PrimaryKey), Updates1 = [{put, StorageKey, Val} || Val /= undefined], @@ -204,9 +200,38 @@ put(Bucket, PrimaryKey, IndexSpecs, Val, #state{ref=Ref, index_deletes(FixedIndexes, Bucket, PrimaryKey, Field, Value) end, Updates2 = lists:flatmap(F, IndexSpecs), + Updates1 ++ Updates2. + +%% @doc Insert an object into the eleveldb backend. +-type index_spec() :: {add, Index, SecondaryKey} | {remove, Index, SecondaryKey}. +-spec put(riak_object:bucket(), riak_object:key(), [index_spec()], binary(), state()) -> + {ok, state()} | + {error, term(), state()}. +put(Bucket, PrimaryKey, IndexSpecs, Val, #state{ref=Ref, + write_opts=WriteOpts}=State) -> + Operations = put_operations(Bucket, PrimaryKey, IndexSpecs, Val, State), %% Perform the write... - case eleveldb:write(Ref, Updates1 ++ Updates2, WriteOpts) of + case eleveldb:write(Ref, Operations, WriteOpts) of + ok -> + {ok, State}; + {error, Reason} -> + {error, Reason, State} + end. + +%% @doc Insert a batch of objects (must contain the same index values) into the eleveldb backend. 
+-spec batch_put(term(), [{{riak_object:bucket(), riak_object:key()}, binary()}], [index_spec()], state()) -> + {ok, state()} | + {error, term(), state()}. +batch_put(Context, Values, IndexSpecs, #state{ref=Ref, + write_opts=WriteOpts}=State) -> + Operations = lists:flatmap(fun({{Bucket, Key}, Val}) -> + put_operations(Bucket, Key, IndexSpecs, Val, State) + end, + Values), + + %% Perform the write... + case eleveldb:sync_write(Context, Ref, Operations, WriteOpts) of ok -> {ok, State}; {error, Reason} -> diff --git a/src/riak_kv_pb_timeseries.erl b/src/riak_kv_pb_timeseries.erl index 59e91d25b0..f0e6278606 100644 --- a/src/riak_kv_pb_timeseries.erl +++ b/src/riak_kv_pb_timeseries.erl @@ -61,6 +61,9 @@ -define(FETCH_RETRIES, 10). %% TODO make it configurable in tsqueryreq -define(TABLE_ACTIVATE_WAIT, 30). %% ditto +-define(MIN_PUT_BATCH_SIZE, 10). %% TODO make it configurable somewhere +-define(MAX_PUT_BATCH_SIZE, 200). %% ditto + -record(state, { req, req_ctx, @@ -296,6 +299,8 @@ put_data(Data, Table, Mod) -> PreflistData = add_preflists(PartitionedData, NVal, riak_core_node_watcher:nodes(riak_kv)), + SendFullBatches = riak_core_capability:get({riak_kv, w1c_batch_vnode}, false), + EncodeFn = fun(O) -> riak_object:to_binary(v1, O, msgpack) end, @@ -305,21 +310,31 @@ put_data(Data, Table, Mod) -> case riak_kv_w1c_worker:validate_options( NVal, Preflist, [], BucketProps) of {ok, W, PW} -> - {Ids, Errs} = - lists:foldl( - fun(Record, {PartReqIds, PartErrors}) -> - {RObj, LK} = - build_object(Bucket, Mod, DDL, - Record, DocIdx), - - {ok, ReqId} = - riak_kv_w1c_worker:async_put( - RObj, W, PW, Bucket, NVal, LK, - EncodeFn, Preflist), - {[ReqId | PartReqIds], PartErrors} - end, - {[], 0}, Records), - {GlobalReqIds ++ Ids, GlobalErrorsCnt + Errs}; + DataForVnode = + case SendFullBatches andalso length(Records) >= ?MIN_PUT_BATCH_SIZE of + true -> + {batches, create_batches(Records, ?MAX_PUT_BATCH_SIZE)}; + false -> + {individual, Records} + end, + + Ids = + invoke_async_put(fun(Record) -> + build_object(Bucket, Mod, DDL, + Record, DocIdx) + end, + fun(RObj, LK) -> + riak_kv_w1c_worker:async_put( + RObj, W, PW, Bucket, NVal, LK, + EncodeFn, Preflist) + end, + fun(RObjs) -> + riak_kv_w1c_worker:ts_batch_put( + RObjs, W, PW, Bucket, NVal, + EncodeFn, Preflist) + end, + DataForVnode), + {GlobalReqIds ++ Ids, GlobalErrorsCnt}; _Error -> {GlobalReqIds, GlobalErrorsCnt + length(Records)} end @@ -351,6 +366,19 @@ row_to_key(Row, DDL, Mod) -> riak_kv_ts_util:encode_typeval_key( riak_ql_ddl:get_partition_key(DDL, Row, Mod)). +%% May be a more efficient way to do this. Take a list of arbitrary +%% data (expected to be a list of lists for this use case) and create +%% a list of MaxSize lists. +create_batches(Rows, MaxSize) -> + create_batches(Rows, MaxSize, MaxSize, [], []). + +create_batches([], _Counter, _Max, ThisBatch, AllBatches) -> + AllBatches ++ [ThisBatch]; +create_batches(Rows, 0, Max, ThisBatch, AllBatches) -> + create_batches(Rows, Max, Max, [], AllBatches ++ [ThisBatch]); +create_batches([H|T], Counter, Max, ThisBatch, AllBatches) -> + create_batches(T, Counter-1, Max, ThisBatch ++ [H], AllBatches). + add_preflists(PartitionedData, NVal, UpNodes) -> lists:map(fun({Idx, Rows}) -> {Idx, riak_core_apl:get_apl_ann(Idx, NVal, UpNodes), @@ -788,7 +816,21 @@ table_created_missing_response(Table) -> to_string(X) -> flat_format("~p", [X]). 
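%% [Editor's sketch, not part of the original patch] How create_batches/2
%% above splits rows: batches of at most MaxSize elements, with any
%% remainder in a final short batch, e.g.
%%
%%   create_batches([1, 2, 3, 4, 5], 2) -> [[1, 2], [3, 4], [5]].
%%
%% put_data/3 only takes the batch path when the w1c_batch_vnode capability
%% has been negotiated and a partition has at least ?MIN_PUT_BATCH_SIZE
%% records; otherwise it falls back to one async put per record.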
- +%% Returns a tuple with a list of request IDs and an error tally +invoke_async_put(BuildRObjFun, AsyncPutFun, _BatchPutFun, {individual, Records}) -> + lists:map(fun(Record) -> + {RObj, LK} = BuildRObjFun(Record), + {ok, ReqId} = AsyncPutFun(RObj, LK), + ReqId + end, + Records); +invoke_async_put(BuildRObjFun, _AsyncPutFun, BatchPutFun, {batches, Batches}) -> + lists:map(fun(Batch) -> + RObjs = lists:map(BuildRObjFun, Batch), + {ok, ReqId} = BatchPutFun(RObjs), + ReqId + end, + Batches). %% helpers to make various error responses @@ -909,4 +951,16 @@ validate_rows_error_response_2_test() -> validate_rows_error_response(["1", "2", "3"]) ). +batch_1_test() -> + ?assertEqual([[1, 2, 3, 4], [5, 6, 7, 8], [9]], + create_batches([1, 2, 3, 4, 5, 6, 7, 8, 9], 4)). + +batch_2_test() -> + ?assertEqual([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10]], + create_batches([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 4)). + +batch_3_test() -> + ?assertEqual([[1, 2, 3], [4, 5, 6], [7, 8, 9]], + create_batches([1, 2, 3, 4, 5, 6, 7, 8, 9], 3)). + -endif. diff --git a/src/riak_kv_vnode.erl b/src/riak_kv_vnode.erl index d429e41744..88f8144478 100644 --- a/src/riak_kv_vnode.erl +++ b/src/riak_kv_vnode.erl @@ -530,6 +530,8 @@ handle_overload_command(?KV_VNODE_STATUS_REQ{}, Sender, Idx) -> riak_core_vnode:reply(Sender, {vnode_status, Idx, [{error, overload}]}); handle_overload_command(?KV_W1C_PUT_REQ{type=Type}, Sender, _Idx) -> riak_core_vnode:reply(Sender, ?KV_W1C_PUT_REPLY{reply={error, overload}, type=Type}); +handle_overload_command(?KV_W1C_BATCH_PUT_REQ{type=Type}, Sender, _Idx) -> + riak_core_vnode:reply(Sender, ?KV_W1C_BATCH_PUT_REPLY{reply={error, overload}, type=Type}); handle_overload_command(_, Sender, _) -> riak_core_vnode:reply(Sender, {error, mailbox_overload}). @@ -840,6 +842,24 @@ handle_command({get_index_entries, Opts}, {reply, ignore, State} end; +%% For now, ignore async_put +handle_command(?KV_W1C_BATCH_PUT_REQ{objs=Objs, type=Type}, + From, State=#state{mod=Mod, idx=Idx, modstate=ModState}) -> + StartTS = os:timestamp(), + Context = {w1c_batch_put, From, Type, Objs, StartTS}, + case Mod:batch_put(Context, Objs, [], ModState) of + {ok, UpModState} -> + lists:foreach( + fun({{Bucket, Key}, EncodedVal}) -> + update_hashtree(Bucket, Key, EncodedVal, State), + ?INDEX_BIN(Bucket, Key, EncodedVal, put, Idx) + end, + Objs), + {reply, ?KV_W1C_BATCH_PUT_REPLY{reply=ok, type=Type}, State#state{modstate=UpModState}}; + {error, Reason, UpModState} -> + {reply, ?KV_W1C_BATCH_PUT_REPLY{reply={error, Reason}, type=Type}, State#state{modstate=UpModState}} + end; + %% NB. 
The following two function clauses discriminate on the async_put State field handle_command(?KV_W1C_PUT_REQ{bkey={Bucket, Key}, encoded_obj=EncodedVal, type=Type}, From, State=#state{mod=Mod, idx=Idx, async_put=true, modstate=ModState}) -> @@ -848,10 +868,10 @@ handle_command(?KV_W1C_PUT_REQ{bkey={Bucket, Key}, encoded_obj=EncodedVal, type= case Mod:sync_put(Context, Bucket, Key, EncodedVal, ModState) of {ok, UpModState} -> - update_hashtree(Bucket, Key, EncodedVal, State), - ?INDEX_BIN(Bucket, Key, EncodedVal, put, Idx), + update_hashtree(Bucket, Key, EncodedVal, State), + ?INDEX_BIN(Bucket, Key, EncodedVal, put, Idx), - {reply, ?KV_W1C_PUT_REPLY{reply=ok, type=Type}, State#state{modstate=UpModState}}; + {reply, ?KV_W1C_PUT_REPLY{reply=ok, type=Type}, State#state{modstate=UpModState}}; {error, Reason, UpModState} -> {reply, ?KV_W1C_PUT_REPLY{reply={error, Reason}, type=Type}, State#state{modstate=UpModState}} end; diff --git a/src/riak_kv_w1c_worker.erl b/src/riak_kv_w1c_worker.erl index 701b98833a..c5bcaecf2c 100644 --- a/src/riak_kv_w1c_worker.erl +++ b/src/riak_kv_w1c_worker.erl @@ -22,6 +22,7 @@ %% API -export([start_link/1, put/2, async_put/8, async_put_replies/2, + ts_batch_put/7, workers/0, validate_options/4]). -export([init/1, handle_call/3, @@ -118,10 +119,7 @@ async_put(RObj, W, PW, Bucket, NVal, LocalKey, EncodeFn, Preflist) -> StartTS = os:timestamp(), Worker = random_worker(), ReqId = erlang:monitor(process, Worker), - RObj2 = riak_object:set_vclock(RObj, vclock:fresh(<<0:8>>, 1)), - RObj3 = riak_object:update_last_modified(RObj2), - RObj4 = riak_object:apply_updates(RObj3), - EncodedVal = EncodeFn(RObj4), + EncodedVal = EncodeFn(w1c_vclock(RObj)), gen_server:cast( Worker, @@ -131,6 +129,37 @@ async_put(RObj, W, PW, Bucket, NVal, LocalKey, EncodeFn, Preflist) -> size=size(EncodedVal)}}), {ok, {ReqId, Worker}}. +-spec ts_batch_put(RObjs :: [{riak_object:key(), riak_object:riak_object()}], + W :: pos_integer(), + PW :: pos_integer(), + Bucket :: binary()|{binary(), binary()}, + NVal :: pos_integer(), + EncodeFn :: fun((riak_object:riak_object()) -> binary()), + Preflist :: term()) -> + {ok, {reference(), atom()}}. + +ts_batch_put(RObjs, W, PW, Bucket, NVal, EncodeFn, Preflist) -> + StartTS = os:timestamp(), + Worker = random_worker(), + ReqId = erlang:monitor(process, Worker), + EncodedVals = + lists:map(fun({K, O}) -> {{Bucket, K}, EncodeFn(w1c_vclock(O))} end, + RObjs), + Size = lists:sum(lists:map(fun({_BK, O}) -> size(O) end, EncodedVals)), + + gen_server:cast( + Worker, + {batch_put, EncodedVals, ReqId, Preflist, + #rec{w=W, pw=PW, n_val=NVal, from=self(), + start_ts=StartTS, + size=Size}}), + {ok, {ReqId, Worker}}. + +w1c_vclock(RObj) -> + RObj2 = riak_object:set_vclock(RObj, vclock:fresh(<<0:8>>, 1)), + RObj3 = riak_object:update_last_modified(RObj2), + riak_object:apply_updates(RObj3). + -spec async_put_replies(ReqIdTuples :: list({reference(), pid()}), proplists:proplist()) -> list(term()). 
async_put_replies(ReqIdTuples, Options) -> @@ -157,6 +186,15 @@ handle_cast({put, Bucket, Key, EncodedVal, ReqId, Preflist, #rec{from=From}=Rec} S end, {noreply, NewState}; +handle_cast({batch_put, EncodedVals, ReqId, Preflist, #rec{from=From}=Rec}, #state{proxies=Proxies}=State) -> + NewState = case store_request_record(ReqId, Rec, State) of + {undefined, S} -> + S#state{proxies=batch_send_vnodes(Preflist, Proxies, EncodedVals, ReqId)}; + {_, S} -> + reply(From, ReqId, {error, request_id_already_defined}), + S + end, + {noreply, NewState}; handle_cast({cancel, ReqId}, State) -> NewState = case erase_request_record(ReqId, State) of {undefined, S} -> @@ -267,6 +305,16 @@ send_vnodes([{{Idx, Node}, Type}|Rest], Proxies, Bucket, Key, EncodedVal, ReqId) ), send_vnodes(Rest, NewProxies, Bucket, Key, EncodedVal, ReqId). +batch_send_vnodes([], Proxies, _EncodedVals, _ReqId) -> + Proxies; +batch_send_vnodes([{{Idx, Node}, Type}|Rest], Proxies, EncodedVals, ReqId) -> + {Proxy, NewProxies} = get_proxy(Idx, Proxies), + Message = ?KV_W1C_BATCH_PUT_REQ{objs=EncodedVals, type=Type}, + gen_fsm:send_event( + {Proxy, Node}, + riak_core_vnode_master:make_request(Message, {raw, ReqId, self()}, Idx) + ), + batch_send_vnodes(Rest, NewProxies, EncodedVals, ReqId). get_proxy(Idx, Proxies) -> case ?DICT_TYPE:find(Idx, Proxies) of From 2f32195c57f6311f18a50cc1f96658acba0d9bfe Mon Sep 17 00:00:00 2001 From: "John R. Daily" Date: Thu, 28 Jan 2016 15:28:33 -0500 Subject: [PATCH 004/122] Register our new capability --- src/riak_kv_app.erl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/riak_kv_app.erl b/src/riak_kv_app.erl index 1abe5fb59c..91a2b77687 100644 --- a/src/riak_kv_app.erl +++ b/src/riak_kv_app.erl @@ -155,6 +155,10 @@ start(_Type, _StartArgs) -> [true, false], false), + riak_core_capability:register({riak_kv, w1c_batch_vnode}, + [true, false], + false), + %% mapred_system should remain until no nodes still exist %% that would propose 'legacy' as the default choice riak_core_capability:register({riak_kv, mapred_system}, From 00077540b36ecf76a1f2edbb6d3cb7bb9d016940 Mon Sep 17 00:00:00 2001 From: "John R. Daily" Date: Thu, 28 Jan 2016 15:29:13 -0500 Subject: [PATCH 005/122] w1c worker expects tuples in the form of {key, obj} so reverse the current order in build_object --- src/riak_kv_pb_timeseries.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/riak_kv_pb_timeseries.erl b/src/riak_kv_pb_timeseries.erl index f0e6278606..0903b26ced 100644 --- a/src/riak_kv_pb_timeseries.erl +++ b/src/riak_kv_pb_timeseries.erl @@ -393,7 +393,7 @@ build_object(Bucket, Mod, DDL, Row, PK) -> RObj = riak_object:newts( Bucket, PK, Obj, dict:from_list([{?MD_DDL_VERSION, ?DDL_VERSION}])), - {RObj, LK}. + {LK, RObj}. %% ----------- @@ -819,7 +819,7 @@ to_string(X) -> %% Returns a tuple with a list of request IDs and an error tally invoke_async_put(BuildRObjFun, AsyncPutFun, _BatchPutFun, {individual, Records}) -> lists:map(fun(Record) -> - {RObj, LK} = BuildRObjFun(Record), + {LK, RObj} = BuildRObjFun(Record), {ok, ReqId} = AsyncPutFun(RObj, LK), ReqId end, From 5086fa846319a2db1e3b03f7aea241fafe293427 Mon Sep 17 00:00:00 2001 From: "John R. Daily" Date: Fri, 29 Jan 2016 07:37:14 -0500 Subject: [PATCH 006/122] w1c worker was not processing batch replies. Question: do we need batch replies or should we just use standard put replies? 
---
 src/riak_kv_w1c_worker.erl | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/riak_kv_w1c_worker.erl b/src/riak_kv_w1c_worker.erl
index c5bcaecf2c..a6a612b482 100644
--- a/src/riak_kv_w1c_worker.erl
+++ b/src/riak_kv_w1c_worker.erl
@@ -207,7 +207,14 @@ handle_cast({cancel, ReqId}, State) ->
 handle_cast(_Msg, State) ->
     {noreply, State}.
 
+handle_info({ReqId, ?KV_W1C_BATCH_PUT_REPLY{reply=Reply, type=Type}}, State) ->
+    handle_put_reply(ReqId, Reply, Type, State);
 handle_info({ReqId, ?KV_W1C_PUT_REPLY{reply=Reply, type=Type}}, State) ->
+    handle_put_reply(ReqId, Reply, Type, State);
+handle_info(_Msg, State) ->
+    {noreply, State}.
+
+handle_put_reply(ReqId, Reply, Type, State) ->
     case get_request_record(ReqId, State) of
         undefined->
             % the entry was likely purged by the timeout mechanism
@@ -247,9 +254,7 @@ handle_info({ReqId, ?KV_W1C_PUT_REPLY{reply=Reply, type=Type}}, State) ->
                     {_, NewState} = store_request_record(ReqId, NewRec, State)
             end,
             {noreply, NewState}
-    end;
-handle_info(_Msg, State) ->
-    {noreply, State}.
+    end.
 
 terminate(_Reason, _State) ->
     ok.

From 299963e79e1bd75279234daa61be8a5fc29fad62 Mon Sep 17 00:00:00 2001
From: andytill
Date: Mon, 8 Feb 2016 14:34:40 +0000
Subject: [PATCH 007/122] Use sext:prefix to generate partial start and end keys for range scans.

---
 priv/riak_kv.schema              |  59 ++++++++++++++--
 src/riak_kv_eleveldb_backend.erl | 113 ++++++++++++++-----------------
 src/riak_kv_pb_timeseries.erl    |  45 ++++++------
 src/riak_kv_qry.erl              |   3 +-
 src/riak_kv_qry_compiler.erl     |  22 +++---
 src/riak_kv_qry_worker.erl       |  44 +++++++-----
 6 files changed, 172 insertions(+), 114 deletions(-)

diff --git a/priv/riak_kv.schema b/priv/riak_kv.schema
index c75dc88b2b..d5e36d6025 100644
--- a/priv/riak_kv.schema
+++ b/priv/riak_kv.schema
@@ -678,26 +678,77 @@ end
     (_) -> false
 end}.
 
+%% @see riak_kv.query.timeseries.timeout
 %% @doc Timeout in milliseconds for Time Series queries, after which riak
 %% will return a timeout error.
+%% This setting is DEPRECATED use riak_kv.query.timeseries.timeout instead
 {mapping, "timeseries_query_timeout_ms", "riak_kv.timeseries_query_timeout_ms", [
-  {default, 10000},
-  {datatype, integer}
+  {commented, 10000},
+  {datatype, integer},
+  hidden
+]}.
+
+%% @doc Timeout for Time Series queries, after which riak
+%% will return a timeout error - default is 10,000 milliseconds
+{mapping, "riak_kv.query.timeseries.timeout", "riak_kv.timeseries_query_timeout_ms", [
+  {default, "10000ms"},
+  {datatype, {duration, ms}}
 ]}.
 
+%% @see riak_kv.query.timeseries.max_quanta_span
 %% @doc Maximum number of quanta that a query can span. Larger quanta spans
 %% mean the time duration for a query can be bigger. This is constrained to
 %% prevent excessively long running queries that could affect the performance
 %% of the cluster.
+%% This setting is DEPRECATED use riak_kv.query.timeseries.max_quanta_span instead
 {mapping, "timeseries_query_max_quanta_span", "riak_kv.timeseries_query_max_quanta_span", [
+  {commented, 5},
+  {datatype, integer},
+  {validators, ["validate_max_quanta_span"]},
+  hidden
+]}.
+
+%% @doc Maximum number of quanta that a query can span. Larger quanta spans
+%% mean the time duration for a query can be bigger. This is constrained to
+%% prevent excessively long running queries that could affect the performance
+%% of the cluster.
+{mapping, "riak_kv.query.timeseries.max_quanta_span", "riak_kv.timeseries_query_max_quanta_span", [
   {default, 5},
-  {datatype, integer}
+  {datatype, integer},
+  {validators, ["validate_max_quanta_span"]}
 ]}.
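%% [Editor's example, not part of the original patch] Hypothetical riak.conf
%% lines exercising the renamed settings above; cuttlefish duration values
%% accept unit suffixes, so the timeout could equally be written as "10s":
%%
%%   riak_kv.query.timeseries.timeout = 10000ms
%%   riak_kv.query.timeseries.max_quanta_span = 5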
+{validator, + "validate_max_quanta_span", + "must be an integer > 0 and =< 256", + fun(Value) when is_integer(Value) andalso Value > 1 andalso Value =< 256 -> true; + (_) -> false + end}. + +%% @see riak_kv.query.timeseries.max_concurrent_queries %% @doc The number of individual queries that can run at any one time. %% This is the number per node in the cluster so the total number of %% concurrent queries per cluster is timeseries_max_concurrent_queries * nodes. +%% This setting is DEPRECATED use riak_kv.query.timeseries.max_concurrent_queries {mapping, "timeseries_max_concurrent_queries", "riak_kv.timeseries_max_concurrent_queries", [ + {commented, 3}, + {datatype, integer}, + {validators, ["validate_max_concurrent_queries"]}, + hidden +]}. + +%% @doc The number of individual queries that can run at any one time. +%% This is the number per node in the cluster so the total number of +%% concurrent queries per cluster is timeseries_max_concurrent_queries * nodes. +{mapping, "riak_kv.query.timeseries.max_concurrent_queries", "riak_kv.timeseries_max_concurrent_queries", [ {default, 3}, - {datatype, integer} + {datatype, integer}, + {validators, ["validate_max_concurrent_queries"]} ]}. + +{validator, + "validate_max_concurrent_queries", + "must be an integer > 0 and =< 1000", + fun(Value) when is_integer(Value) andalso Value > 1 andalso Value =< 1000 -> true; + (_) -> false + end}. diff --git a/src/riak_kv_eleveldb_backend.erl b/src/riak_kv_eleveldb_backend.erl index 69cdad802f..4acf628958 100644 --- a/src/riak_kv_eleveldb_backend.erl +++ b/src/riak_kv_eleveldb_backend.erl @@ -47,13 +47,12 @@ is_empty/1, status/1, callback/3]). - -export([data_size/1]). --compile({inline, [ - to_object_key/2, from_object_key/1, - to_index_key/4, from_index_key/1 - ]}). +% -compile({inline, [ +% to_object_key/2, from_object_key/1, +% to_index_key/4, from_index_key/1 +% ]}). %% Remove a few releases after 2.1 series, keeping %% around for debugging/comparison. -export([orig_to_object_key/2, orig_from_object_key/1]). @@ -461,57 +460,23 @@ range_scan(FoldIndexFun, Buffer, Opts, #state{fold_opts=_FoldOpts, ref=Ref}) -> {_, Bucket, Qry} = proplists:lookup(index, Opts), ?SQL_SELECT{'WHERE' = W, - helper_mod = Mod, - local_key = LK, - partition_key = #key_v1{ast = PKAST}} = Qry, - %% Work out what the elements of the local key are after the partitioning key. - %% Used below to add dummy fields to pad out the StartK2/EndK2 fields. - LKAST = LK#key_v1.ast, - PKASTLen = length(PKAST), - ExtraLK = - case length(LKAST) > PKASTLen of - true -> - lists:nthtail(PKASTLen, LKAST); - false -> - [] - end, - %% this is all super-fugly + helper_mod = _Mod, + local_key = #key_v1{ast = LKAST}, + partition_key = #key_v1{ast = _PKAST}} = Qry, {startkey, StartK} = proplists:lookup(startkey, W), {endkey, EndK} = proplists:lookup(endkey, W), - {filter, Filter} = proplists:lookup(filter, W), - StartInclusive = case proplists:lookup(start_inclusive, W) of - none -> []; - STuple -> [STuple] - end, - EndInclusive = case proplists:lookup(end_inclusive, W) of - none -> []; - ETuple -> [ETuple] - end, - AdditionalOptions = lists:flatten(StartInclusive ++ EndInclusive), - AdditionalOptions2 = - case Filter of - [] -> AdditionalOptions; - _ -> [{range_filter, Filter} | AdditionalOptions] - end, - %% Pad with missing local keys after the timestamp - use minimum erlang term value of '0' to make - %% sure we start the search at the beginning. 
- StartK2 = [{Field, Val} || {Field, _Type, Val} <- StartK] ++ [{N, 0} || #param_v1{name = [N]} <- ExtraLK], - StartK3 = riak_ql_ddl:make_key(Mod, LK, StartK2), - StartK4 = riak_kv_ts_util:encode_typeval_key(StartK3), %% TODO: Avoid adding/removing type info - StartKey = to_object_key(Bucket, StartK4), - %% Pad the missing end - not perfect because there could be longer bitstrings, but will have to do for prototype - %% TODO: FIX END KEY - EndK2 = [{Field, Val} || {Field, _Type, Val} <- EndK] ++ [{N, <<16#ffffffff:64>>} || #param_v1{name = [N]} <- ExtraLK], - EndK3 = riak_ql_ddl:make_key(Mod, LK, EndK2), - EndK4 = riak_kv_ts_util:encode_typeval_key(EndK3), - EndKey = to_object_key(Bucket, EndK4), + StartKey = key_prefix(Bucket, [element(3, E) || E <- StartK], length(LKAST)), + EndKey1 = key_prefix(Bucket, [element(3, E) || E <- EndK], length(LKAST)), + case lists:member({end_inclusive, true}, W) of + true -> EndKey2 = <>; + false -> EndKey2 = EndKey1 + end, FoldFun = fun build_list/2, Options = [ - {start_key, StartKey}, - {end_key, EndKey}, - {fold_method, streaming}, - {encoding, msgpack} | - AdditionalOptions2 + {start_key, StartKey}, + {end_key, EndKey2}, + {fold_method, streaming}, + {encoding, msgpack} | range_scan_additional_options(W) ], KeyFolder = fun() -> Vals = eleveldb:fold(Ref, FoldFun, [], Options), @@ -519,6 +484,35 @@ range_scan(FoldIndexFun, Buffer, Opts, #state{fold_opts=_FoldOpts, end, {async, KeyFolder}. +%% +range_scan_additional_options(Where) -> + Options1 = + case proplists:lookup(start_inclusive, Where) of + none -> []; + STuple -> [STuple] + end, + Options2 = + case proplists:lookup(end_inclusive, Where) of + none -> Options1; + ETuple -> [ETuple | Options1] + end, + case proplists:lookup(filter, Where) of + {filter, []} -> Options2; + {filter, Filter} -> [{range_filter, Filter} | Options2] + end. + +%% +key_prefix({TableName,_}, PK2, LocalKeyLen) -> + PK3 = PK2 ++ lists:duplicate(LocalKeyLen - length(PK2), '_'), + PKPrefix = sext:prefix(list_to_tuple(PK3)), + EncodedBucketType = EncodedBucketName = sext:encode(TableName), + <<16,0,0,0,3, %% 3-tuple - outer + 12,183,128,8, %% o-atom + 16,0,0,0,2, %% 2-tuple for bucket type/name + EncodedBucketType/binary, + EncodedBucketName/binary, + PKPrefix/binary>>. + legacy_key_fold(Ref, FoldFun, Acc, FoldOpts0, Query={index, _, _}) -> {_, FirstKey} = lists:keyfind(first_key, 1, FoldOpts0), LegacyKey = to_legacy_first_key(Query), @@ -981,21 +975,16 @@ orig_to_object_key(Bucket, Key) -> %% be round-tripped (as that would then be a binary-wrapping a sext-encoded %% TS key - for an extra 9 bytes used). %% -to_object_key({TableName, TableName}, {Family, Series, Timestamp}) -> - EncodedBucketType = % sext:encode(BucketType), - EncodedBucketName = sext:encode(TableName), - EncodedFamily = sext:encode(Family), - EncodedSeries = sext:encode(Series), - EncodedTimestamp = sext:encode(Timestamp), +to_object_key({TableName, TableName}, LocalKey) when is_tuple(LocalKey) -> + EncodedBucketType = EncodedBucketName = sext:encode(TableName), + EncodedFamily = sext:encode(LocalKey), + % format like {'o', {TableName,TableName}, LocalKeyTuple} <<16,0,0,0,3, %% 3-tuple - outer 12,183,128,8, %% o-atom 16,0,0,0,2, %% 2-tuple for bucket type/name EncodedBucketType/binary, EncodedBucketName/binary, - 16,0,0,0,3, %% 3-tuple - for time series key - EncodedFamily/binary, - EncodedSeries/binary, - EncodedTimestamp/binary>>; + EncodedFamily/binary>>; to_object_key({BucketType, BucketName}, Key) -> %% Riak 2.0 keys %% sext:encode({o, Bucket, Key}). 
    EncodedBucketType = sext:encode(BucketType),
diff --git a/src/riak_kv_pb_timeseries.erl b/src/riak_kv_pb_timeseries.erl
index 59e91d25b0..6f5ed09b94 100644
--- a/src/riak_kv_pb_timeseries.erl
+++ b/src/riak_kv_pb_timeseries.erl
@@ -57,6 +57,8 @@
 -define(E_ACTIVATE, 1017).
 -define(E_BAD_QUERY, 1018).
 -define(E_TABLE_INACTIVE, 1019).
+-define(E_PARSE_ERROR, 1020).
+-define(E_DELETE_NOTFOUND, 1021).
 
 -define(FETCH_RETRIES, 10).  %% TODO make it configurable in tsqueryreq
 -define(TABLE_ACTIVATE_WAIT, 30). %% ditto
@@ -87,14 +89,16 @@ decode(Code, Bin) ->
     Msg = riak_pb_codec:decode(Code, Bin),
     case Msg of
         #tsqueryreq{query = Q, cover_context = Cover} ->
+            %% convert error returns to ok's, this means it will be passed into
+            %% process which will not process it and return the error.
             case catch decode_query(Q, Cover) of
                 {ok, DecodedQuery} ->
                     PermAndTarget = decode_query_permissions(DecodedQuery),
                     {ok, DecodedQuery, PermAndTarget};
                 {error, Error} ->
-                    {error, decoder_parse_error_resp(Error)};
+                    {ok, make_decoder_error_response(Error)};
                 {'EXIT', {Error, _}} ->
-                    {error, decoder_parse_error_resp(Error)}
+                    {ok, make_decoder_error_response(Error)}
             end;
         #tsgetreq{table = Table}->
             {ok, Msg, {"riak_kv.ts_get", Table}};
@@ -144,6 +148,9 @@ encode(Message) ->
 -spec process(atom() | ts_requests() | ts_query_types(), #state{}) ->
                      {reply, ts_responses(), #state{}}.
 
+process(#rpberrorresp{} = Error, State) ->
+    {reply, Error, State};
+
 process(M = #tsputreq{table = Table}, State) ->
     check_table_and_call(Table, fun sub_tsputreq/4, M, State);
 
@@ -458,7 +465,7 @@ sub_tsdelreq(Mod, DDL, #tsdelreq{table = Table,
         {error, {bad_key_length, Got, Need}} ->
             {reply, key_element_count_mismatch(Got, Need), State};
         {error, notfound} ->
-            {reply, tsdelresp, State};
+            {reply, make_rpberrresp(?E_DELETE_NOTFOUND, "notfound"), State};
         {error, Reason} ->
             {reply, failed_delete_response(Reason), State}
     end.
@@ -494,17 +501,17 @@ sub_tslistkeysreq(Mod, DDL, #tslistkeysreq{table = Table,
 
 sub_tscoveragereq(Mod, _DDL, #tscoveragereq{table = Table,
                                             query = Q},
                   State) ->
-    SQL = compile(Mod, catch decode_query(Q)),
-    Client = {riak_client, [node(), undefined]},
-
-    case SQL of
-        {error, _Error} ->
+    case compile(Mod, catch decode_query(Q)) of
+        {error, #rpberrorresp{} = Error} ->
+            {reply, Error, State};
+        {error, _} ->
             {reply, make_rpberrresp(
                       ?E_BAD_QUERY, "Failed to compile query"),
             State};
-        _ ->
+        SQL ->
             %% SQL is a list of queries (1 per quantum)
             Bucket = riak_kv_ts_util:table_to_bucket(Table),
+            Client = {riak_client, [node(), undefined]},
            convert_cover_list(sql_to_cover(Client, SQL, Bucket, []), State)
    end.

@@ -615,9 +622,9 @@ find_hash_fn([_H|T]) ->
 
 compile(_Mod, {error, Err}) ->
-    {error, decoder_parse_error_resp(Err)};
+    {error, make_decoder_error_response(Err)};
 compile(_Mod, {'EXIT', {Err, _}}) ->
-    {error, decoder_parse_error_resp(Err)};
+    {error, make_decoder_error_response(Err)};
 compile(Mod, {ok, SQL}) ->
     case (catch Mod:get_ddl()) of
         {_, {undef, _}} ->
@@ -818,20 +825,18 @@ make_tscolumndescription_list(ColumnNames, ColumnTypes) ->
     [#tscolumndescription{name = Name,
                           type = riak_pb_ts_codec:encode_field_type(Type)}
      || {Name, Type} <- lists:zip(ColumnNames, ColumnTypes)].
- - -decoder_parse_error_resp({LineNo, riak_ql_parser, Msg}) when is_integer(LineNo) -> - flat_format("~ts", [Msg]); -decoder_parse_error_resp({Token, riak_ql_parser, _}) -> - flat_format("Unexpected token '~p'", [Token]); -decoder_parse_error_resp(Error) -> +make_decoder_error_response({LineNo, riak_ql_parser, Msg}) when is_integer(LineNo) -> + make_rpberrresp(?E_PARSE_ERROR, flat_format("~ts", [Msg])); +make_decoder_error_response({Token, riak_ql_parser, _}) when is_binary(Token) -> + make_rpberrresp(?E_PARSE_ERROR, flat_format("Unexpected token '~s'", [Token])); +make_decoder_error_response({Token, riak_ql_parser, _}) -> + make_rpberrresp(?E_PARSE_ERROR, flat_format("Unexpected token '~p'", [Token])); +make_decoder_error_response(Error) -> Error. - flat_format(Format, Args) -> lists:flatten(io_lib:format(Format, Args)). - -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). diff --git a/src/riak_kv_qry.erl b/src/riak_kv_qry.erl index a115a740a1..5382129220 100644 --- a/src/riak_kv_qry.erl +++ b/src/riak_kv_qry.erl @@ -26,7 +26,8 @@ -module(riak_kv_qry). -export([ - submit/2 + submit/2, + format_query_syntax_errors/1 ]). -include_lib("riak_ql/include/riak_ql_ddl.hrl"). diff --git a/src/riak_kv_qry_compiler.erl b/src/riak_kv_qry_compiler.erl index d5928d2f6d..e51e16df2f 100644 --- a/src/riak_kv_qry_compiler.erl +++ b/src/riak_kv_qry_compiler.erl @@ -171,8 +171,7 @@ compile_select_clause(DDL, ?SQL_SELECT{'SELECT' = #riak_sel_clause_v1{ clause = col_names = get_col_names(DDL, Q), col_return_types = lists:flatten(ColTypes) }}; [_|_] -> - - {error, lists:reverse(Errors)} + {error, {invalid_query, riak_kv_qry:format_query_syntax_errors(lists:reverse(Errors))}} end. %% @@ -401,7 +400,7 @@ infer_op_type(_, T1, T2) when T1 == double andalso T2 == sint64; T1 == sint64 andalso T2 == double -> double; infer_op_type(Op, T1, T2) -> - {error, {invalid_type, Op, T1, T2}}. + {error, {operator_type_mismatch, Op, T1, T2}}. %% compile_select_col_stateless2('+', A, B) -> @@ -1938,8 +1937,9 @@ compile_query_with_function_type_error_1_test() -> "SELECT SUM(location) FROM GeoCheckin " "WHERE time > 5000 AND time < 10000" "AND user = 'user_1' AND location = 'derby'"), + io:format(user, "~p", [compile(get_standard_ddl(), Q, 100)]), ?assertEqual( - {error, [{invalid_function_call,'SUM',[varchar]}]}, + {error,{invalid_query,<<"\nFunction 'SUM' called with arguments of the wrong type [varchar].">>}}, compile(get_standard_ddl(), Q, 100) ). @@ -1948,10 +1948,10 @@ compile_query_with_function_type_error_2_test() -> "SELECT SUM(location), AVG(location) FROM GeoCheckin " "WHERE time > 5000 AND time < 10000" "AND user = 'user_1' AND location = 'derby'"), + io:format(user, "~p", [compile(get_standard_ddl(), Q, 100)]), ?assertEqual( - {error, [ - {invalid_function_call,'SUM',[varchar]}, - {invalid_function_call,'AVG',[varchar]}]}, + {error,{invalid_query,<<"\nFunction 'SUM' called with arguments of the wrong type [varchar].\n" + "Function 'AVG' called with arguments of the wrong type [varchar].">>}}, compile(get_standard_ddl(), Q, 100) ). @@ -1961,7 +1961,7 @@ compile_query_with_function_type_error_3_test() -> "WHERE time > 5000 AND time < 10000" "AND user = 'user_1' AND location = 'derby'"), ?assertEqual( - {error, [{invalid_type,'+',varchar,sint64}]}, + {error,{invalid_query,<<"\nOperator '+' called with mismatched types [varchar vs sint64].">>}}, compile(get_standard_ddl(), Q, 100) ). 
@@ -1970,8 +1970,9 @@ compile_query_with_arithmetic_type_error_1_test() -> "SELECT location + 1 FROM GeoCheckin " "WHERE time > 5000 AND time < 10000" "AND user = 'user_1' AND location = 'derby'"), + io:format(user, "~p", [compile(get_standard_ddl(), Q, 100)]), ?assertEqual( - {error, [{invalid_type,'+',varchar,sint64}]}, + {error,{invalid_query,<<"\nOperator '+' called with mismatched types [varchar vs sint64].">>}}, compile(get_standard_ddl(), Q, 100) ). @@ -1980,8 +1981,9 @@ compile_query_with_arithmetic_type_error_2_test() -> "SELECT 2*(location + 1) FROM GeoCheckin " "WHERE time > 5000 AND time < 10000" "AND user = 'user_1' AND location = 'derby'"), + io:format(user, "~p", [compile(get_standard_ddl(), Q, 100)]), ?assertEqual( - {error, [{invalid_type,'+',varchar,sint64}]}, + {error,{invalid_query,<<"\nOperator '+' called with mismatched types [varchar vs sint64].">>}}, compile(get_standard_ddl(), Q, 100) ). diff --git a/src/riak_kv_qry_worker.erl b/src/riak_kv_qry_worker.erl index e5623f6ec0..fb6729c7ff 100644 --- a/src/riak_kv_qry_worker.erl +++ b/src/riak_kv_qry_worker.erl @@ -219,7 +219,6 @@ add_subquery_result(SubQId, Chunk, State#state{status = accumulating_chunks, result = QueryResult2, sub_qrys = NSubQ} - catch error:divide_by_zero -> cancel_error_query(divide_by_zero, State) @@ -242,12 +241,10 @@ cancel_error_query(Error, #state{ receiver_pid = ReceiverPid, subqueries_done(QId, #state{qid = QId, receiver_pid = ReceiverPid, - result = QueryResult1, - sub_qrys = SubQQ, - qry = ?SQL_SELECT{'SELECT' = Sel }} = State) -> + sub_qrys = SubQQ} = State) -> case SubQQ of [] -> - QueryResult2 = prepare_final_results(Sel, QueryResult1), + QueryResult2 = prepare_final_results(State), % send the results to the waiting client process ReceiverPid ! {ok, QueryResult2}, pop_next_query(), @@ -258,19 +255,26 @@ subqueries_done(QId, State end. --spec prepare_final_results(#riak_sel_clause_v1{}, [{non_neg_integer(), list()}]) -> +-spec prepare_final_results(#state{}) -> {[riak_pb_ts_codec:tscolumnname()], [riak_pb_ts_codec:tscolumntype()], [[riak_pb_ts_codec:ldbvalue()]]}. -prepare_final_results(#riak_sel_clause_v1{calc_type = rows} = Select, - IndexedChunks) -> +prepare_final_results(#state{ + result = IndexedChunks, + qry = ?SQL_SELECT{'SELECT' = #riak_sel_clause_v1{calc_type = rows} = Select }}) -> %% sort by index, to reassemble according to coverage plan {_, R2} = lists:unzip(lists:sort(IndexedChunks)), prepare_final_results2(Select, lists:append(R2)); -prepare_final_results(#riak_sel_clause_v1{ calc_type = aggregate } = Select, - Aggregate1) -> - Aggregate2 = riak_kv_qry_compiler:finalise_aggregate(Select, Aggregate1), - prepare_final_results2(Select, [Aggregate2]). +prepare_final_results(#state{ + result = Aggregate1, + qry = ?SQL_SELECT{'SELECT' = #riak_sel_clause_v1{calc_type = aggregate} = Select }} = State) -> + try + Aggregate2 = riak_kv_qry_compiler:finalise_aggregate(Select, Aggregate1), + prepare_final_results2(Select, [Aggregate2]) + catch + error:divide_by_zero -> + cancel_error_query(divide_by_zero, State) + end. 
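%% [Editor's note, not part of the original patch] Wrapping
%% finalise_aggregate/2 in a try mirrors add_subquery_result/3 earlier in
%% this diff: an aggregate finaliser can raise divide_by_zero (for example
%% an average finalised over zero rows), and catching it here turns the
%% crash into a cancelled query with an error reply to the receiver instead
%% of taking the worker process down.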
%% prepare_final_results2(#riak_sel_clause_v1{ col_return_types = ColTypes, @@ -287,14 +291,20 @@ prepare_final_results2(#riak_sel_clause_v1{ col_return_types = ColTypes, prepare_final_results_test() -> Rows = [[12, <<"windy">>], [13, <<"windy">>]], - IndexedChunks = [{1, Rows}], + % IndexedChunks = [{1, Rows}], ?assertEqual( {[<<"a">>, <<"b">>], [sint64, varchar], Rows}, prepare_final_results( - #riak_sel_clause_v1{ - col_names = [<<"a">>, <<"b">>], - col_return_types = [sint64, varchar], - calc_type = rows }, IndexedChunks) + #state{ + qry = + ?SQL_SELECT{ + 'SELECT' = #riak_sel_clause_v1{ + col_names = [<<"a">>, <<"b">>], + col_return_types = [sint64, varchar], + calc_type = rows + } + }, + result = [{1, Rows}]}) ). -endif. From 81d612cd1f0d6541536f449c692fcf7b69b6d230 Mon Sep 17 00:00:00 2001 From: andytill Date: Tue, 9 Feb 2016 11:22:38 +0000 Subject: [PATCH 008/122] Add tuple checks to confirm that a key is a pk/lk and not just a two element local key. --- src/riak_kv_w1c_worker.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/riak_kv_w1c_worker.erl b/src/riak_kv_w1c_worker.erl index 701b98833a..c59bfaae18 100644 --- a/src/riak_kv_w1c_worker.erl +++ b/src/riak_kv_w1c_worker.erl @@ -112,7 +112,7 @@ put(RObj0, Options) -> Preflist :: term()) -> {ok, {reference(), atom()}}. -async_put(RObj, W, PW, Bucket, NVal, {_PK, LK}, EncodeFn, Preflist) -> +async_put(RObj, W, PW, Bucket, NVal, {_PK, LK}, EncodeFn, Preflist) when is_tuple(LK) -> async_put(RObj, W, PW, Bucket, NVal, LK, EncodeFn, Preflist); async_put(RObj, W, PW, Bucket, NVal, LocalKey, EncodeFn, Preflist) -> StartTS = os:timestamp(), @@ -408,7 +408,7 @@ find_put_timeout(Options) -> ?DEFAULT_TIMEOUT end. -chash_key(Bucket, {PartitionKey, _LocalKey}, BucketProps) -> +chash_key(Bucket, {PartitionKey, LocalKey}, BucketProps) when is_tuple(LocalKey) -> riak_core_util:chash_key({Bucket, PartitionKey}, BucketProps); chash_key(Bucket, Key, BucketProps) -> riak_core_util:chash_key({Bucket, Key}, BucketProps). From d6ffc3a4fbcf01a331cc61855b5479b6f44160c8 Mon Sep 17 00:00:00 2001 From: andytill Date: Wed, 10 Feb 2016 13:34:08 +0000 Subject: [PATCH 009/122] Changed tests that checked that short keys failed to check that they pass. --- src/riak_kv_console.erl | 29 ++++++++++--------- src/riak_kv_eleveldb_backend.erl | 5 ++-- src/riak_kv_qry_compiler.erl | 2 +- src/riak_kv_ts_util.erl | 48 ++++++++++++++++---------------- src/riak_kv_w1c_worker.erl | 2 +- 5 files changed, 43 insertions(+), 43 deletions(-) diff --git a/src/riak_kv_console.erl b/src/riak_kv_console.erl index 6e04f64f06..81b869ef57 100644 --- a/src/riak_kv_console.erl +++ b/src/riak_kv_console.erl @@ -978,22 +978,22 @@ bucket_type_create_with_timeseries_table_error_when_write_once_set_to_false_test ). 
bucket_type_create_with_timeseries_table_error_with_short_primary_key_test() -> - Ref = make_ref(), TableDef = <<"CREATE TABLE my_type (", - "user varchar not null, ", - "time timestamp not null, ", - "PRIMARY KEY ((user, quantum(time, 15, m)), " - "user, time))">>, + "user varchar not null, ", + "time timestamp not null, ", + "PRIMARY KEY ((user, quantum(time, 15, m)), user, time))">>, JSON = json_props([{bucket_type, my_type}, {table_def, TableDef}]), - ?assertEqual( - error, - bucket_type_create( - fun(Props) -> put(Ref, Props) end, - <<"my_type">>, - mochijson2:decode(JSON) - ) + Result = bucket_type_create( + fun(Props) -> Props end, + <<"my_type">>, + mochijson2:decode(JSON) + ), + % just assert that this returns a ddl prop + ?assertMatch( + [{ddl, _}|_], + Result ). bucket_type_create_with_timeseries_table_error_with_misplaced_quantum_test() -> @@ -1024,10 +1024,9 @@ bucket_type_and_table_error_local_key_test() -> "user varchar not null, ", "time timestamp not null, ", "other varchar not null, ", - "PRIMARY KEY ((series, user, quantum(time, 15, m)), " - "series, user, time, other))">>, + "PRIMARY KEY ((series, user, quantum(time, 15, m)), seriesd, user, time, other))">>, JSON = json_props([{bucket_type, my_type}, - {table_def, TableDef}]), + {table_def, TableDef}]), ?assertEqual( error, bucket_type_create( diff --git a/src/riak_kv_eleveldb_backend.erl b/src/riak_kv_eleveldb_backend.erl index 4acf628958..95b184d10b 100644 --- a/src/riak_kv_eleveldb_backend.erl +++ b/src/riak_kv_eleveldb_backend.erl @@ -465,8 +465,9 @@ range_scan(FoldIndexFun, Buffer, Opts, #state{fold_opts=_FoldOpts, partition_key = #key_v1{ast = _PKAST}} = Qry, {startkey, StartK} = proplists:lookup(startkey, W), {endkey, EndK} = proplists:lookup(endkey, W), - StartKey = key_prefix(Bucket, [element(3, E) || E <- StartK], length(LKAST)), - EndKey1 = key_prefix(Bucket, [element(3, E) || E <- EndK], length(LKAST)), + LocalKeyLen = length(LKAST), + StartKey = key_prefix(Bucket, [element(3, E) || E <- StartK], LocalKeyLen), + EndKey1 = key_prefix(Bucket, [element(3, E) || E <- EndK], LocalKeyLen), case lists:member({end_inclusive, true}, W) of true -> EndKey2 = <>; false -> EndKey2 = EndKey1 diff --git a/src/riak_kv_qry_compiler.erl b/src/riak_kv_qry_compiler.erl index 96a5b4c7e0..46b81804ed 100644 --- a/src/riak_kv_qry_compiler.erl +++ b/src/riak_kv_qry_compiler.erl @@ -1999,7 +1999,7 @@ flexible_keys_1_test() -> "PRIMARY KEY ((a1, quantum(a, 15, 's')), a1, a, b, c, d))"), {ok, Q} = get_query( "SELECT * FROM tab4 WHERE a > 0 AND a < 1000 AND a1 = 1"), - ?assertEqual( + ?assertMatch( {ok, [#riak_select_v1{}]}, compile(DDL, Q, 100) ). diff --git a/src/riak_kv_ts_util.erl b/src/riak_kv_ts_util.erl index 33afea05cc..eb146c41ab 100644 --- a/src/riak_kv_ts_util.erl +++ b/src/riak_kv_ts_util.erl @@ -231,30 +231,30 @@ make_ts_keys_1_test() -> ). % a two element key, still using the table definition field order -% make_ts_keys_2_test() -> -% {DDL, Mod} = helper_compile_def_to_module( -% "CREATE TABLE table1 (" -% "a SINT64 NOT NULL, " -% "b TIMESTAMP NOT NULL, " -% "c SINT64 NOT NULL, " -% "PRIMARY KEY((a, quantum(b, 15, 's')), a, b))"), -% ?assertEqual( -% {ok, {{1,0}, {1,2}}}, -% make_ts_keys([1,2], DDL, Mod) -% ). 
- -% make_ts_keys_3_test() -> -% {DDL, Mod} = helper_compile_def_to_module( -% "CREATE TABLE table2 (" -% "a SINT64 NOT NULL, " -% "b SINT64 NOT NULL, " -% "c TIMESTAMP NOT NULL, " -% "d SINT64 NOT NULL, " -% "PRIMARY KEY ((d,a,quantum(c, 1, 's')), d,a,c))"), -% ?assertEqual( -% {ok, {{10,20,0}, {10,20,1}}}, -% make_ts_keys([10,20,1], DDL, Mod) -% ). +make_ts_keys_2_test() -> + {DDL, Mod} = helper_compile_def_to_module( + "CREATE TABLE table1 (" + "a SINT64 NOT NULL, " + "b TIMESTAMP NOT NULL, " + "c SINT64 NOT NULL, " + "PRIMARY KEY((a, quantum(b, 15, 's')), a, b))"), + ?assertEqual( + {ok, {{1,0}, {1,2}}}, + make_ts_keys([1,2], DDL, Mod) + ). + +make_ts_keys_3_test() -> + {DDL, Mod} = helper_compile_def_to_module( + "CREATE TABLE table2 (" + "a SINT64 NOT NULL, " + "b SINT64 NOT NULL, " + "c TIMESTAMP NOT NULL, " + "d SINT64 NOT NULL, " + "PRIMARY KEY ((d,a,quantum(c, 1, 's')), d,a,c))"), + ?assertEqual( + {ok, {{10,20,0}, {10,20,1}}}, + make_ts_keys([10,20,1], DDL, Mod) + ). make_ts_keys_4_test() -> {DDL, Mod} = helper_compile_def_to_module( diff --git a/src/riak_kv_w1c_worker.erl b/src/riak_kv_w1c_worker.erl index c59bfaae18..ce74195cf7 100644 --- a/src/riak_kv_w1c_worker.erl +++ b/src/riak_kv_w1c_worker.erl @@ -408,7 +408,7 @@ find_put_timeout(Options) -> ?DEFAULT_TIMEOUT end. -chash_key(Bucket, {PartitionKey, LocalKey}, BucketProps) when is_tuple(LocalKey) -> +chash_key(Bucket, {PartitionKey, _LocalKey}, BucketProps) -> riak_core_util:chash_key({Bucket, PartitionKey}, BucketProps); chash_key(Bucket, Key, BucketProps) -> riak_core_util:chash_key({Bucket, Key}, BucketProps). From 7bd3ecc7bab5b42015dc5301ac3ca998ae9245e1 Mon Sep 17 00:00:00 2001 From: andytill Date: Fri, 12 Feb 2016 15:27:56 +0000 Subject: [PATCH 010/122] Support partition keys with no quanta. --- src/riak_kv_qry_compiler.erl | 140 +++++++++++++++++++++++++++-------- 1 file changed, 110 insertions(+), 30 deletions(-) diff --git a/src/riak_kv_qry_compiler.erl b/src/riak_kv_qry_compiler.erl index 46b81804ed..7749b53677 100644 --- a/src/riak_kv_qry_compiler.erl +++ b/src/riak_kv_qry_compiler.erl @@ -33,6 +33,13 @@ -include("riak_kv_index.hrl"). -include("riak_kv_ts_error_msgs.hrl"). +-type where_props() :: [{startkey, [term()]} | + {endkey, [term()]} | + {filter, [term()]} | + {start_inclusive, boolean()} | + {end_inclusive, boolean()}]. +-export_type([where_props/0]). + %% 3rd argument is undefined if we should not be concerned about the %% maximum number of quanta -spec compile(#ddl_v1{}, ?SQL_SELECT{}, 'undefined'|pos_integer()) -> @@ -436,7 +443,24 @@ col_index_and_type_of(Fields, ColumnName) -> {Position, Type} end. -expand_where(Where, #key_v1{ast = PAST}, MaxSubQueries) -> +%% +-spec expand_where(filter(), #key_v1{}, integer()) -> + [where_props()] | {error, any()}. +expand_where(Where, PartitionKey, MaxSubQueries) -> + case find_quanta_function_in_key(PartitionKey) of + [{[QField], QSize, QUnit}] -> + hash_timestamp_to_quanta(QField, QSize, QUnit, MaxSubQueries, Where); + [] -> + [Where] + end. + +%% +find_quanta_function_in_key(#key_v1{ ast = PKAST }) -> + [{X, Y, Z} || #hash_fn_v1{mod = riak_ql_quanta, fn = quantum, + args = [#param_v1{name = X}, Y, Z]} <- PKAST]. 
+ +%% +hash_timestamp_to_quanta(QField, QSize, QUnit, MaxSubQueries, Where) -> GetMaxMinFun = fun({startkey, List}, {_S, E}) -> {element(3, lists:last(List)), E}; ({endkey, List}, {S, _E}) -> @@ -445,24 +469,16 @@ expand_where(Where, #key_v1{ast = PAST}, MaxSubQueries) -> {S, E} end, {Min, Max} = lists:foldl(GetMaxMinFun, {"", ""}, Where), - [{[QField], Q, U}] = [{X, Y, Z} - || #hash_fn_v1{mod = riak_ql_quanta, - fn = quantum, - args = [#param_v1{name = X}, Y, Z]} - <- PAST], EffMin = case proplists:get_value(start_inclusive, Where, true) of - true -> - Min; - _ -> - Min + 1 + true -> Min; + false -> Min + 1 end, EffMax = case proplists:get_value(end_inclusive, Where, false) of - true -> - Max + 1; - _ -> - Max + true -> Max + 1; + false -> Max end, - {NoSubQueries, Boundaries} = riak_ql_quanta:quanta(EffMin, EffMax, Q, U), + {NoSubQueries, Boundaries} = + riak_ql_quanta:quanta(EffMin, EffMax, QSize, QUnit), if NoSubQueries == 1 -> [Where]; @@ -517,11 +533,13 @@ compile_where(DDL, Where) -> {true, NewW} -> NewW end. -quantum_field_name(#ddl_v1{ partition_key = PK }) -> - #key_v1{ ast = PartitionKeyAST } = PK, - Quantum = lists:last(PartitionKeyAST), - #hash_fn_v1{args = [#param_v1{name = QFieldName} | _]} = Quantum, - QFieldName. +quantum_field_name(#ddl_v1{ partition_key = #key_v1{ ast = PKAST } }) -> + case lists:last(PKAST) of + #hash_fn_v1{args = [#param_v1{name = [QFieldName]} | _]} -> + QFieldName; + #param_v1{} -> + no_quanta + end. check_if_timeseries(#ddl_v1{table = T, partition_key = PK, local_key = LK0} = DDL, [W]) -> @@ -529,9 +547,10 @@ check_if_timeseries(#ddl_v1{table = T, partition_key = PK, local_key = LK0} = DD #key_v1{ast = PartitionKeyAST} = PK, PartitionFields = [X || #param_v1{name = X} <- PartitionKeyAST], LK = LK0#key_v1{ast = lists:sublist(LK0#key_v1.ast, length(PartitionKeyAST))}, - [QuantumFieldName] = quantum_field_name(DDL), + QuantumFieldName = quantum_field_name(DDL), StrippedW = strip(W, []), - {StartW, EndW, Filter} = break_out_timeseries(StrippedW, PartitionFields, [QuantumFieldName]), + {StartW, EndW, Filter} = + break_out_timeseries(StrippedW, PartitionFields, QuantumFieldName), Mod = riak_ql_ddl:make_module_name(T), StartKey = rewrite(LK, StartW, Mod), EndKey = rewrite(LK, EndW, Mod), @@ -593,7 +612,7 @@ includes([{Op1, Field, _} | T], Op2, Mod) -> end. %% find the upper and lower bound for the time -find_timestamp_bounds(QuantumField, LocalFields) when is_binary(QuantumField) -> +find_timestamp_bounds(QuantumField, LocalFields) -> find_timestamp_bounds2(QuantumField, LocalFields, [], {undefined, undefined}). %% @@ -630,8 +649,11 @@ acc_upper_bounds(_Filter, {_, _U}) -> error({upper_bound_specified_more_than_once, ?E_TSMSG_DUPLICATE_UPPER_BOUND}). %% -break_out_timeseries(Filters1, PartitionFields1, [QuantumFields]) -> - case find_timestamp_bounds(QuantumFields, Filters1) of +break_out_timeseries(Filters1, PartitionFields1, no_quanta) -> + {Body, Filters2} = split_key_from_filters(PartitionFields1, Filters1), + {Body, Body, Filters2}; +break_out_timeseries(Filters1, PartitionFields1, QuantumField) when is_binary(QuantumField) -> + case find_timestamp_bounds(QuantumField, Filters1) of {_, {undefined, undefined}} -> error({incomplete_where_clause, ?E_TSMSG_NO_BOUNDS_SPECIFIED}); {_, {_, undefined}} -> @@ -641,6 +663,7 @@ break_out_timeseries(Filters1, PartitionFields1, [QuantumFields]) -> {_, {{_,_,{_,Starts}}, {_,_,{_,Ends}}}} when is_integer(Starts), is_integer(Ends), Starts > Ends -> + %% FIXME reliance on three element key!? 
error({lower_bound_must_be_less_than_upper_bound, ?E_TSMSG_LOWER_BOUND_MUST_BE_LESS_THAN_UPPER_BOUND}); {_, {{'>',_,{_,Starts}}, {'<',_,{_,Ends}}}} when is_integer(Starts), @@ -1937,7 +1960,6 @@ compile_query_with_function_type_error_1_test() -> "SELECT SUM(location) FROM GeoCheckin " "WHERE time > 5000 AND time < 10000" "AND user = 'user_1' AND location = 'derby'"), - io:format(user, "~p", [compile(get_standard_ddl(), Q, 100)]), ?assertEqual( {error,{invalid_query,<<"\nFunction 'SUM' called with arguments of the wrong type [varchar].">>}}, compile(get_standard_ddl(), Q, 100) @@ -1948,7 +1970,6 @@ compile_query_with_function_type_error_2_test() -> "SELECT SUM(location), AVG(location) FROM GeoCheckin " "WHERE time > 5000 AND time < 10000" "AND user = 'user_1' AND location = 'derby'"), - io:format(user, "~p", [compile(get_standard_ddl(), Q, 100)]), ?assertEqual( {error,{invalid_query,<<"\nFunction 'SUM' called with arguments of the wrong type [varchar].\n" "Function 'AVG' called with arguments of the wrong type [varchar].">>}}, @@ -1970,7 +1991,6 @@ compile_query_with_arithmetic_type_error_1_test() -> "SELECT location + 1 FROM GeoCheckin " "WHERE time > 5000 AND time < 10000" "AND user = 'user_1' AND location = 'derby'"), - io:format(user, "~p", [compile(get_standard_ddl(), Q, 100)]), ?assertEqual( {error,{invalid_query,<<"\nOperator '+' called with mismatched types [varchar vs sint64].">>}}, compile(get_standard_ddl(), Q, 100) @@ -1981,13 +2001,11 @@ compile_query_with_arithmetic_type_error_2_test() -> "SELECT 2*(location + 1) FROM GeoCheckin " "WHERE time > 5000 AND time < 10000" "AND user = 'user_1' AND location = 'derby'"), - io:format(user, "~p", [compile(get_standard_ddl(), Q, 100)]), ?assertEqual( {error,{invalid_query,<<"\nOperator '+' called with mismatched types [varchar vs sint64].">>}}, compile(get_standard_ddl(), Q, 100) ). -%% TODO MOAR tests flexible_keys_1_test() -> DDL = get_ddl( "CREATE TABLE tab4(" @@ -2004,4 +2022,66 @@ flexible_keys_1_test() -> compile(DDL, Q, 100) ). +quantum_field_name_test() -> + DDL = get_ddl( + "CREATE TABLE tab1(" + "a SINT64 NOT NULL, " + "b TIMESTAMP NOT NULL, " + "PRIMARY KEY ((a,quantum(b, 15, 's')), a,b))"), + ?assertEqual( + <<"b">>, + quantum_field_name(DDL) + ). + +quantum_field_name_no_quanta_test() -> + DDL = get_ddl( + "CREATE TABLE tab1(" + "a SINT64 NOT NULL, " + "b TIMESTAMP NOT NULL, " + "PRIMARY KEY ((a,b), a,b))"), + ?assertEqual( + no_quanta, + quantum_field_name(DDL) + ). + +% short key, partition and local keys are the same +no_quantum_in_query_1_test() -> + DDL = get_ddl( + "CREATE TABLE tabab(" + "a TIMESTAMP NOT NULL, " + "b VARCHAR NOT NULL, " + "PRIMARY KEY ((a,b), a,b))"), + {ok, Q} = get_query( + "SELECT * FROM tab1 WHERE a = 1 AND b = 1"), + ?assertMatch( + {ok, [#riak_select_v1{ + 'WHERE' = + [{startkey,[{<<"a">>,timestamp,1},{<<"b">>,varchar,1}]}, + {endkey, [{<<"a">>,timestamp,1},{<<"b">>,varchar,1}]}, + {filter,[]}, + {end_inclusive,true}] }]}, + compile(DDL, Q, 100) + ). 
+
+no_quantum_in_query_2_test() ->
+    DDL = get_ddl(
+        "CREATE TABLE tabab("
+        "a SINT64 NOT NULL, "
+        "b VARCHAR NOT NULL, "
+        "c DOUBLE NOT NULL, "
+        "d BOOLEAN NOT NULL, "
+        "PRIMARY KEY ((c,a,b), c,a,b,d))"),
+    {ok, Q} = get_query(
+          "SELECT * FROM tab1 WHERE a = 1000 AND b = 'bval' AND c = 3.5"),
+    {ok, [Select]} = compile(DDL, Q, 100),
+    Key =
+        [{<<"c">>,double,3.5}, {<<"a">>,sint64,1000},{<<"b">>,varchar,<<"bval">>}],
+    ?assertEqual(
+        [{startkey, Key},
+         {endkey, Key},
+         {filter,[]},
+         {end_inclusive,true}],
+        Select#riak_select_v1.'WHERE'
+    ).
+
 -endif.

From 8819d264b4cb1c5d86096e4f8ec4774937cd1a69 Mon Sep 17 00:00:00 2001
From: andytill
Date: Mon, 15 Feb 2016 15:59:16 +0000
Subject: [PATCH 011/122] No quantum in partition key test.

---
 src/riak_kv_qry_compiler.erl | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/src/riak_kv_qry_compiler.erl b/src/riak_kv_qry_compiler.erl
index 7749b53677..b077028b2c 100644
--- a/src/riak_kv_qry_compiler.erl
+++ b/src/riak_kv_qry_compiler.erl
@@ -2044,7 +2044,7 @@ quantum_field_name_no_quanta_test() ->
         quantum_field_name(DDL)
     ).
 
-% short key, partition and local keys are the same
+%% short key, partition and local keys are the same
 no_quantum_in_query_1_test() ->
     DDL = get_ddl(
         "CREATE TABLE tabab("
@@ -2063,6 +2063,7 @@ no_quantum_in_query_1_test() ->
         compile(DDL, Q, 100)
     ).
 
+%% partition and local key are different
 no_quantum_in_query_2_test() ->
     DDL = get_ddl(
         "CREATE TABLE tabab("
@@ -2072,7 +2073,7 @@ no_quantum_in_query_2_test() ->
         "d BOOLEAN NOT NULL, "
         "PRIMARY KEY ((c,a,b), c,a,b,d))"),
     {ok, Q} = get_query(
-          "SELECT * FROM tab1 WHERE a = 1000 AND b = 'bval' AND c = 3.5"),
+          "SELECT * FROM tabab WHERE a = 1000 AND b = 'bval' AND c = 3.5"),
     {ok, [Select]} = compile(DDL, Q, 100),
     Key =
         [{<<"c">>,double,3.5}, {<<"a">>,sint64,1000},{<<"b">>,varchar,<<"bval">>}],
@@ -2084,4 +2085,26 @@ no_quantum_in_query_2_test() ->
         Select#riak_select_v1.'WHERE'
     ).
 
+
+no_quantum_in_query_3_test() ->
+    DDL = get_ddl(
+        "CREATE TABLE tababa("
+        "a SINT64 NOT NULL, "
+        "b VARCHAR NOT NULL, "
+        "c DOUBLE NOT NULL, "
+        "d BOOLEAN NOT NULL, "
+        "PRIMARY KEY ((c,a,b), c,a,b,d))"),
+    {ok, Q} = get_query(
+        "SELECT * FROM tababa WHERE a = 1000 AND b = 'bval' AND c = 3.5 AND d = true"),
+    {ok, [Select]} = compile(DDL, Q, 100),
+    Key =
+        [{<<"c">>,double,3.5}, {<<"a">>,sint64,1000},{<<"b">>,varchar,<<"bval">>}],
+    ?assertEqual(
+        [{startkey, Key},
+         {endkey, Key},
+         {filter,{'=',{field,<<"d">>,boolean},{const, true}}},
+         {end_inclusive,true}],
+        Select#riak_select_v1.'WHERE'
+    ).
+
 -endif.

From aefb8609990c446a0152f3ad8c2bfb5128292f3b Mon Sep 17 00:00:00 2001
From: andytill
Date: Thu, 18 Feb 2016 11:55:47 +0000
Subject: [PATCH 012/122] More tests, clean up from code review.

---
 src/riak_kv_console.erl          |  2 +-
 src/riak_kv_eleveldb_backend.erl | 16 +++++-----
 src/riak_kv_qry_compiler.erl     | 51 +++++++++++++++++++++++++++++---
 3 files changed, 56 insertions(+), 13 deletions(-)

diff --git a/src/riak_kv_console.erl b/src/riak_kv_console.erl
index 81b869ef57..1532f36a9a 100644
--- a/src/riak_kv_console.erl
+++ b/src/riak_kv_console.erl
@@ -977,7 +977,7 @@ bucket_type_create_with_timeseries_table_error_when_write_once_set_to_false_test
         )
     ).
-bucket_type_create_with_timeseries_table_error_with_short_primary_key_test() ->
+bucket_type_create_with_timeseries_table_with_two_element_key_test() ->
     TableDef =
         <<"CREATE TABLE my_type (",
           "user varchar not null, ",
diff --git a/src/riak_kv_eleveldb_backend.erl b/src/riak_kv_eleveldb_backend.erl
index 95b184d10b..0787f7d388 100644
--- a/src/riak_kv_eleveldb_backend.erl
+++ b/src/riak_kv_eleveldb_backend.erl
@@ -49,10 +49,10 @@
          callback/3]).
 
 -export([data_size/1]).
 
-% -compile({inline, [
-%               to_object_key/2, from_object_key/1,
-%               to_index_key/4, from_index_key/1
-%               ]}).
+-compile({inline, [
+              to_object_key/2, from_object_key/1,
+              to_index_key/4, from_index_key/1
+              ]}).
 %% Remove a few releases after 2.1 series, keeping
 %% around for debugging/comparison.
 -export([orig_to_object_key/2, orig_from_object_key/1]).
@@ -466,8 +466,8 @@ range_scan(FoldIndexFun, Buffer, Opts, #state{fold_opts=_FoldOpts,
     {startkey, StartK} = proplists:lookup(startkey, W),
     {endkey, EndK} = proplists:lookup(endkey, W),
     LocalKeyLen = length(LKAST),
-    StartKey = key_prefix(Bucket, [element(3, E) || E <- StartK], LocalKeyLen),
-    EndKey1 = key_prefix(Bucket, [element(3, E) || E <- EndK], LocalKeyLen),
+    StartKey = key_prefix(Bucket, [Value || {_Name,_Type,Value} <- StartK], LocalKeyLen),
+    EndKey1 = key_prefix(Bucket, [Value || {_Name,_Type,Value} <- EndK], LocalKeyLen),
     case lists:member({end_inclusive, true}, W) of
         true -> EndKey2 = <<EndKey1/binary, 16#ff>>;
         false -> EndKey2 = EndKey1
     end,
@@ -978,14 +978,14 @@ orig_to_object_key(Bucket, Key) ->
 %%
 to_object_key({TableName, TableName}, LocalKey) when is_tuple(LocalKey) ->
     EncodedBucketType = EncodedBucketName = sext:encode(TableName),
-    EncodedFamily = sext:encode(LocalKey),
+    EncodedLocalKey = sext:encode(LocalKey),
     % format like {'o', {TableName,TableName}, LocalKeyTuple}
     <<16,0,0,0,3,  %% 3-tuple - outer
      12,183,128,8, %% o-atom
     16,0,0,0,2,    %% 2-tuple for bucket type/name
     EncodedBucketType/binary,
    EncodedBucketName/binary,
-    EncodedFamily/binary>>;
+    EncodedLocalKey/binary>>;
 to_object_key({BucketType, BucketName}, Key) -> %% Riak 2.0 keys
     %% sext:encode({o, Bucket, Key}).
     EncodedBucketType = sext:encode(BucketType),
diff --git a/src/riak_kv_qry_compiler.erl b/src/riak_kv_qry_compiler.erl
index b077028b2c..fd5b718c93 100644
--- a/src/riak_kv_qry_compiler.erl
+++ b/src/riak_kv_qry_compiler.erl
@@ -663,7 +663,6 @@ break_out_timeseries(Filters1, PartitionFields1, QuantumField) when is_binary(Qu
     {_, {{_,_,{_,Starts}}, {_,_,{_,Ends}}}} when is_integer(Starts),
                                                  is_integer(Ends),
                                                  Starts > Ends ->
-        %% FIXME reliance on three element key!?
         error({lower_bound_must_be_less_than_upper_bound,
                ?E_TSMSG_LOWER_BOUND_MUST_BE_LESS_THAN_UPPER_BOUND});
     {_, {{'>',_,{_,Starts}}, {'<',_,{_,Ends}}}} when is_integer(Starts),
@@ -675,9 +674,6 @@ break_out_timeseries(Filters1, PartitionFields1, QuantumField) when is_binary(Qu
         error({lower_and_upper_bounds_are_equal_when_no_equals_operator,
                ?E_TSMSG_LOWER_AND_UPPER_BOUNDS_ARE_EQUAL_WHEN_NO_EQUALS_OPERATOR});
     {Filters2, {Starts, Ends}} ->
-        %% remove the quanta from the local fields, this has already been
-        %% removed from the fields
-        % PartitionFields2 = lists:sublist(PartitionFields1, length(PartitionFields1)), % TODO DUUUUDE
         %% create the keys by splitting the key filters and prepending it
         %% with the time bound.
{Body, Filters3} = split_key_from_filters(PartitionFields1, Filters2), @@ -2017,6 +2013,23 @@ flexible_keys_1_test() -> "PRIMARY KEY ((a1, quantum(a, 15, 's')), a1, a, b, c, d))"), {ok, Q} = get_query( "SELECT * FROM tab4 WHERE a > 0 AND a < 1000 AND a1 = 1"), + {ok, [Select]} = compile(DDL, Q, 100), + ?assertEqual( + [{startkey,[{<<"a1">>,sint64,1}, {<<"a">>,timestamp,0}]}, + {endkey, [{<<"a1">>,sint64,1}, {<<"a">>,timestamp,1000}]}, + {filter,[]}, + {start_inclusive,false}], + Select#riak_select_v1.'WHERE' + ). + +%% two element key with quantum +flexible_keys_2_test() -> + DDL = get_ddl( + "CREATE TABLE tab4(" + "a TIMESTAMP NOT NULL, " + "PRIMARY KEY ((quantum(a, 15, 's')), a))"), + {ok, Q} = get_query( + "SELECT * FROM tab4 WHERE a > 0 AND a < 1000"), ?assertMatch( {ok, [#riak_select_v1{}]}, compile(DDL, Q, 100) @@ -2107,4 +2120,34 @@ no_quantum_in_query_3_test() -> Select#riak_select_v1.'WHERE' ). +%% one element key +no_quantum_in_query_4_test() -> + DDL = get_ddl( + "CREATE TABLE tab1(" + "a TIMESTAMP NOT NULL, " + "PRIMARY KEY ((a), a))"), + {ok, Q} = get_query( + "SELECT * FROM tab1 WHERE a = 1000"), + {ok, [Select]} = compile(DDL, Q, 100), + ?assertEqual( + [{startkey,[{<<"a">>,timestamp,1000}]}, + {endkey,[{<<"a">>,timestamp,1000}]}, + {filter,[]}, + {end_inclusive,true}], + Select#riak_select_v1.'WHERE' + ). + +two_element_key_range_cannot_match_test() -> + DDL = get_ddl( + "CREATE TABLE tabab(" + "a TIMESTAMP NOT NULL, " + "b SINT64 NOT NULL, " + "PRIMARY KEY ((a,quantum(b, 15, 's')), a,b))"), + {ok, Q} = get_query( + "SELECT * FROM tab1 WHERE a = 1 AND b > 1 AND b < 1"), + ?assertMatch( + {error, {lower_and_upper_bounds_are_equal_when_no_equals_operator, <<_/binary>>}}, + compile(DDL, Q, 100) + ). + -endif. From 16d3d56dfbe6c2b2432f469b015df886cea6c411 Mon Sep 17 00:00:00 2001 From: "John R. Daily" Date: Thu, 18 Feb 2016 20:13:54 -0500 Subject: [PATCH 013/122] Switch from hard-coded row counts for batches to configurable estimated total batch size --- src/riak_kv.app.src | 11 +++++++++- src/riak_kv_pb_timeseries.erl | 39 +++++++++++++++++++++++++---------- 2 files changed, 38 insertions(+), 12 deletions(-) diff --git a/src/riak_kv.app.src b/src/riak_kv.app.src index 87a3bb0706..96849d5a7b 100644 --- a/src/riak_kv.app.src +++ b/src/riak_kv.app.src @@ -84,6 +84,15 @@ {timeseries_query_max_quanta_span, 5}, - {timeseries_max_concurrent_queries, 3} + {timeseries_max_concurrent_queries, 3}, + + %% Max batch size (in bytes) of data distributed between + %% nodes during a put operation. Highly recommended that you + %% not increase this above 1MB. + %% + %% This is not a hard cap; the number of records to generate + %% a batch under this value will be estimated based on the + %% size of the first record. + {timeseries_max_batch_size, 1048576} ]} ]}. diff --git a/src/riak_kv_pb_timeseries.erl b/src/riak_kv_pb_timeseries.erl index 5898dacd56..16da6bc7e0 100644 --- a/src/riak_kv_pb_timeseries.erl +++ b/src/riak_kv_pb_timeseries.erl @@ -63,9 +63,6 @@ -define(FETCH_RETRIES, 10). %% TODO make it configurable in tsqueryreq -define(TABLE_ACTIVATE_WAIT, 30). %% ditto --define(MIN_PUT_BATCH_SIZE, 10). %% TODO make it configurable somewhere --define(MAX_PUT_BATCH_SIZE, 200). 
%% ditto - -record(state, { req, req_ctx, @@ -311,6 +308,9 @@ put_data(Data, Table, Mod) when is_binary(Table) -> riak_core_node_watcher:nodes(riak_kv)), SendFullBatches = riak_core_capability:get({riak_kv, w1c_batch_vnode}, false), + %% Default to 1MB for a max batch size to not overwhelm disterl + CappedBatchSize = app_helper:get_env(riak_kv, timeseries_max_batch_size, + 1024 * 1024), EncodeFn = fun(O) -> riak_object:to_binary(v1, O, msgpack) end, @@ -321,14 +321,11 @@ put_data(Data, Table, Mod) when is_binary(Table) -> case riak_kv_w1c_worker:validate_options( NVal, Preflist, [], BucketProps) of {ok, W, PW} -> - DataForVnode = - case SendFullBatches andalso length(Records) >= ?MIN_PUT_BATCH_SIZE of - true -> - {batches, create_batches(Records, ?MAX_PUT_BATCH_SIZE)}; - false -> - {individual, Records} - end, - + DataForVnode = pick_batch_option(SendFullBatches, + CappedBatchSize, + Records, + termsize(hd(Records)), + length(Records)), Ids = invoke_async_put(fun(Record) -> build_object(Bucket, Mod, DDL, @@ -377,6 +374,25 @@ row_to_key(Row, DDL, Mod) -> riak_kv_ts_util:encode_typeval_key( riak_ql_ddl:get_partition_key(DDL, Row, Mod)). +%%%%%%%% +%% Utility functions for batch delivery of records +termsize(Term) -> + size(term_to_binary(Term)). + +pick_batch_option(_, _, Records, _, 1) -> + {individual, Records}; +pick_batch_option(true, MaxBatch, Records, SampleSize, _NumRecs) -> + {batches, create_batches(Records, + estimated_row_count(SampleSize, MaxBatch))}; +pick_batch_option(false, _, Records, _, _) -> + {individual, Records}. + +estimated_row_count(SampleRowSize, MaxBatchSize) -> + %% Assume some rows will be larger, so introduce a fudge factor of + %% roughly 10 percent. + RowSizeFudged = (SampleRowSize * 10) div 9, + MaxBatchSize div RowSizeFudged. + %% May be a more efficient way to do this. Take a list of arbitrary %% data (expected to be a list of lists for this use case) and create %% a list of MaxSize lists. @@ -389,6 +405,7 @@ create_batches(Rows, 0, Max, ThisBatch, AllBatches) -> create_batches(Rows, Max, Max, [], AllBatches ++ [ThisBatch]); create_batches([H|T], Counter, Max, ThisBatch, AllBatches) -> create_batches(T, Counter-1, Max, ThisBatch ++ [H], AllBatches). +%%%%%%%% add_preflists(PartitionedData, NVal, UpNodes) -> lists:map(fun({Idx, Rows}) -> {Idx, From d8779efa78cdb7c4fb83a693cbcc68421dfd65aa Mon Sep 17 00:00:00 2001 From: "John R. Daily" Date: Fri, 19 Feb 2016 21:13:02 -0500 Subject: [PATCH 014/122] Obvious optimization is obvious: do not bother recursively creating batches if the total number of records is less than the max per batch --- src/riak_kv_pb_timeseries.erl | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/riak_kv_pb_timeseries.erl b/src/riak_kv_pb_timeseries.erl index 16da6bc7e0..87f3b7fbcd 100644 --- a/src/riak_kv_pb_timeseries.erl +++ b/src/riak_kv_pb_timeseries.erl @@ -393,6 +393,8 @@ estimated_row_count(SampleRowSize, MaxBatchSize) -> RowSizeFudged = (SampleRowSize * 10) div 9, MaxBatchSize div RowSizeFudged. +create_batches(Rows, MaxSize) when length(Rows) =< MaxSize -> + [Rows]; %% May be a more efficient way to do this. Take a list of arbitrary %% data (expected to be a list of lists for this use case) and create %% a list of MaxSize lists. @@ -989,4 +991,15 @@ batch_3_test() -> ?assertEqual([[1, 2, 3], [4, 5, 6], [7, 8, 9]], create_batches([1, 2, 3, 4, 5, 6, 7, 8, 9], 3)). +batch_undersized1_test() -> + ?assertEqual([[1, 2, 3, 4, 5, 6]], + create_batches([1, 2, 3, 4, 5, 6], 6)). 
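%% A worked example of the estimate above, mirroring estimated_row_count/2
%% with assumed numbers: at the default 1 MiB cap, a 1000-byte sample row
%% is padded to (1000 * 10) div 9 = 1111 bytes, so a batch is capped at
%% 1048576 div 1111 = 943 rows.
batch_estimate_example_test() ->
    RowSizeFudged = (1000 * 10) div 9,
    ?assertEqual(943, (1024 * 1024) div RowSizeFudged).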
+
+batch_undersized2_test() ->
+    ?assertEqual([[1, 2, 3, 4, 5, 6]],
+                 create_batches([1, 2, 3, 4, 5, 6], 7)).
+
+batch_almost_undersized_test() ->
+    ?assertEqual([[1, 2, 3, 4, 5], [6]],
+                 create_batches([1, 2, 3, 4, 5, 6], 5)).
 -endif.

From e9090f6b763a1773b8ab9328805e67feb03396b5 Mon Sep 17 00:00:00 2001
From: andytill
Date: Tue, 23 Feb 2016 14:15:16 +0000
Subject: [PATCH 015/122] Allow the quantum field to be at any position in the
 partition key, if it is used at all.

---
 src/riak_kv_qry_compiler.erl | 79 +++++++++++++++++++++++++++++-------
 1 file changed, 65 insertions(+), 14 deletions(-)

diff --git a/src/riak_kv_qry_compiler.erl b/src/riak_kv_qry_compiler.erl
index fd5b718c93..d07062ada9 100644
--- a/src/riak_kv_qry_compiler.erl
+++ b/src/riak_kv_qry_compiler.erl
@@ -447,24 +447,36 @@ col_index_and_type_of(Fields, ColumnName) ->
 -spec expand_where(filter(), #key_v1{}, integer()) ->
         [where_props()] | {error, any()}.
 expand_where(Where, PartitionKey, MaxSubQueries) ->
-    case find_quanta_function_in_key(PartitionKey) of
-        [{[QField], QSize, QUnit}] ->
-            hash_timestamp_to_quanta(QField, QSize, QUnit, MaxSubQueries, Where);
-        [] ->
+    case find_quantum_field_index_in_key(PartitionKey) of
+        {QField, QSize, QUnit, QIndex} ->
+            hash_timestamp_to_quanta(QField, QSize, QUnit, QIndex, MaxSubQueries, Where);
+        notfound ->
             [Where]
     end.
 
+%% Return the parameters for the quantum function and its index in the
+%% partition key fields.
+-spec find_quantum_field_index_in_key(#key_v1{}) ->
+    {QName::binary(), QSize::integer(), QUnit::atom(), QIndex::integer()}.
+find_quantum_field_index_in_key(#key_v1{ ast = PKAST }) ->
+    find_quantum_field_index_in_key2(PKAST, 1).
+
 %%
-find_quanta_function_in_key(#key_v1{ ast = PKAST }) ->
-    [{X, Y, Z} || #hash_fn_v1{mod = riak_ql_quanta, fn = quantum,
-                              args = [#param_v1{name = X}, Y, Z]} <- PKAST].
+find_quantum_field_index_in_key2([], _) ->
+    notfound;
+find_quantum_field_index_in_key2([#hash_fn_v1{ mod = riak_ql_quanta,
+                                               fn = quantum,
+                                               args = [#param_v1{name = [X]}, Y, Z] }|_], Index) ->
+    {X,Y,Z,Index};
+find_quantum_field_index_in_key2([_|Tail], Index) ->
+    find_quantum_field_index_in_key2(Tail, Index+1).
 
 %%
-hash_timestamp_to_quanta(QField, QSize, QUnit, MaxSubQueries, Where) ->
+hash_timestamp_to_quanta(QField, QSize, QUnit, QIndex, MaxSubQueries, Where) ->
     GetMaxMinFun = fun({startkey, List}, {_S, E}) ->
-                           {element(3, lists:last(List)), E};
+                           {element(3, lists:nth(QIndex, List)), E};
                       ({endkey, List}, {S, _E}) ->
-                           {S, element(3, lists:last(List))};
+                           {S, element(3, lists:nth(QIndex, List))};
                       (_, {S, E}) ->
                            {S, E}
                    end,
@@ -533,14 +545,25 @@ compile_where(DDL, Where) ->
         {true, NewW} -> NewW
     end.
 
-quantum_field_name(#ddl_v1{ partition_key = #key_v1{ ast = PKAST } }) ->
-    case lists:last(PKAST) of
-        #hash_fn_v1{args = [#param_v1{name = [QFieldName]} | _]} ->
+%%
+quantum_field_name(DDL) ->
+    case find_quantum_fields(DDL) of
+        [QFieldName] ->
             QFieldName;
-        #param_v1{} ->
+        [] ->
             no_quanta
     end.
 
+%%
+find_quantum_fields(#ddl_v1{ partition_key = #key_v1{ ast = PKAST } }) ->
+    [quantum_fn_to_field_name(QuantumFunc) || #hash_fn_v1{ } = QuantumFunc <- PKAST].
+
+%%
+quantum_fn_to_field_name(#hash_fn_v1{ mod = riak_ql_quanta,
+                                      fn = quantum,
+                                      args = [#param_v1{name = [Name]}|_ ] }) ->
+    Name.
+
 check_if_timeseries(#ddl_v1{table = T, partition_key = PK, local_key = LK0} = DDL,
                     [W]) ->
     try
@@ -2150,4 +2173,32 @@ two_element_key_range_cannot_match_test() ->
         compile(DDL, Q, 100)
     ).
 
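%% A plain-terms sketch of the 1-based scan performed by
%% find_quantum_field_index_in_key2/2 above (illustration only; the real
%% code matches on #hash_fn_v1{} records rather than a predicate).
index_of(_Pred, [], _N) -> notfound;
index_of(Pred, [H | T], N) ->
    case Pred(H) of true -> N; false -> index_of(Pred, T, N + 1) end.
%% index_of(fun({quantum, _}) -> true; (_) -> false end,
%%          [a, {quantum, b}, c], 1) returns 2, just as the quantum in
%% (a, quantum(b, 1, 's'), c) sits at index 2 of the partition key.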
+quantum_is_not_last_element_test() ->
+    DDL = get_ddl(
+        "CREATE TABLE tab1("
+        "a SINT64 NOT NULL, "
+        "b TIMESTAMP NOT NULL, "
+        "c SINT64 NOT NULL, "
+        "PRIMARY KEY ((a,quantum(b,1,'s'),c), a,b,c))"),
+    {ok, Q} = get_query(
+        "SELECT * FROM tab1 WHERE b >= 1000 AND b < 3000 AND a = 10 AND c = 20"),
+    {ok, SubQueries} = compile(DDL, Q, 100),
+    SubQueryWheres = [S#riak_select_v1.'WHERE' || S <- SubQueries],
+    ?assertEqual(
+        [
+         [{startkey,[{<<"a">>,sint64,10},{<<"b">>,timestamp,1000},{<<"c">>,sint64,20}]},
+          {endkey,  [{<<"a">>,sint64,10},{<<"b">>,timestamp,2000},{<<"c">>,sint64,20}]},
+          {filter,[]}],
+         [{startkey,[{<<"a">>,sint64,10},{<<"b">>,timestamp,2000},{<<"c">>,sint64,20}]},
+          {endkey,  [{<<"a">>,sint64,10},{<<"b">>,timestamp,3000},{<<"c">>,sint64,20}]},
+          {filter,[]}],
+         %% FIXME this key should already be covered
+         [{startkey,[{<<"a">>,sint64,10},{<<"b">>,timestamp,3000},{<<"c">>,sint64,20}]},
+          {endkey,  [{<<"a">>,sint64,10},{<<"b">>,timestamp,3000},{<<"c">>,sint64,20}]},
+          {filter,[]},
+          {end_inclusive,true}]
+        ],
+        SubQueryWheres
+    ).
+
 -endif.

From 836cfeee4957d90a89f80c7ba98f7500cc26d245 Mon Sep 17 00:00:00 2001
From: andytill
Date: Thu, 25 Feb 2016 19:42:40 +0000
Subject: [PATCH 016/122] Add the FF byte to the end of the start key if it is
 not start_inclusive, to ensure it is always greater than the start key.

---
 src/riak_kv_eleveldb_backend.erl |  8 ++++++--
 src/riak_kv_qry_compiler.erl     | 23 +++++++++++------------
 2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/src/riak_kv_eleveldb_backend.erl b/src/riak_kv_eleveldb_backend.erl
index 0787f7d388..4a804e5f5e 100644
--- a/src/riak_kv_eleveldb_backend.erl
+++ b/src/riak_kv_eleveldb_backend.erl
@@ -466,15 +466,19 @@ range_scan(FoldIndexFun, Buffer, Opts, #state{fold_opts=_FoldOpts,
     {startkey, StartK} = proplists:lookup(startkey, W),
     {endkey, EndK} = proplists:lookup(endkey, W),
     LocalKeyLen = length(LKAST),
-    StartKey = key_prefix(Bucket, [Value || {_Name,_Type,Value} <- StartK], LocalKeyLen),
+    StartKey1 = key_prefix(Bucket, [Value || {_Name,_Type,Value} <- StartK], LocalKeyLen),
     EndKey1 = key_prefix(Bucket, [Value || {_Name,_Type,Value} <- EndK], LocalKeyLen),
+    case lists:member({start_inclusive, false}, W) of
+        true -> StartKey2 = <<StartKey1/binary, 16#ff>>;
+        false -> StartKey2 = StartKey1
+    end,
     case lists:member({end_inclusive, true}, W) of
         true -> EndKey2 = <<EndKey1/binary, 16#ff>>;
         false -> EndKey2 = EndKey1
     end,
     FoldFun = fun build_list/2,
     Options = [
-               {start_key, StartKey},
+               {start_key, StartKey2},
                {end_key, EndKey2},
                {fold_method, streaming},
                {encoding, msgpack} | range_scan_additional_options(W)
diff --git a/src/riak_kv_qry_compiler.erl b/src/riak_kv_qry_compiler.erl
index d07062ada9..6f742eb280 100644
--- a/src/riak_kv_qry_compiler.erl
+++ b/src/riak_kv_qry_compiler.erl
@@ -457,7 +457,7 @@ expand_where(Where, PartitionKey, MaxSubQueries) ->
 %% Return the parameters for the quantum function and its index in the
 %% partition key fields.
 -spec find_quantum_field_index_in_key(#key_v1{}) ->
-    {QName::binary(), QSize::integer(), QUnit::atom(), QIndex::integer()}.
+    {QName::binary(), QSize::integer(), QUnit::atom(), QIndex::integer()} | notfound.
 find_quantum_field_index_in_key(#key_v1{ ast = PKAST }) ->
     find_quantum_field_index_in_key2(PKAST, 1).
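%% Why the appended 16#ff byte works (a sketch relying only on Erlang's
%% byte-wise binary ordering): a key extended by one 16#ff byte compares
%% greater than the unextended key, so an exclusive start key is skipped
%% over and an inclusive end key still falls inside the scanned range.
ff_byte_example() ->
    Key = <<"prefix">>,
    true = <<Key/binary, 16#ff>> > Key.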
@@ -577,17 +577,17 @@ check_if_timeseries(#ddl_v1{table = T, partition_key = PK, local_key = LK0} = DD Mod = riak_ql_ddl:make_module_name(T), StartKey = rewrite(LK, StartW, Mod), EndKey = rewrite(LK, EndW, Mod), - %% defaults on startkey and endkey are different - IncStart = case includes(StartW, '>', Mod) of - true -> [{start_inclusive, false}]; - false -> [] - end, - IncEnd = case includes(EndW, '<', Mod) of - true -> []; - false -> [{end_inclusive, true}] - end, case has_errors(StartKey, EndKey) of [] -> + %% defaults on startkey and endkey are different + IncStart = case includes(StartW, '>', Mod) of + true -> [{start_inclusive, false}]; + false -> [] + end, + IncEnd = case includes(EndW, '<', Mod) of + true -> []; + false -> [{end_inclusive, true}] + end, RewrittenFilter = add_types_to_filter(Filter, Mod), {true, lists:flatten([ {startkey, StartKey}, @@ -2181,7 +2181,7 @@ quantum_is_not_last_element_test() -> "c SINT64 NOT NULL, " "PRIMARY KEY ((a,quantum(b,1,'s'),c), a,b,c))"), {ok, Q} = get_query( - "SELECT * FROM tab1 WHERE b >= 1000 AND b < 3000 AND a = 10 AND c = 20"), + "SELECT * FROM tab1 WHERE b >= 1000 AND b <= 3000 AND a = 10 AND c = 20"), {ok, SubQueries} = compile(DDL, Q, 100), SubQueryWheres = [S#riak_select_v1.'WHERE' || S <- SubQueries], ?assertEqual( @@ -2192,7 +2192,6 @@ quantum_is_not_last_element_test() -> [{startkey,[{<<"a">>,sint64,10},{<<"b">>,timestamp,2000},{<<"c">>,sint64,20}]}, {endkey, [{<<"a">>,sint64,10},{<<"b">>,timestamp,3000},{<<"c">>,sint64,20}]}, {filter,[]}], - %% FIXME this key should already be covered [{startkey,[{<<"a">>,sint64,10},{<<"b">>,timestamp,3000},{<<"c">>,sint64,20}]}, {endkey, [{<<"a">>,sint64,10},{<<"b">>,timestamp,3000},{<<"c">>,sint64,20}]}, {filter,[]}, From 50bc7e57747981fd4806b1a8589713245cb05c73 Mon Sep 17 00:00:00 2001 From: andytill Date: Mon, 7 Mar 2016 18:06:29 +0000 Subject: [PATCH 017/122] Query explanations, printed to an attached shell. --- src/riak_kv_ts_util.erl | 93 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/src/riak_kv_ts_util.erl b/src/riak_kv_ts_util.erl index 6689f28e51..e0a2bd59df 100644 --- a/src/riak_kv_ts_util.erl +++ b/src/riak_kv_ts_util.erl @@ -37,6 +37,8 @@ build_sql_record/3, sql_record_to_tuple/1 ]). +-export([explain_query/1, explain_query/2]). +-export([explain_query_print/1]). %% NOTE on table_to_bucket/1: Clients will work with table %% names. Those names map to a bucket type/bucket name tuple in Riak, @@ -236,6 +238,97 @@ make_ts_keys(CompoundKey, DDL = #ddl_v1{local_key = #key_v1{ast = LKParams}, encode_typeval_key(TypeVals) -> list_to_tuple([Val || {_Type, Val} <- TypeVals]). +explain_query_print(QueryString) -> + explain_query_print2(1, explain_query(QueryString)). + +explain_query_print2(_, []) -> + ok; +explain_query_print2(Index, [{Start, End, Filter}|Tail]) -> + io:format("SUB QUERY ~p~n~s ~s~n~s~n", + [Index,Start,filter_to_string(Filter),End]), + explain_query_print2(Index+1, Tail). + +%% +explain_query(QueryString) -> + {ok, ?SQL_SELECT{ 'FROM' = Table } = Select} = + explain_compile_query(QueryString), + {ok, _Mod, DDL} = get_table_ddl(Table), + explain_query(DDL, Select). + +%% Explain a query using the ddl and select records. The select can be a query +%% string. +%% +%% Have a flexible API because it is a debugging function. 
+explain_query(DDL, ?SQL_SELECT{} = Select) -> + {ok, SubQueries} = riak_kv_qry_compiler:compile(DDL, Select, 10000), + [explain_sub_query(SQ) || SQ <- SubQueries]; +explain_query(DDL, QueryString) -> + {ok, Select} = explain_compile_query(QueryString), + explain_query(DDL, Select). + +%% +explain_compile_query(QueryString) -> + {ok, Q} = riak_ql_parser:parse(riak_ql_lexer:get_tokens(QueryString)), + build_sql_record(select, Q, undefined). + +%% +explain_sub_query(#riak_select_v1{ 'WHERE' = SubQueryWhere } = _SubQuery) -> + %% LOCAL KEY + %% a:sint64/b:timestamp/c:sint64/ + %% SUB QUERY 1 + %% > 10/1000/20 d = 10 OR e = 12 + %% <= 10/2000/20 + {_, StartKey1} = lists:keyfind(startkey, 1, SubQueryWhere), + {_, EndKey1} = lists:keyfind(endkey, 1, SubQueryWhere), + {_, Filters} = lists:keyfind(filter, 1, SubQueryWhere), + explain_query_keys(StartKey1, EndKey1, Filters). + +%% +explain_query_keys(StartKey1, EndKey1, Filters) -> + StartKey2 = [[key_element_to_string(V), $/] || {_,_,V} <- StartKey1], + EndKey2 = [[key_element_to_string(V), $/] || {_,_,V} <- EndKey1], + case lists:keyfind(start_inclusive, 1, StartKey1) of + {start_inclusive,true} -> + StartKey3 = [">= ", StartKey2]; + _ -> + StartKey3 = ["> ", StartKey2] + end, + case lists:keyfind(end_inclusive, 1, EndKey1) of + {end_inclusive,true} -> + EndKey3 = ["<= ", EndKey2]; + _ -> + EndKey3 = ["< ", EndKey2] + end, + {StartKey3, EndKey3, Filters}. + +%% +key_element_to_string(V) when is_binary(V) -> varchar_quotes(V); +key_element_to_string(V) when is_float(V) -> mochinum:digits(V); +key_element_to_string(V) -> io_lib:format("~p", [V]). + +%% +filter_to_string([]) -> + "NO FILTER"; +filter_to_string(Filter) -> + ["FILTER ", filter_to_string2(Filter)]. + +%% +filter_to_string2({const,V}) -> + key_element_to_string(V); +filter_to_string2({field,V,_}) -> + V; +filter_to_string2({Op, A, B}) -> + [filter_to_string2(A), op_to_string(Op), filter_to_string2(B)]. + +%% +op_to_string(and_) -> " AND "; +op_to_string(or_) -> " OR "; +op_to_string(Op) -> " " ++ atom_to_list(Op) ++ " ". + +%% +varchar_quotes(V) -> + <<"'", V/binary, "'">>. + %%% %%% TESTS %%% From 90b641b79965c8ed91237eb7bf2deff2f6f0cef5 Mon Sep 17 00:00:00 2001 From: andytill Date: Tue, 8 Mar 2016 11:44:25 +0000 Subject: [PATCH 018/122] Query explanation debug info comments and variable naming. --- src/riak_kv_ts_util.erl | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/riak_kv_ts_util.erl b/src/riak_kv_ts_util.erl index e0a2bd59df..47d95acfef 100644 --- a/src/riak_kv_ts_util.erl +++ b/src/riak_kv_ts_util.erl @@ -238,6 +238,7 @@ make_ts_keys(CompoundKey, DDL = #ddl_v1{local_key = #key_v1{ast = LKParams}, encode_typeval_key(TypeVals) -> list_to_tuple([Val || {_Type, Val} <- TypeVals]). +%% Print the query explanation to the shell. explain_query_print(QueryString) -> explain_query_print2(1, explain_query(QueryString)). @@ -248,7 +249,8 @@ explain_query_print2(Index, [{Start, End, Filter}|Tail]) -> [Index,Start,filter_to_string(Filter),End]), explain_query_print2(Index+1, Tail). -%% +%% Show some debug info about how a query is compiled into sub queries +%% and what key ranges are created. explain_query(QueryString) -> {ok, ?SQL_SELECT{ 'FROM' = Table } = Select} = explain_compile_query(QueryString), @@ -272,19 +274,14 @@ explain_compile_query(QueryString) -> build_sql_record(select, Q, undefined). 
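%% A runnable sketch of the key rendering performed by
%% explain_query_keys/3 above: field values joined with slashes behind
%% the comparison operator (simplified here to integer values only).
render_key(OpPrefix, Values) ->
    iolist_to_binary([OpPrefix, [[integer_to_list(V), $/] || V <- Values]]).
%% render_key(">= ", [10, 1000]) returns <<">= 10/1000/">>.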
%% -explain_sub_query(#riak_select_v1{ 'WHERE' = SubQueryWhere } = _SubQuery) -> - %% LOCAL KEY - %% a:sint64/b:timestamp/c:sint64/ - %% SUB QUERY 1 - %% > 10/1000/20 d = 10 OR e = 12 - %% <= 10/2000/20 +explain_sub_query(#riak_select_v1{ 'WHERE' = SubQueryWhere }) -> {_, StartKey1} = lists:keyfind(startkey, 1, SubQueryWhere), {_, EndKey1} = lists:keyfind(endkey, 1, SubQueryWhere), - {_, Filters} = lists:keyfind(filter, 1, SubQueryWhere), - explain_query_keys(StartKey1, EndKey1, Filters). + {_, Filter} = lists:keyfind(filter, 1, SubQueryWhere), + explain_query_keys(StartKey1, EndKey1, Filter). %% -explain_query_keys(StartKey1, EndKey1, Filters) -> +explain_query_keys(StartKey1, EndKey1, Filter) -> StartKey2 = [[key_element_to_string(V), $/] || {_,_,V} <- StartKey1], EndKey2 = [[key_element_to_string(V), $/] || {_,_,V} <- EndKey1], case lists:keyfind(start_inclusive, 1, StartKey1) of @@ -299,7 +296,7 @@ explain_query_keys(StartKey1, EndKey1, Filters) -> _ -> EndKey3 = ["< ", EndKey2] end, - {StartKey3, EndKey3, Filters}. + {StartKey3, EndKey3, Filter}. %% key_element_to_string(V) when is_binary(V) -> varchar_quotes(V); From a1c79010246ed6db872f7ede8c1383e43e324c04 Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Tue, 8 Mar 2016 23:17:06 +0100 Subject: [PATCH 019/122] WIP - Total rewrite of riak_kv_wm_timeseries * Full usage of webmachine callbacks. --- src/riak_kv_web.erl | 5 +- src/riak_kv_wm_timeseries.erl | 783 ++++++++++++++-------------------- 2 files changed, 317 insertions(+), 471 deletions(-) diff --git a/src/riak_kv_web.erl b/src/riak_kv_web.erl index 6ae950bfcf..ae73a8563f 100644 --- a/src/riak_kv_web.erl +++ b/src/riak_kv_web.erl @@ -125,8 +125,11 @@ raw_dispatch(Name) -> lists:flatten( [ + %% Right now we only have version 1. When we get version 2 we have to + %% decide if we want to dispatch to separate resource modules or handle + %% the different versions inside the same resource handler module. [{["ts", api_version, "tables", table, "list_keys"], riak_kv_wm_timeseries_listkeys, Props}, - {["ts", api_version, "tables", table, "keys", '*'], riak_kv_wm_timeseries, Props}, + {["ts", "v1", "tables", table, "keys", '*'], riak_kv_wm_timeseries, Props}, {["ts", api_version, "query"], riak_kv_wm_timeseries_query, Props} ] || {_Prefix, Props} <- Props2]). diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl index 374c8145ef..359b9fd974 100644 --- a/src/riak_kv_wm_timeseries.erl +++ b/src/riak_kv_wm_timeseries.erl @@ -22,6 +22,11 @@ %% @doc Resource for Riak TS operations over HTTP. %% +%% This resource is responsible for everything under +%% ``` +%% ts/v1/table/Table/keys +%% ``` +%% Specific operations supported: %% ``` %% GET /ts/v1/table/Table/keys/K1/V1/... single-key get %% DELETE /ts/v1/table/Table/keys/K1/V1/... single-key delete @@ -29,63 +34,52 @@ %% on the body %% ''' %% -%% Request body is expected to be a JSON containing key and/or value(s). -%% Response is a JSON containing data rows with column headers. +%% Request body is expected to be a JSON containing a struct or structs for the +%% POST. GET and DELETE have no body. +%% +%% Response is a JSON containing full records. %% -module(riak_kv_wm_timeseries). 
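%% Hypothetical usage from the command line (host, port, table name and
%% record shape are assumptions; the paths follow the dispatch rules in
%% riak_kv_web above):
%% ```
%% curl -XPOST http://127.0.0.1:8098/ts/v1/tables/GeoCheckin/keys \
%%      -H 'Content-Type: application/json' \
%%      -d '[["family1", "series1", 1420113600000]]'
%% curl http://127.0.0.1:8098/ts/v1/tables/GeoCheckin/keys/family/family1/series/series1/time/1420113600000
%% ```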
%% webmachine resource exports --export([ - init/1, +-export([init/1, service_available/2, - is_authorized/2, - forbidden/2, allowed_methods/2, - process_post/2, malformed_request/2, - content_types_accepted/2, - resource_exists/2, - delete_resource/2, + is_authorized/2, + forbidden/2, content_types_provided/2, + content_types_accepted/2, encodings_provided/2, - produce_doc_body/2, - accept_doc_body/2 - ]). + post_is_create/2, + process_post/2, + delete_resource/2, + resource_exists/2]). -include_lib("webmachine/include/webmachine.hrl"). -include_lib("riak_ql/include/riak_ql_ddl.hrl"). -include("riak_kv_wm_raw.hrl"). -include("riak_kv_ts.hrl"). --record(ctx, {api_version, - method :: atom(), - prefix, %% string() - prefix for resource uris - timeout, %% integer() - passed-in timeout value in ms - security, %% security context - client, %% riak_client() - the store client - riak, %% local | {node(), atom()} - params for riak client - api_call :: undefined|get|put|delete, - table :: undefined | binary(), - mod :: undefined | module(), - ddl :: undefined | #ddl_v1{}, - %% data in/out: the following fields are either - %% extracted from the JSON/path elements that came in - %% the request body in case of a PUT, or filled out by - %% retrieved values for shipping (as JSON) in response - %% body - key :: undefined | ts_rec(), %% parsed out of JSON that came in the body - data :: undefined | [ts_rec()], %% ditto - result :: undefined | ok | {Headers::[binary()], Rows::[ts_rec()]} - }). +-record(ctx, + {api_call :: 'undefined' | 'get' | 'put' | 'delete', + table :: 'undefined' | binary(), + mod :: 'undefined' | module(), + key :: 'undefined' | ts_rec(), + object, + timeout :: 'undefined' | integer(), + options, %% for the call towards riak. + prefix, + riak}). -define(DEFAULT_TIMEOUT, 60000). -define(TABLE_ACTIVATE_WAIT, 30). %% wait until table's bucket type is activated --define(CB_RV_SPEC, {boolean()|atom()|tuple(), #wm_reqdata{}, #ctx{}}). +-type cb_rv_spec(T) :: {T, #wm_reqdata{}, #ctx{}}. +-type halt() :: {'halt', 200..599} | {'error' , term()}. -type ts_rec() :: [riak_pb_ts_codec:ldbvalue()]. - -spec init(proplists:proplist()) -> {ok, #ctx{}}. %% @doc Initialize this resource. This function extracts the %% 'prefix' and 'riak' properties from the dispatch args. @@ -93,508 +87,357 @@ init(Props) -> {ok, #ctx{prefix = proplists:get_value(prefix, Props), riak = proplists:get_value(riak, Props)}}. --spec service_available(#wm_reqdata{}, #ctx{}) -> - {boolean(), #wm_reqdata{}, #ctx{}}. +-spec service_available(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean() | halt()). %% @doc Determine whether or not a connection to Riak -%% can be established. This function also takes this -%% opportunity to extract the 'table' and 'key' path -%% bindings from the dispatch. -service_available(RD, Ctx = #ctx{riak = RiakProps}) -> +%% can be established. +%% Convert the table name from the part of the URL. 
+service_available(RD, #ctx{riak = RiakProps}=Ctx) -> case riak_kv_wm_utils:get_riak_client( RiakProps, riak_kv_wm_utils:get_client_id(RD)) of - {ok, C} -> - {true, RD, - Ctx#ctx{api_version = wrq:path_info(api_version, RD), - method = wrq:method(RD), - client = C, - table = utf8_to_binary( - mochiweb_util:unquote( - wrq:path_info(table, RD))) - }}; + {ok, _C} -> + Table = table(RD), + Mod = riak_ql_ddl:make_module_name(Table), + {true, RD, Ctx#ctx{table=Table, mod=Mod}}; {error, Reason} -> - handle_error({riak_client_error, Reason}, RD, Ctx) + ErrorMsg = flat_format("Unable to connect to Riak: ~p", [Reason]), + Resp = set_text_resp_header(ErrorMsg, RD), + {false, Resp, Ctx} end. - -is_authorized(RD, Ctx) -> +is_authorized(RD, #ctx{table=Table}=Ctx) -> + Call = api_call(wrq:path_tokens(RD), wrq:method(RD)), case riak_api_web_security:is_authorized(RD) of false -> {"Basic realm=\"Riak\"", RD, Ctx}; {true, SecContext} -> - {true, RD, Ctx#ctx{security = SecContext}}; + case riak_core_security:check_permission( + {riak_kv_ts_util:api_call_to_perm(Call), Table}, SecContext) of + {false, Error, _} -> + {utf8_to_binary(Error), RD, Ctx}; + _ -> + {true, RD, Ctx#ctx{api_call=Call}} + end; insecure -> - handle_error(insecure_connection, RD, Ctx) + ErrorMsg = "Security is enabled and Riak does not" ++ + " accept credentials over HTTP. Try HTTPS instead.", + Resp = set_text_resp_header(ErrorMsg, RD), + {{halt, 426}, Resp, Ctx} end. - --spec forbidden(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +-spec forbidden(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). forbidden(RD, Ctx) -> - case riak_kv_wm_utils:is_forbidden(RD) of - true -> - {true, RD, Ctx}; - false -> - %%preexec(RD, Ctx) - %%validate_request(RD, Ctx) - %% plug in early, and just do what it takes to do the job - {false, RD, Ctx} - end. - + Result = riak_kv_wm_utils:is_forbidden(RD), + {Result, RD, Ctx}. --spec allowed_methods(#wm_reqdata{}, #ctx{}) -> - {[atom()], #wm_reqdata{}, #ctx{}}. -%% @doc Get the list of methods this resource supports. +-spec allowed_methods(#wm_reqdata{}, #ctx{}) -> cb_rv_spec([atom()]). allowed_methods(RD, Ctx) -> - {['GET', 'POST', 'DELETE'], RD, Ctx}. + allowed_methods(wrq:path_tokens(RD), RD, Ctx). +allowed_methods([], RD, Ctx) -> + {['POST'], RD, Ctx}; +allowed_methods(_KeyInURL, RD, Ctx) -> + {['GET', 'DELETE'], RD, Ctx}. --spec malformed_request(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -%% @doc Determine whether query parameters, request headers, -%% and request body are badly-formed. +-spec malformed_request(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). malformed_request(RD, Ctx) -> - %% this is plugged because requests are validated against - %% effective parameters contained in the body (and hence, we need - %% accept_doc_body to parse and extract things out of JSON in the - %% body) - {false, RD, Ctx}. - - --spec preexec(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. 
-%% * collect any parameters from request body or, failing that, from
-%% POST k=v items;
-%% * check API version;
-%% * validate those parameters against URL and method;
-%% * determine which api call to do, and check permissions on that;
-preexec(RD, Ctx = #ctx{api_call = Call})
-  when Call /= undefined ->
-    %% been here, figured and executed api call, stored results for
-    %% shipping to client
-    {true, RD, Ctx};
-preexec(RD, Ctx) ->
-    case validate_request(RD, Ctx) of
-        {true, RD1, Ctx1} ->
-            case check_permissions(RD1, Ctx1) of
-                {true, RD2, Ctx2} ->
-                    call_api_function(RD2, Ctx2);
-                FalseWithDetails ->
-                    FalseWithDetails
-            end;
-        FalseWithDetails ->
-            FalseWithDetails
-    end.
-
--spec validate_request(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC.
-validate_request(RD, Ctx) ->
-    case wrq:path_info(api_version, RD) of
-        "v1" ->
-            validate_request_v1(RD, Ctx);
-        BadVersion ->
-            handle_error({unsupported_version, BadVersion}, RD, Ctx)
-    end.
-
--spec validate_request_v1(#wm_reqdata{}, #ctx{}) ->
-                                 ?CB_RV_SPEC.
-validate_request_v1(RD, Ctx = #ctx{method = 'POST'}) ->
-    Json = binary_to_list(wrq:req_body(RD)),
-    case extract_data(Json) of
-        Data when Data /= undefined ->
-            valid_params(
-              RD, Ctx#ctx{api_version = "v1", api_call = put,
-                          data = Data});
-        _Invalid ->
-            handle_error({malformed_request, 'POST'}, RD, Ctx)
-    end;
-
-validate_request_v1(RD, Ctx = #ctx{method = 'GET', table = Table,
-                                   mod = Mod, ddl = DDL}) ->
-    KeysInUrl = lists:map(fun mochiweb_util:unquote/1, wrq:path_tokens(RD)),
-    case path_elements_to_key(Table, KeysInUrl, Mod, DDL) of
-        {ok, Key} ->
-            valid_params(
-              RD, Ctx#ctx{api_version = "v1", api_call = get,
-                          key = Key});
-        {error, Reason} ->
-            handle_error(Reason, RD, Ctx)
-    end;
-
-validate_request_v1(RD, Ctx = #ctx{method = 'DELETE', table = Table,
-                                   mod = Mod, ddl = DDL}) ->
-    KeysInUrl = lists:map(fun mochiweb_util:unquote/1, wrq:path_tokens(RD)),
-    case path_elements_to_key(Table, KeysInUrl, Mod, DDL) of
-        {ok, Key} ->
-            valid_params(
-              RD, Ctx#ctx{api_version = "v1", api_call = delete,
-                          key = Key});
-        {error, Reason} ->
-            handle_error(Reason, RD, Ctx)
+    try
+        Ctx2 = extract_params(wrq:req_qs(RD), Ctx),
+        malformed_request(wrq:path_tokens(RD), RD, Ctx2)
+    catch
+        throw:ParameterError ->
+            ErrorMsg = flat_format("parameter error: ~p", [ParameterError]),
+            Resp = set_text_resp_header(ErrorMsg, RD),
+            {true, Resp, Ctx}
     end.
 
+malformed_request([], RD, Ctx) ->
+    %% NOTE: if the supplied JSON body is wrong a malformed request may be
+    %% issued later.
+    %% @todo: should the validation of the JSON happen here???
+    {false, RD, Ctx};
+malformed_request(KeyInUrl, RD, Ctx) when length(KeyInUrl) rem 2 == 0 ->
+    {false, RD, Ctx};
+malformed_request(_, RD, Ctx) ->
+    {true, RD, Ctx}.
+
+-spec extract_params([{string(), string()}], #ctx{}) -> #ctx{} .
+%% @doc right now we only allow a timeout parameter or nothing.
+extract_params([], Ctx) ->
+    Ctx#ctx{options=[]};
+extract_params([{"timeout", TimeoutStr}], Ctx) ->
+    try
+        Timeout = list_to_integer(TimeoutStr),
+        Ctx#ctx{timeout = Timeout,
+                options = [{timeout, Timeout}]}
     catch
         _:_ ->
-            handle_error({bad_parameter, "timeout"}, RD, Ctx)
-    end
+            throw(flat_format("timeout not an integer value: ~s", [TimeoutStr]))
     end;
+extract_params(Params, _Ctx) ->
+    throw(flat_format("incorrect parameters: ~p", [Params])).
 
+-spec content_types_provided(#wm_reqdata{}, #ctx{}) -> cb_rv_spec([{string(), atom()}]).
+content_types_provided(RD, Ctx) -> + {[{"application/json", to_json}], + RD, Ctx}. -validate_ts_record(undefined) -> - undefined; -validate_ts_record(R) when is_list(R) -> - case lists:all( - %% check that all list elements are TS types - fun(X) -> is_integer(X) orelse is_float(X) orelse is_binary(X) end, - R) of +-spec content_types_accepted(#wm_reqdata{}, #ctx{}) -> cb_rv_spec([{string(), atom()}]). +content_types_accepted(RD, Ctx) -> + content_types_accepted(wrq:path_tokens(RD), RD, Ctx). + +content_types_accepted([], RD, Ctx) -> + %% the JSON in the POST will be handled by process_post, + %% so this handler will never be called. + {[{"application/json", undefined}], RD, Ctx}; +content_types_accepted(_, RD, Ctx) -> + {[], RD, Ctx}. + +-spec resource_exists(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean() | halt()). +resource_exists(RD, #ctx{mod=Mod} = Ctx) -> + try table_module_exists(Mod) of true -> - R; + Path = wrq:path_tokens(RD), + Key = validate_key(Path, Mod), + resource_exists(Path, wrq:method(RD), RD, Ctx#ctx{key=Key}); false -> - undefined - end; -validate_ts_record(_) -> - undefined. + Resp = set_error_message("table ~p not created", [Mod], RD), + {false, Resp, Ctx} + catch + throw:{key_problem, Reason} -> + Resp = set_error_message("wrong path to element: ~p", [Reason], RD), + {{halt, 400}, Resp, Ctx} + end. -validate_ts_records(RR) when is_list(RR) -> - case lists:all(fun(R) -> validate_ts_record(R) /= undefined end, RR) of - true -> - RR; - false -> - undefined - end; -validate_ts_records(_) -> - undefined. +validate_key(Path, Mod) -> + UnquotedPath = lists:map(fun mochiweb_util:unquote/1, Path), + FVList = path_elements_to_key(Mod, UnquotedPath), + ensure_lk_order_and_strip(Mod, FVList). +resource_exists([], 'POST', RD, Ctx) -> + {true, RD, Ctx}; +resource_exists(Path, 'GET', RD, + #ctx{table=Table, + mod=Mod, + key=Key, + options=Options}=Ctx) -> + %% Would be nice if something cheaper than using get_data existed to check + %% if a key is present. + try riak_kv_ts_util:get_data(Key, Table, Mod, Options) of + {ok, Record} -> + {true, RD, Ctx#ctx{object=Record}}; + {error, Reason} -> + Resp = set_error_message("Internal error: ~p", Reason, RD), + {{halt, 500}, Resp, Ctx} + catch + _:Reason -> + Resp = set_error_message("lookup on ~p failed due to ~p", + [Path, Reason], + RD), + {false, Resp, Ctx} + end; +resource_exists(_Path, 'DELETE', RD, Ctx) -> + %% Since reading the object is expensive we will assume for now that the + %% object exists for a delete, but if it turns out that it does not then the + %% processing of the delete will return 404 at that point. + {true, RD, Ctx}. %% extract keys from path elements in the URL (.../K1/V1/K2/V2 -> %% [{K1, V1}, {K2, V2}]), check with Table's DDL to make sure keys are %% correct and values are of (convertible to) appropriate types, and %% return the KV list --spec path_elements_to_key(binary(), [string()], module(), #ddl_v1{}) -> - {ok, [{string(), riak_pb_ts_codec:ldbvalue()}]} | - {error, atom()|tuple()}. -path_elements_to_key(Table, PEList, Mod, - #ddl_v1{local_key = #key_v1{ast = LK}}) -> +%% @private +-spec path_elements_to_key(module(), [string()]) -> + [{string(), riak_pb_ts_codec:ldbvalue()}]. +path_elements_to_key(_Mod, []) -> + []; +path_elements_to_key(Mod, [F,V|Rest]) -> + [convert_fv(Mod, F, V)|path_elements_to_key(Mod, Rest)]. 
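%% A plain-terms sketch of the pairing above: the URL suffix a/1/b/2
%% arrives as ["a", "1", "b", "2"] and is consumed two elements at a
%% time (odd-length paths were already rejected by malformed_request/2).
pairs([]) -> [];
pairs([F, V | Rest]) -> [{F, V} | pairs(Rest)].
%% pairs(["a", "1", "b", "2"]) returns [{"a", "1"}, {"b", "2"}].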
+ +%% @private +convert_fv(Mod, FieldRaw, V) -> + Field = [list_to_binary(X) || X <- string:tokens(FieldRaw, ".")], try - TableKeyLength = length(LK), - if TableKeyLength * 2 == length(PEList) -> - %% values with field names: "f1/v1/f2/v2/f3/v3" - %% 1. check that supplied key fields exist and values - %% supplied are convertible to their types - FVList = - [convert_fv(Table, Mod, K, V) - || {K, V} <- empair(PEList, [])], - %% 2. possibly reorder field-value pairs to match the LK order - OrderedKeyValues = - ensure_lk_order_and_strip(LK, FVList), - {ok, OrderedKeyValues}; - TableKeyLength == length(PEList) -> - %% bare values: "v1/v2/v3" - %% 1. retrieve field values from the DDL - Fields = [F || #param_v1{name = F} <- LK], - FVList = - [convert_fv(Table, Mod, K, V) - || {K, V} <- lists:zip(Fields, PEList)], - {_, OrderedKeyValues} = - lists:unzip(FVList), - {ok, OrderedKeyValues}; - el/=se -> - {error, url_unpaired_keys} - end + true = Mod:is_field_valid(Field), + convert_field_value(Mod:get_field_type(Field), V) catch - throw:ConvertFailed -> - {error, ConvertFailed} - end. - -empair([], Q) -> lists:reverse(Q); -empair([K, V | T], Q) -> empair(T, [{K, V}|Q]). - -convert_fv(Table, Mod, FieldRaw, V) -> - Field = [list_to_binary(X) || X <- string:tokens(FieldRaw, ".")], - case Mod:is_field_valid(Field) of - true -> - try - convert_field(Table, Field, Mod:get_field_type(Field), V) - catch - error:badarg -> - %% rethrow with key, for more informative reporting - throw({url_key_bad_value, Table, Field}); - false -> - throw({url_key_bad_key, Table, Field}) - end + _:_ -> + throw({url_key_bad_value, Field}) end. -convert_field(_T, F, varchar, V) -> - {F, list_to_binary(V)}; -convert_field(_T, F, sint64, V) -> - {F, list_to_integer(V)}; -convert_field(_T, F, double, V) -> - %% list_to_float("42") will fail, so +%% @private +convert_field_value(varchar, V) -> + list_to_binary(V); +convert_field_value(sint64, V) -> + list_to_integer(V); +convert_field_value(double, V) -> try - {F, list_to_float(V)} + list_to_float(V) catch error:badarg -> - {F, float(list_to_integer(V))} + float(list_to_integer(V)) end; -convert_field(T, F, timestamp, V) -> +convert_field_value(timestamp, V) -> case list_to_integer(V) of - BadValue when BadValue < 1 -> - throw({url_key_bad_value, T, F}); - GoodValue -> - {F, GoodValue} + GoodValue when GoodValue > 0 -> + GoodValue; + _ -> + throw(url_key_bad_value) end. + +%% validate_ts_record(undefined) -> +%% undefined; +%% validate_ts_record(R) when is_list(R) -> +%% case lists:all( +%% %% check that all list elements are TS types +%% fun(X) -> is_integer(X) orelse is_float(X) orelse is_binary(X) end, +%% R) of +%% true -> +%% R; +%% false -> +%% undefined +%% end; +%% validate_ts_record(_) -> +%% undefined. + +%% validate_ts_records(RR) when is_list(RR) -> +%% case lists:all(fun(R) -> validate_ts_record(R) /= undefined end, RR) of +%% true -> +%% RR; +%% false -> +%% undefined +%% end; +%% validate_ts_records(_) -> +%% undefined. + ensure_lk_order_and_strip(LK, FVList) -> [proplists:get_value(F, FVList) || #param_v1{name = F} <- LK]. --spec valid_params(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -valid_params(RD, Ctx) -> - case wrq:get_qs_value("timeout", none, RD) of - none -> - {true, RD, Ctx}; - TimeoutStr -> - try - Timeout = list_to_integer(TimeoutStr), - {true, RD, Ctx#ctx{timeout = Timeout}} - catch - _:_ -> - handle_error({bad_parameter, "timeout"}, RD, Ctx) - end - end. - - --spec check_permissions(#wm_reqdata{}, #ctx{}) -> {term(), #wm_reqdata{}, #ctx{}}. 
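%% Why convert_field_value(double, ...) above needs its fallback (a
%% sketch): list_to_float/1 insists on a decimal point, so
%% list_to_float("42") raises badarg even though "42" is a valid double,
%% and the value is recovered via float(list_to_integer("42")).
to_double_example() ->
    42.0 = float(list_to_integer("42")),
    4.2 = list_to_float("4.2").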
-check_permissions(RD, Ctx = #ctx{security = undefined}) -> - {true, RD, Ctx}; -check_permissions(RD, Ctx = #ctx{security = Security, - api_call = Call, - table = Table}) -> - case riak_core_security:check_permission( - {riak_kv_ts_util:api_call_to_perm(Call), Table}, Security) of - {false, Error, _} -> - handle_error( - {not_permitted, utf8_to_binary(Error)}, RD, Ctx); - _ -> - {true, RD, Ctx} - end. - - --spec content_types_provided(#wm_reqdata{}, #ctx{}) -> - {[{ContentType::string(), Producer::atom()}], - #wm_reqdata{}, #ctx{}}. -content_types_provided(RD, Ctx) -> - {[{"application/json", produce_doc_body}], RD, Ctx}. - - -spec encodings_provided(#wm_reqdata{}, #ctx{}) -> {[{Encoding::string(), Producer::function()}], #wm_reqdata{}, #ctx{}}. encodings_provided(RD, Ctx) -> {riak_kv_wm_utils:default_encodings(), RD, Ctx}. - --spec content_types_accepted(#wm_reqdata{}, #ctx{}) -> - {[{ContentType::string(), Acceptor::atom()}], - #wm_reqdata{}, #ctx{}}. -content_types_accepted(RD, Ctx) -> - {[{"application/json", accept_doc_body}], RD, Ctx}. - - --spec resource_exists(#wm_reqdata{}, #ctx{}) -> - {boolean(), #wm_reqdata{}, #ctx{}}. -resource_exists(RD, Ctx = #ctx{table = Table}) -> - Mod = riak_ql_ddl:make_module_name(Table), - try - DDL = Mod:get_ddl(), - {true, RD, Ctx#ctx{mod = Mod, ddl = DDL}} +-spec table_module_exists(module()) -> boolean(). +table_module_exists(Mod) -> + try Mod:get_dll() of + #ddl_v1{} -> + true catch - error:undef -> - handle_error({no_such_table, Table}, RD, Ctx) - end. - --spec process_post(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -%% @doc Pass through requests to allow POST to function -%% as PUT for clients that do not support PUT. -process_post(RD, Ctx) -> - accept_doc_body(RD, Ctx). - --spec delete_resource(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -%% same for DELETE -delete_resource(RD, Ctx) -> - accept_doc_body(RD, Ctx). - --spec accept_doc_body(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -accept_doc_body(RD0, Ctx0) -> - case preexec(RD0, Ctx0) of - {true, RD, Ctx} -> - call_api_function(RD, Ctx); - FalseWithDetails -> - FalseWithDetails + _:_ -> + false end. --spec call_api_function(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -call_api_function(RD, Ctx = #ctx{result = Result}) - when Result /= undefined -> - lager:debug("Function already executed", []), - {true, RD, Ctx}; -call_api_function(RD, Ctx = #ctx{api_call = put, - table = Table, data = Data}) -> - Mod = riak_ql_ddl:make_module_name(Table), - %% convert records to tuples, just for put - Records = [list_to_tuple(R) || R <- Data], - case riak_kv_ts_util:validate_rows(Mod, Records) of - [] -> - case riak_kv_ts_api:put_data(Records, Table, Mod) of - ok -> - prepare_data_in_body(RD, Ctx#ctx{result = ok}); - {error, {some_failed, ErrorCount}} -> - handle_error({failed_some_puts, ErrorCount, Table}, RD, Ctx); - {error, no_ctype} -> - handle_error({no_such_table, Table}, RD, Ctx) - end; - BadRowIdxs when is_list(BadRowIdxs) -> - handle_error({invalid_data, BadRowIdxs}, RD, Ctx) - end; +-spec post_is_create(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). +post_is_create(RD, Ctx) -> + {false, RD, Ctx}. 
-call_api_function(RD, Ctx0 = #ctx{api_call = get, - table = Table, key = Key, mod = Mod, - timeout = Timeout}) -> - Options = - if Timeout == undefined -> []; - true -> [{timeout, Timeout}] - end, - case riak_kv_ts_api:get_data(Key, Table, Mod, Options) of - {ok, Record} -> - {ColumnNames, Row} = lists:unzip(Record), - %% ColumnTypes = riak_kv_ts_util:get_column_types(ColumnNames, Mod), - %% We don't need column types here as well (for the PB interface, we - %% needed them in order to properly construct tscells) - DataOut = {ColumnNames, [Row]}, - %% all results (from get as well as query) are returned in - %% a uniform 'tabular' form, hence the [] around Row - Ctx = Ctx0#ctx{result = DataOut}, - prepare_data_in_body(RD, Ctx); - {error, notfound} -> - handle_error(notfound, RD, Ctx0); - {error, {bad_key_length, Got, Need}} -> - handle_error({key_element_count_mismatch, Got, Need}, RD, Ctx0); - {error, Reason} -> - handle_error({riak_error, Reason}, RD, Ctx0) - end; +-spec process_post(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). +process_post(RD, #ctx{mod=Mod, + table=Table}=Ctx) -> + try extract_data(RD) of + Data -> + Records = [list_to_tuple(R) || R <- Data], + case riak_kv_ts_util:validate_rows(Mod, Records) of + [] -> + case riak_kv_ts_api:put_data(Records, Table, Mod) of + ok -> + Json = result_to_json(ok), + Resp = set_json_response(Json, RD), + {true, Resp, Ctx}; + {error, {some_failed, ErrorCount}} -> + Resp = set_error_message("failed some puts ~p ~p", + [ErrorCount, Table], + RD), + {{halt, 400}, Resp, Ctx} + end; + BadRowIdxs when is_list(BadRowIdxs) -> + Resp = set_error_message("invalid data: ~p", + [BadRowIdxs], + RD), + {{halt, 400}, Resp, Ctx} + end + catch + throw:{data_problem,Reason} -> + Resp = set_error_message("wrong body: ~p", Reason, RD), + {{halt, 400}, Resp, Ctx} + end. -call_api_function(RD, Ctx = #ctx{api_call = delete, - table = Table, key = Key, - mod = Mod, - timeout = Timeout}) -> - Options = - if Timeout == undefined -> []; - true -> [{timeout, Timeout}] - end, - case riak_kv_ts_api:delete_data(Key, Table, Mod, Options) of +-spec delete_resource(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()|halt()). +delete_resource(RD, #ctx{table=Table, + mod=Mod, + key=Key, + options=Options}=Ctx) -> + try riak_kv_ts_api:delete_data(Key, Table, Mod, Options) of ok -> - prepare_data_in_body(RD, Ctx#ctx{result = ok}); - {error, {bad_key_length, Got, Need}} -> - handle_error({key_element_count_mismatch, Got, Need}, RD, Ctx); + Json = result_to_json(ok), + Resp = set_json_response(Json, RD), + {true, Resp, Ctx}; {error, notfound} -> - handle_error(notfound, RD, Ctx); - {error, Reason} -> - handle_error({riak_error, Reason}, RD, Ctx) + Resp = set_error_message("object not found", [], RD), + {{halt, 404}, Resp, Ctx} + catch + _:Reason -> + Resp = set_error_message("Internal error: ~p", Reason, RD), + {{halt, 500}, Resp, Ctx} end. - -prepare_data_in_body(RD0, Ctx0) -> - {Json, RD1, Ctx1} = produce_doc_body(RD0, Ctx0), - {true, wrq:append_to_response_body(Json, RD1), Ctx1}. - - --spec produce_doc_body(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. 
-produce_doc_body(RD0, Ctx0 = #ctx{result = undefined}) -> - case preexec(RD0, Ctx0) of - {true, RD1, Ctx1} -> - case call_api_function(RD1, Ctx1) of - {true, RD2, Ctx2} -> - produce_doc_body(RD2, Ctx2); - FalseWithDetails -> - FalseWithDetails - end; - FalseWithDetails -> - FalseWithDetails - end; -produce_doc_body(RD, Ctx = #ctx{result = ok}) -> - {<<"ok">>, RD, Ctx}; -produce_doc_body(RD, Ctx = #ctx{api_call = get, - result = {Columns, Rows}}) -> - {mochijson2:encode( - {struct, [{<<"columns">>, Columns}, - {<<"rows">>, Rows}]}), - RD, Ctx}. - - -error_out(Type, Fmt, Args, RD, Ctx) -> - {Type, - wrq:set_resp_header( - "Content-Type", "text/plain", wrq:append_to_response_body( - flat_format(Fmt, Args), RD)), - Ctx}. - --spec handle_error(atom()|tuple(), #wm_reqdata{}, #ctx{}) -> {tuple(), #wm_reqdata{}, #ctx{}}. -handle_error(Error, RD, Ctx) -> - case Error of - {riak_client_error, Reason} -> - error_out(false, - "Unable to connect to Riak: ~p", [Reason], RD, Ctx); - insecure_connection -> - error_out({halt, 426}, - "Security is enabled and Riak does not" - " accept credentials over HTTP. Try HTTPS instead.", [], RD, Ctx); - {unsupported_version, BadVersion} -> - error_out({halt, 412}, - "Unsupported API version ~s", [BadVersion], RD, Ctx); - {not_permitted, Table} -> - error_out({halt, 401}, - "Access to table ~ts not allowed", [Table], RD, Ctx); - {malformed_request, Method} -> - error_out({halt, 400}, - "Malformed ~s request", [Method], RD, Ctx); - {url_key_bad_method, Method} -> - error_out({halt, 400}, - "Inappropriate ~s request", [Method], RD, Ctx); - {bad_parameter, Param} -> - error_out({halt, 400}, - "Bad value for parameter \"~s\"", [Param], RD, Ctx); - {no_such_table, Table} -> - error_out({halt, 404}, - "Table \"~ts\" does not exist", [Table], RD, Ctx); - {failed_some_puts, NoOfFailures, Table} -> - error_out({halt, 400}, - "Failed to put ~b records to table \"~ts\"", [NoOfFailures, Table], RD, Ctx); - {invalid_data, BadRowIdxs} -> - error_out({halt, 400}, - "Invalid record #~s", [hd(BadRowIdxs)], RD, Ctx); - {key_element_count_mismatch, Got, Need} -> - error_out({halt, 400}, - "Incorrect number of elements (~b) for key of length ~b", [Need, Got], RD, Ctx); - {url_key_bad_key, Table, Key} -> - error_out({halt, 400}, - "Table \"~ts\" has no field named \"~s\"", [Table, Key], RD, Ctx); - {url_key_bad_value, Table, Key} -> - error_out({halt, 400}, - "Bad value for field \"~s\" in table \"~ts\"", [Key, Table], RD, Ctx); - url_unpaired_keys -> - error_out({halt, 400}, - "Unpaired field/value for key spec in URL", [], RD, Ctx); - notfound -> - error_out({halt, 404}, - "Key not found", [], RD, Ctx); - {riak_error, Detailed} -> - error_out({halt, 500}, - "Internal riak error: ~p", [Detailed], RD, Ctx) +extract_data(RD) -> + try + JsonStr = binary_to_list(wrq:req_body(RD)), + mochijson2:decode(JsonStr) + catch + _:Reason -> + throw({data_problem, Reason}) end. +%% -spec extract_data([byte()]) -> undefined|any(). +%% extract_data(Json) -> +%% try mochijson2:decode(Json) of +%% Decoded when is_list(Decoded) -> +%% validate_ts_records(Decoded) +%% catch +%% _:_ -> +%% undefined +%% end. + + +result_to_json(ok) -> + mochijson2:encode([{success, true}]); +result_to_json(_) -> + mochijson2:encode([{some_record, one_day}]). + +set_json_response(Json, RD) -> + wrq:set_resp_header("Content-Type", "application/json", + wrq:append_to_response_body(Json, RD)). + +%% @private +table(RD) -> + utf8_to_binary( + mochiweb_util:unquote( + wrq:path_info(table, RD))). 
+ +%% @private +api_call([], 'POST') -> + put; +api_call(_KeyInURL, 'GET') -> + get; +api_call(_KeyInURL, 'DELETE') -> + delete. + +%% move to util module. +utf8_to_binary(S) -> + unicode:characters_to_binary(S, utf8, utf8). + flat_format(Format, Args) -> lists:flatten(io_lib:format(Format, Args)). -utf8_to_binary(S) -> - unicode:characters_to_binary(S, utf8, utf8). +set_text_resp_header(IoList, RD) -> + wrq:set_resp_header( + "Content-Type", "text/plain", wrq:append_to_response_body(IoList,RD)). + +set_error_message(Format, Args, RD) -> + set_text_resp_header(flat_format(Format, Args), RD). From 027cfc0cd68d2c1336446fd017011817b2c190cd Mon Sep 17 00:00:00 2001 From: andytill Date: Wed, 9 Mar 2016 10:23:54 +0000 Subject: [PATCH 020/122] Make it obvious why the table definition should fail. --- src/riak_kv_console.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/riak_kv_console.erl b/src/riak_kv_console.erl index 1532f36a9a..42894dba48 100644 --- a/src/riak_kv_console.erl +++ b/src/riak_kv_console.erl @@ -1024,7 +1024,7 @@ bucket_type_and_table_error_local_key_test() -> "user varchar not null, ", "time timestamp not null, ", "other varchar not null, ", - "PRIMARY KEY ((series, user, quantum(time, 15, m)), seriesd, user, time, other))">>, + "PRIMARY KEY ((series, user, quantum(time, 15, m)), seriesTYPO, user, time))">>, JSON = json_props([{bucket_type, my_type}, {table_def, TableDef}]), ?assertEqual( From 1b1dcb33dbc2e6b63923fa84fae4d76a6f70de47 Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Wed, 9 Mar 2016 11:31:30 +0100 Subject: [PATCH 021/122] Add case for undefined SecContext --- src/riak_kv_wm_timeseries.erl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl index 359b9fd974..ad3189e503 100644 --- a/src/riak_kv_wm_timeseries.erl +++ b/src/riak_kv_wm_timeseries.erl @@ -109,7 +109,10 @@ is_authorized(RD, #ctx{table=Table}=Ctx) -> case riak_api_web_security:is_authorized(RD) of false -> {"Basic realm=\"Riak\"", RD, Ctx}; + {true, undefined} -> %% @todo: why is this returned during testing? + {true, RD, Ctx#ctx{api_call=Call}}; {true, SecContext} -> + io:format("SecContext ~p",SecContext), case riak_core_security:check_permission( {riak_kv_ts_util:api_call_to_perm(Call), Table}, SecContext) of {false, Error, _} -> @@ -324,7 +327,7 @@ encodings_provided(RD, Ctx) -> -spec table_module_exists(module()) -> boolean(). table_module_exists(Mod) -> try Mod:get_dll() of - #ddl_v1{} -> + _ -> %#ddl_v1{} -> true catch _:_ -> From a4d7faa45673734d442c6ca2aa75d106a68da874 Mon Sep 17 00:00:00 2001 From: andytill Date: Wed, 9 Mar 2016 14:47:45 +0000 Subject: [PATCH 022/122] 'cleanup' for range keys. 
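
The start and end keys are flat binaries at this point, and eleveldb
orders keys bytewise, which is the same order in which Erlang compares
binaries, so appending a 16#ff byte to a boundary key makes it sort
strictly after the exact key. A minimal sketch of the ordering
assumption (the key values are hypothetical):

    K = <<"key1">>,
    true = K < <<K/binary, 16#ff>>,           %% bumped key excludes the exact match
    true = <<K/binary, 16#ff>> < <<"key2">>.  %% but stays below the next distinct key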
--- src/riak_kv_eleveldb_backend.erl | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/src/riak_kv_eleveldb_backend.erl b/src/riak_kv_eleveldb_backend.erl
index 3c3dc87613..0585aaa5b5 100644
--- a/src/riak_kv_eleveldb_backend.erl
+++ b/src/riak_kv_eleveldb_backend.erl
@@ -468,14 +468,20 @@ range_scan(FoldIndexFun, Buffer, Opts, #state{fold_opts=_FoldOpts,
     LocalKeyLen = length(LKAST),
     StartKey1 = key_prefix(Bucket, [Value || {_Name,_Type,Value} <- StartK], LocalKeyLen),
     EndKey1 = key_prefix(Bucket, [Value || {_Name,_Type,Value} <- EndK], LocalKeyLen),
-    case lists:member({start_inclusive, false}, W) of
-        true -> StartKey2 = <<StartKey1/binary, 16#ff>>;
-        false -> StartKey2 = StartKey1
-    end,
-    case lists:member({end_inclusive, true}, W) of
-        true -> EndKey2 = <<EndKey1/binary, 16#ff>>;
-        false -> EndKey2 = EndKey1
-    end,
+    %% append extra byte to the key when it is not inclusive so that it compares
+    %% as greater
+    StartKey2 =
+        case lists:keyfind(start_inclusive, 1, W) of
+            {start_inclusive, false} -> <<StartKey1/binary, 16#ff>>;
+            _ -> StartKey1
+        end,
+    %% append extra byte to the key when it is inclusive so that it compares
+    %% as greater
+    EndKey2 =
+        case lists:keyfind(end_inclusive, 1, W) of
+            {end_inclusive, true} -> <<EndKey1/binary, 16#ff>>;
+            _ -> EndKey1
+        end,
     FoldFun = fun build_list/2,
     Options = [
               {start_key, StartKey2},
From 08905d105ab43493a9c2870a637984560ce9f823 Mon Sep 17 00:00:00 2001
From: Torben Hoffmann
Date: Thu, 10 Mar 2016 11:08:53 +0100
Subject: [PATCH 023/122] Many small fixes.

Basic functionality works.
Need to add more tests.
---
 src/riak_kv_wm_timeseries.erl | 215 ++++++++++++++++++++--------------
 1 file changed, 125 insertions(+), 90 deletions(-)

diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl
index ad3189e503..b558ff9eb7 100644
--- a/src/riak_kv_wm_timeseries.erl
+++ b/src/riak_kv_wm_timeseries.erl
@@ -57,6 +57,9 @@
          delete_resource/2,
          resource_exists/2]).

+%% webmachine body-producing functions
+-export([to_json/2]).
+
 -include_lib("webmachine/include/webmachine.hrl").
 -include_lib("riak_ql/include/riak_ql_ddl.hrl").
 -include("riak_kv_wm_raw.hrl").
@@ -86,6 +89,9 @@ init(Props) ->
     {ok, #ctx{prefix = proplists:get_value(prefix, Props),
               riak = proplists:get_value(riak, Props)}}.
+    %% {{trace, "/tmp"}, #ctx{prefix = proplists:get_value(prefix, Props),
+    %%                        riak = proplists:get_value(riak, Props)}}.
+%% wmtrace_resource:add_dispatch_rule("wmtrace", "/tmp").

 -spec service_available(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean() | halt()).
 %% @doc Determine whether or not a connection to Riak
@@ -109,7 +118,10 @@ is_authorized(RD, #ctx{table=Table}=Ctx) ->
         {true, undefined} -> %% @todo: why is this returned during testing?
             {true, RD, Ctx#ctx{api_call=Call}};
         {true, SecContext} ->
-            io:format("SecContext ~p",SecContext),
             case riak_core_security:check_permission(
                    {riak_kv_ts_util:api_call_to_perm(Call), Table}, SecContext) of
                 {false, Error, _} ->
@@ -197,40 +202,41 @@ content_types_accepted(_, RD, Ctx) ->

 -spec resource_exists(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean() | halt()). 
resource_exists(RD, #ctx{mod=Mod} = Ctx) -> - try table_module_exists(Mod) of + case table_module_exists(Mod) of true -> - Path = wrq:path_tokens(RD), - Key = validate_key(Path, Mod), - resource_exists(Path, wrq:method(RD), RD, Ctx#ctx{key=Key}); + resource_exists(wrq:path_tokens(RD), wrq:method(RD), RD, Ctx); false -> Resp = set_error_message("table ~p not created", [Mod], RD), {false, Resp, Ctx} - catch - throw:{key_problem, Reason} -> - Resp = set_error_message("wrong path to element: ~p", [Reason], RD), - {{halt, 400}, Resp, Ctx} end. validate_key(Path, Mod) -> UnquotedPath = lists:map(fun mochiweb_util:unquote/1, Path), - FVList = path_elements_to_key(Mod, UnquotedPath), - ensure_lk_order_and_strip(Mod, FVList). + path_elements(Mod, UnquotedPath). +%% ensure_lk_order_and_strip(Mod, FVList). resource_exists([], 'POST', RD, Ctx) -> {true, RD, Ctx}; resource_exists(Path, 'GET', RD, #ctx{table=Table, mod=Mod, - key=Key, options=Options}=Ctx) -> %% Would be nice if something cheaper than using get_data existed to check %% if a key is present. - try riak_kv_ts_util:get_data(Key, Table, Mod, Options) of - {ok, Record} -> - {true, RD, Ctx#ctx{object=Record}}; - {error, Reason} -> - Resp = set_error_message("Internal error: ~p", Reason, RD), - {{halt, 500}, Resp, Ctx} + try + lager:log(info, self(), "resource_exists(~p, 'GET')", [Path]), + Key = validate_key(Path, Mod), + lager:log(info, self(), "resource_exists: Key=~p", [Key]), + case riak_kv_ts_api:get_data(Key, Table, Mod, Options) of + {ok, Record} -> + {true, RD, Ctx#ctx{object=Record, + key=Key}}; + {error, notfound} -> + {{halt, 404}, RD, Ctx}; + {error, InternalReason} -> + InternalResp = set_error_message("Internal error: ~p", [InternalReason], RD), + {{halt, 500}, InternalResp, Ctx} + end catch _:Reason -> Resp = set_error_message("lookup on ~p failed due to ~p", @@ -238,34 +244,44 @@ resource_exists(Path, 'GET', RD, RD), {false, Resp, Ctx} end; -resource_exists(_Path, 'DELETE', RD, Ctx) -> +resource_exists(Path, 'DELETE', RD, #ctx{mod=Mod}=Ctx) -> %% Since reading the object is expensive we will assume for now that the %% object exists for a delete, but if it turns out that it does not then the %% processing of the delete will return 404 at that point. - {true, RD, Ctx}. + try + Key = validate_key(Path, Mod), + {true, RD, Ctx#ctx{key=Key}} + catch + _:Reason -> + Resp = set_error_message("lookup on ~p failed due to ~p", + [Path, Reason], + RD), + {false, Resp, Ctx} + end. -%% extract keys from path elements in the URL (.../K1/V1/K2/V2 -> -%% [{K1, V1}, {K2, V2}]), check with Table's DDL to make sure keys are +%% extract keys from path elements in the URL (.../K1/V1/K2/V2/... -> +%% [V1, V2, ...]), check with Table's DDL to make sure keys are %% correct and values are of (convertible to) appropriate types, and %% return the KV list %% @private --spec path_elements_to_key(module(), [string()]) -> - [{string(), riak_pb_ts_codec:ldbvalue()}]. -path_elements_to_key(_Mod, []) -> +-spec path_elements(module(), [string()]) -> + [riak_pb_ts_codec:ldbvalue()]. +path_elements(Mod, Path) -> + LK = local_key(Mod), + lager:log(info, self(), "path_elements: LK=~p", [LK]), + Types = [Mod:get_field_type([F]) || F <- LK ], + lager:log(info, self(), "path_elements: Types=~p", [Types]), + LKStr = [ binary_to_list(F) || F <- LK ], + KeyTypes = lists:zip(LKStr, Types), + lager:log(info, self(), "path_elements: KeyTypes=~p, Path=~p", [KeyTypes, Path]), + match_path(Path, KeyTypes). 
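+%% e.g. with a local key of (family, series, time), a path of
+%% ["family", "f1", "series", "s1", "time", "1234"] (hypothetical
+%% values) matches to the key [<<"f1">>, <<"s1">>, 1234].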
+ +match_path([], []) -> []; -path_elements_to_key(Mod, [F,V|Rest]) -> - [convert_fv(Mod, F, V)|path_elements_to_key(Mod, Rest)]. - -%% @private -convert_fv(Mod, FieldRaw, V) -> - Field = [list_to_binary(X) || X <- string:tokens(FieldRaw, ".")], - try - true = Mod:is_field_valid(Field), - convert_field_value(Mod:get_field_type(Field), V) - catch - _:_ -> - throw({url_key_bad_value, Field}) - end. +match_path([F,V|Path], [{F, Type}|KeyTypes]) -> + [convert_field_value(Type, V)|match_path(Path, KeyTypes)]; +match_path(Path, _KeyTypes) -> + throw(io_lib:format("incorrect path ~p", [Path])). %% @private convert_field_value(varchar, V) -> @@ -287,37 +303,6 @@ convert_field_value(timestamp, V) -> throw(url_key_bad_value) end. - -%% validate_ts_record(undefined) -> -%% undefined; -%% validate_ts_record(R) when is_list(R) -> -%% case lists:all( -%% %% check that all list elements are TS types -%% fun(X) -> is_integer(X) orelse is_float(X) orelse is_binary(X) end, -%% R) of -%% true -> -%% R; -%% false -> -%% undefined -%% end; -%% validate_ts_record(_) -> -%% undefined. - -%% validate_ts_records(RR) when is_list(RR) -> -%% case lists:all(fun(R) -> validate_ts_record(R) /= undefined end, RR) of -%% true -> -%% RR; -%% false -> -%% undefined -%% end; -%% validate_ts_records(_) -> -%% undefined. - -ensure_lk_order_and_strip(LK, FVList) -> - [proplists:get_value(F, FVList) - || #param_v1{name = F} <- LK]. - - -spec encodings_provided(#wm_reqdata{}, #ctx{}) -> {[{Encoding::string(), Producer::function()}], #wm_reqdata{}, #ctx{}}. @@ -326,8 +311,8 @@ encodings_provided(RD, Ctx) -> -spec table_module_exists(module()) -> boolean(). table_module_exists(Mod) -> - try Mod:get_dll() of - _ -> %#ddl_v1{} -> + try Mod:get_ddl() of + #ddl_v1{} -> true catch _:_ -> @@ -341,9 +326,9 @@ post_is_create(RD, Ctx) -> -spec process_post(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). process_post(RD, #ctx{mod=Mod, table=Table}=Ctx) -> - try extract_data(RD) of - Data -> - Records = [list_to_tuple(R) || R <- Data], + try extract_data(RD, Mod) of + Records -> + %Records = [], %[list_to_tuple(R) || R <- Data], case riak_kv_ts_util:validate_rows(Mod, Records) of [] -> case riak_kv_ts_api:put_data(Records, Table, Mod) of @@ -365,7 +350,7 @@ process_post(RD, #ctx{mod=Mod, end catch throw:{data_problem,Reason} -> - Resp = set_error_message("wrong body: ~p", Reason, RD), + Resp = set_error_message("wrong body: ~p", [Reason], RD), {{halt, 400}, Resp, Ctx} end. @@ -380,39 +365,89 @@ delete_resource(RD, #ctx{table=Table, Resp = set_json_response(Json, RD), {true, Resp, Ctx}; {error, notfound} -> - Resp = set_error_message("object not found", [], RD), - {{halt, 404}, Resp, Ctx} +% Resp = set_error_message("object not found", [], RD), + {{halt, 404}, RD, Ctx} catch _:Reason -> - Resp = set_error_message("Internal error: ~p", Reason, RD), + Resp = set_error_message("Internal error: ~p", [Reason], RD), {{halt, 500}, Resp, Ctx} end. -extract_data(RD) -> +extract_data(RD, Mod) -> try JsonStr = binary_to_list(wrq:req_body(RD)), - mochijson2:decode(JsonStr) + Json = mochijson2:decode(JsonStr), + lager:log(info, self(), "extract_data: Json=~p", [Json]), + DDLFields = ddl_fields(Mod), + lager:log(info, self(), "extract_data: DDLFields=~p", [DDLFields]), + extract_records(Json, DDLFields) catch - _:Reason -> + Error:Reason -> + lager:log(info, self(), "extract_data: ~p:~p", [Error, Reason]), throw({data_problem, Reason}) end. -%% -spec extract_data([byte()]) -> undefined|any(). 
-%% extract_data(Json) -> -%% try mochijson2:decode(Json) of -%% Decoded when is_list(Decoded) -> -%% validate_ts_records(Decoded) -%% catch -%% _:_ -> -%% undefined -%% end. +extract_records({struct, _}=Struct, Fields) -> + [json_struct_to_obj(Struct, Fields)]; +extract_records(Structs, Fields) when is_list(Structs) -> + [json_struct_to_obj(S, Fields) || S <- Structs]. + +json_struct_to_obj({struct, FieldValueList}, Fields) -> + List = [ extract_field_value(Field, FieldValueList) + || Field <- Fields], + list_to_tuple(List). + +extract_field_value(#riak_field_v1{name=Name, type=Type}, FVList) -> + case proplists:get_value(Name, FVList) of + undefined -> + throw({data_problem, {missing_field, Name}}); + Value -> + check_field_value(Type, Value) + end. +local_key(Mod) -> + ddl_local_key(Mod:get_ddl()). + +-spec ddl_local_key(#ddl_v1{}) -> [binary()]. +ddl_local_key(#ddl_v1{local_key=LK}) -> + #key_v1{ast=Ast} = LK, + [ param_name(P) || P <- Ast]. + +param_name(#param_v1{name=[Name]}) -> + Name. + +check_field_value(varchar, V) when is_binary(V) -> + V; +check_field_value(sint64, V) when is_integer(V) -> + V; +check_field_value(double, V) when is_number(V) -> + V; +check_field_value(timestamp, V) when is_integer(V), V>0 -> + V; +check_field_value(boolean, V) when is_boolean(V) -> + V; +check_field_value(Type, V) -> + throw({data_problem, {wrong_type, Type, V}}). + +ddl_fields(Mod) -> + #ddl_v1{fields=Fields} = Mod:get_ddl(), + Fields. result_to_json(ok) -> mochijson2:encode([{success, true}]); result_to_json(_) -> mochijson2:encode([{some_record, one_day}]). +to_json(RD, #ctx{api_call=get, object=Object}=Ctx) -> + try + Json = mochijson2:encode(Object), + {Json, RD, Ctx} + catch + _:Reason -> + Resp = set_error_message("object error ~p", [Reason], RD), + {{halt, 500}, Resp, Ctx} + end. + set_json_response(Json, RD) -> wrq:set_resp_header("Content-Type", "application/json", wrq:append_to_response_body(Json, RD)). From 9a18d69ae53717d8f1f93d861c9591c7c7305c23 Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Thu, 10 Mar 2016 11:48:43 +0100 Subject: [PATCH 024/122] Changed ddl_fields/1 to ddl_fieldsand_types in prep for moving it to the DDL helper module. --- src/riak_kv_wm_timeseries.erl | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl index b558ff9eb7..91aeab6c11 100644 --- a/src/riak_kv_wm_timeseries.erl +++ b/src/riak_kv_wm_timeseries.erl @@ -369,6 +369,7 @@ delete_resource(RD, #ctx{table=Table, {{halt, 404}, RD, Ctx} catch _:Reason -> + lager:log(info, self(), "delete_resource failed: ~p", Reason), Resp = set_error_message("Internal error: ~p", [Reason], RD), {{halt, 500}, Resp, Ctx} end. @@ -377,10 +378,8 @@ extract_data(RD, Mod) -> try JsonStr = binary_to_list(wrq:req_body(RD)), Json = mochijson2:decode(JsonStr), - lager:log(info, self(), "extract_data: Json=~p", [Json]), - DDLFields = ddl_fields(Mod), - lager:log(info, self(), "extract_data: DDLFields=~p", [DDLFields]), - extract_records(Json, DDLFields) + DDLFieldTypes = ddl_fields_and_types(Mod), + extract_records(Json, DDLFieldTypes) catch Error:Reason -> lager:log(info, self(), "extract_data: ~p:~p", [Error, Reason]), @@ -397,7 +396,7 @@ json_struct_to_obj({struct, FieldValueList}, Fields) -> || Field <- Fields], list_to_tuple(List). 
-extract_field_value(#riak_field_v1{name=Name, type=Type}, FVList) -> +extract_field_value({Name, Type}, FVList) -> case proplists:get_value(Name, FVList) of undefined -> throw({data_problem, {missing_field, Name}}); @@ -408,6 +407,7 @@ extract_field_value(#riak_field_v1{name=Name, type=Type}, FVList) -> local_key(Mod) -> ddl_local_key(Mod:get_ddl()). +%% this should be in the DDL helper module. -spec ddl_local_key(#ddl_v1{}) -> [binary()]. ddl_local_key(#ddl_v1{local_key=LK}) -> #key_v1{ast=Ast} = LK, @@ -416,22 +416,21 @@ ddl_local_key(#ddl_v1{local_key=LK}) -> param_name(#param_v1{name=[Name]}) -> Name. -check_field_value(varchar, V) when is_binary(V) -> - V; -check_field_value(sint64, V) when is_integer(V) -> - V; -check_field_value(double, V) when is_number(V) -> - V; -check_field_value(timestamp, V) when is_integer(V), V>0 -> - V; -check_field_value(boolean, V) when is_boolean(V) -> - V; +%% @todo: might be better if the DDL helper module had a +%% valid_field_value(Field, Value) -> boolean() function. +check_field_value(varchar, V) when is_binary(V) -> V; +check_field_value(sint64, V) when is_integer(V) -> V; +check_field_value(double, V) when is_number(V) -> V; +check_field_value(timestamp, V) when is_integer(V), V>0 -> V; +check_field_value(boolean, V) when is_boolean(V) -> V; check_field_value(Type, V) -> throw({data_problem, {wrong_type, Type, V}}). -ddl_fields(Mod) -> +%% @todo: this should be in the DDL helper module, so that the records don't +%% leak out of riak_ql. +ddl_fields_and_types(Mod) -> #ddl_v1{fields=Fields} = Mod:get_ddl(), - Fields. + [ {Name, Type} || #riak_field_v1{name=Name, type=Type} <- Fields ]. result_to_json(ok) -> mochijson2:encode([{success, true}]); From 2f794d5eaec6c7d53fd67f47c497ef02a298a7f7 Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Thu, 10 Mar 2016 12:19:37 +0100 Subject: [PATCH 025/122] Code re-org. Moved all helper modules to the end of the file. --- src/riak_kv_wm_timeseries.erl | 211 +++++++++++++++++----------------- 1 file changed, 104 insertions(+), 107 deletions(-) diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl index 91aeab6c11..7ffa8f2e14 100644 --- a/src/riak_kv_wm_timeseries.erl +++ b/src/riak_kv_wm_timeseries.erl @@ -153,8 +153,7 @@ malformed_request(RD, Ctx) -> malformed_request(wrq:path_tokens(RD), RD, Ctx2) catch throw:ParameterError -> - ErrorMsg = flat_format("parameter error: ~p", [ParameterError]), - Resp = set_text_resp_header(ErrorMsg, RD), + Resp = set_error_message("parameter error: ~p", [ParameterError], RD), {true, Resp, Ctx} end. @@ -168,22 +167,6 @@ malformed_request(KeyInUrl, RD, Ctx) when length(KeyInUrl) rem 2 == 0 -> malformed_request(_, RD, Ctx) -> {true, RD, Ctx}. --spec extract_params([{string(), string()}], #ctx{}) -> #ctx{} . -%% @doc right now we only allow a timeout parameter or nothing. -extract_params([], Ctx) -> - Ctx#ctx{options=[]}; -extract_params([{"timeout", TimeoutStr}], Ctx) -> - try - Timeout = list_to_integer(TimeoutStr), - Ctx#ctx{timeout = Timeout, - options = [{timeout, Timeout}]} - catch - _:_ -> - throw(flat_format("timeout not an integer value: ~s", [TimeoutStr])) - end; -extract_params(Params, _Ctx) -> - throw(flat_format("incorrect paramters: ~p", [Params])). - -spec content_types_provided(#wm_reqdata{}, #ctx{}) -> cb_rv_spec([{string(), atom()}]). content_types_provided(RD, Ctx) -> {[{"application/json", to_json}], @@ -210,11 +193,6 @@ resource_exists(RD, #ctx{mod=Mod} = Ctx) -> {false, Resp, Ctx} end. 
-validate_key(Path, Mod) -> - UnquotedPath = lists:map(fun mochiweb_util:unquote/1, Path), - path_elements(Mod, UnquotedPath). -%% ensure_lk_order_and_strip(Mod, FVList). - resource_exists([], 'POST', RD, Ctx) -> {true, RD, Ctx}; resource_exists(Path, 'GET', RD, @@ -224,9 +202,7 @@ resource_exists(Path, 'GET', RD, %% Would be nice if something cheaper than using get_data existed to check %% if a key is present. try - lager:log(info, self(), "resource_exists(~p, 'GET')", [Path]), Key = validate_key(Path, Mod), - lager:log(info, self(), "resource_exists: Key=~p", [Key]), case riak_kv_ts_api:get_data(Key, Table, Mod, Options) of {ok, Record} -> {true, RD, Ctx#ctx{object=Record, @@ -259,66 +235,11 @@ resource_exists(Path, 'DELETE', RD, #ctx{mod=Mod}=Ctx) -> {false, Resp, Ctx} end. -%% extract keys from path elements in the URL (.../K1/V1/K2/V2/... -> -%% [V1, V2, ...]), check with Table's DDL to make sure keys are -%% correct and values are of (convertible to) appropriate types, and -%% return the KV list -%% @private --spec path_elements(module(), [string()]) -> - [riak_pb_ts_codec:ldbvalue()]. -path_elements(Mod, Path) -> - LK = local_key(Mod), - lager:log(info, self(), "path_elements: LK=~p", [LK]), - Types = [Mod:get_field_type([F]) || F <- LK ], - lager:log(info, self(), "path_elements: Types=~p", [Types]), - LKStr = [ binary_to_list(F) || F <- LK ], - KeyTypes = lists:zip(LKStr, Types), - lager:log(info, self(), "path_elements: KeyTypes=~p, Path=~p", [KeyTypes, Path]), - match_path(Path, KeyTypes). - -match_path([], []) -> - []; -match_path([F,V|Path], [{F, Type}|KeyTypes]) -> - [convert_field_value(Type, V)|match_path(Path, KeyTypes)]; -match_path(Path, _KeyTypes) -> - throw(io_lib:format("incorrect path ~p", [Path])). - -%% @private -convert_field_value(varchar, V) -> - list_to_binary(V); -convert_field_value(sint64, V) -> - list_to_integer(V); -convert_field_value(double, V) -> - try - list_to_float(V) - catch - error:badarg -> - float(list_to_integer(V)) - end; -convert_field_value(timestamp, V) -> - case list_to_integer(V) of - GoodValue when GoodValue > 0 -> - GoodValue; - _ -> - throw(url_key_bad_value) - end. - -spec encodings_provided(#wm_reqdata{}, #ctx{}) -> - {[{Encoding::string(), Producer::function()}], - #wm_reqdata{}, #ctx{}}. + cb_rv_spec([{Encoding::string(), Producer::function()}]). encodings_provided(RD, Ctx) -> {riak_kv_wm_utils:default_encodings(), RD, Ctx}. --spec table_module_exists(module()) -> boolean(). -table_module_exists(Mod) -> - try Mod:get_ddl() of - #ddl_v1{} -> - true - catch - _:_ -> - false - end. - -spec post_is_create(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). post_is_create(RD, Ctx) -> {false, RD, Ctx}. @@ -328,7 +249,6 @@ process_post(RD, #ctx{mod=Mod, table=Table}=Ctx) -> try extract_data(RD, Mod) of Records -> - %Records = [], %[list_to_tuple(R) || R <- Data], case riak_kv_ts_util:validate_rows(Mod, Records) of [] -> case riak_kv_ts_api:put_data(Records, Table, Mod) of @@ -365,7 +285,6 @@ delete_resource(RD, #ctx{table=Table, Resp = set_json_response(Json, RD), {true, Resp, Ctx}; {error, notfound} -> -% Resp = set_error_message("object not found", [], RD), {{halt, 404}, RD, Ctx} catch _:Reason -> @@ -374,6 +293,95 @@ delete_resource(RD, #ctx{table=Table, {{halt, 500}, Resp, Ctx} end. +-spec to_json(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(iolist()|halt()). 
+to_json(RD, #ctx{api_call=get, object=Object}=Ctx) -> + try + Json = mochijson2:encode(Object), + {Json, RD, Ctx} + catch + _:Reason -> + Resp = set_error_message("object error ~p", [Reason], RD), + {{halt, 500}, Resp, Ctx} + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% helper functions +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% @todo: this should be in riak_ql_ddl and should probably check deeper. +-spec table_module_exists(module()) -> boolean(). +table_module_exists(Mod) -> + try Mod:get_ddl() of + #ddl_v1{} -> + true + catch + _:_ -> + false + end. + +-spec extract_params([{string(), string()}], #ctx{}) -> #ctx{} . +%% @doc right now we only allow a timeout parameter or nothing. +extract_params([], Ctx) -> + Ctx#ctx{options=[]}; +extract_params([{"timeout", TimeoutStr}], Ctx) -> + try + Timeout = list_to_integer(TimeoutStr), + Ctx#ctx{timeout = Timeout, + options = [{timeout, Timeout}]} + catch + _:_ -> + throw(flat_format("timeout not an integer value: ~s", [TimeoutStr])) + end; +extract_params(Params, _Ctx) -> + throw(flat_format("incorrect paramters: ~p", [Params])). + +validate_key(Path, Mod) -> + UnquotedPath = lists:map(fun mochiweb_util:unquote/1, Path), + path_elements(Mod, UnquotedPath). + +%% extract keys from path elements in the URL (.../K1/V1/K2/V2/... -> +%% [V1, V2, ...]), check with Table's DDL to make sure keys are +%% correct and values are of (convertible to) appropriate types, and +%% return the KV list +%% @private +-spec path_elements(module(), [string()]) -> + [riak_pb_ts_codec:ldbvalue()]. +path_elements(Mod, Path) -> + KeyTypes = local_key_fields_and_types(Mod), + match_path(Path, KeyTypes). + +local_key_fields_and_types(Mod) -> + LK = local_key(Mod), + Types = [Mod:get_field_type([F]) || F <- LK ], + LKStr = [ binary_to_list(F) || F <- LK ], + lists:zip(LKStr, Types). + +match_path([], []) -> + []; +match_path([F,V|Path], [{F, Type}|KeyTypes]) -> + [convert_field_value(Type, V)|match_path(Path, KeyTypes)]; +match_path(Path, _KeyTypes) -> + throw(io_lib:format("incorrect path ~p", [Path])). + +%% @private +convert_field_value(varchar, V) -> + list_to_binary(V); +convert_field_value(sint64, V) -> + list_to_integer(V); +convert_field_value(double, V) -> + try + list_to_float(V) + catch + error:badarg -> + float(list_to_integer(V)) + end; +convert_field_value(timestamp, V) -> + case list_to_integer(V) of + GoodValue when GoodValue > 0 -> + GoodValue; + _ -> + throw(url_key_bad_value) + end. + extract_data(RD, Mod) -> try JsonStr = binary_to_list(wrq:req_body(RD)), @@ -426,30 +434,14 @@ check_field_value(boolean, V) when is_boolean(V) -> V; check_field_value(Type, V) -> throw({data_problem, {wrong_type, Type, V}}). + + %% @todo: this should be in the DDL helper module, so that the records don't %% leak out of riak_ql. ddl_fields_and_types(Mod) -> #ddl_v1{fields=Fields} = Mod:get_ddl(), [ {Name, Type} || #riak_field_v1{name=Name, type=Type} <- Fields ]. -result_to_json(ok) -> - mochijson2:encode([{success, true}]); -result_to_json(_) -> - mochijson2:encode([{some_record, one_day}]). - -to_json(RD, #ctx{api_call=get, object=Object}=Ctx) -> - try - Json = mochijson2:encode(Object), - {Json, RD, Ctx} - catch - _:Reason -> - Resp = set_error_message("object error ~p", [Reason], RD), - {{halt, 500}, Resp, Ctx} - end. - -set_json_response(Json, RD) -> - wrq:set_resp_header("Content-Type", "application/json", - wrq:append_to_response_body(Json, RD)). 
%% @private table(RD) -> @@ -458,12 +450,13 @@ table(RD) -> wrq:path_info(table, RD))). %% @private -api_call([], 'POST') -> - put; -api_call(_KeyInURL, 'GET') -> - get; -api_call(_KeyInURL, 'DELETE') -> - delete. +api_call([] , 'POST') -> put; +api_call(_KeyInURL, 'GET') -> get; +api_call(_KeyInURL, 'DELETE') -> delete. + +%% @private +result_to_json(ok) -> + mochijson2:encode([{success, true}]). %% move to util module. utf8_to_binary(S) -> @@ -478,3 +471,7 @@ set_text_resp_header(IoList, RD) -> set_error_message(Format, Args, RD) -> set_text_resp_header(flat_format(Format, Args), RD). + +set_json_response(Json, RD) -> + wrq:set_resp_header("Content-Type", "application/json", + wrq:append_to_response_body(Json, RD)). From 480a4baf33a15850df50f4a7de9e79ce6bd9f860 Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Thu, 10 Mar 2016 15:16:43 +0100 Subject: [PATCH 026/122] Remove unused macro. --- src/riak_kv_wm_timeseries.erl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl index 7ffa8f2e14..6fa60969d8 100644 --- a/src/riak_kv_wm_timeseries.erl +++ b/src/riak_kv_wm_timeseries.erl @@ -77,7 +77,6 @@ riak}). -define(DEFAULT_TIMEOUT, 60000). --define(TABLE_ACTIVATE_WAIT, 30). %% wait until table's bucket type is activated -type cb_rv_spec(T) :: {T, #wm_reqdata{}, #ctx{}}. -type halt() :: {'halt', 200..599} | {'error' , term()}. From d459c5deb3560033309261ed79eda56486b9be9d Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Thu, 10 Mar 2016 15:30:18 +0100 Subject: [PATCH 027/122] Move common functions to riak_kv_wm_ts_util --- src/riak_kv_wm_timeseries.erl | 101 +++++++++++++--------------------- 1 file changed, 38 insertions(+), 63 deletions(-) diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl index 6fa60969d8..5362975463 100644 --- a/src/riak_kv_wm_timeseries.erl +++ b/src/riak_kv_wm_timeseries.erl @@ -100,12 +100,12 @@ service_available(RD, #ctx{riak = RiakProps}=Ctx) -> case riak_kv_wm_utils:get_riak_client( RiakProps, riak_kv_wm_utils:get_client_id(RD)) of {ok, _C} -> - Table = table(RD), + Table = riak_kv_wm_ts_util:table_from_request(RD), Mod = riak_ql_ddl:make_module_name(Table), {true, RD, Ctx#ctx{table=Table, mod=Mod}}; {error, Reason} -> - ErrorMsg = flat_format("Unable to connect to Riak: ~p", [Reason]), - Resp = set_text_resp_header(ErrorMsg, RD), + ErrorMsg = riak_kv_wm_ts_util:flat_format("Unable to connect to Riak: ~p", [Reason]), + Resp = riak_kv_wm_ts_util:set_text_resp_header(ErrorMsg, RD), {false, Resp, Ctx} end. @@ -120,14 +120,14 @@ is_authorized(RD, #ctx{table=Table}=Ctx) -> case riak_core_security:check_permission( {riak_kv_ts_util:api_call_to_perm(Call), Table}, SecContext) of {false, Error, _} -> - {utf8_to_binary(Error), RD, Ctx}; - _ -> - {true, RD, Ctx#ctx{api_call=Call}} + {riak_kv_wm_ts_util:utf8_to_binary(Error), RD, Ctx}; + _ -> + {true, RD, Ctx#ctx{api_call=Call}} end; insecure -> ErrorMsg = "Security is enabled and Riak does not" ++ " accept credentials over HTTP. Try HTTPS instead.", - Resp = set_text_resp_header(ErrorMsg, RD), + Resp = riak_kv_wm_ts_util:set_text_resp_header(ErrorMsg, RD), {{halt, 426}, Resp, Ctx} end. @@ -152,7 +152,7 @@ malformed_request(RD, Ctx) -> malformed_request(wrq:path_tokens(RD), RD, Ctx2) catch throw:ParameterError -> - Resp = set_error_message("parameter error: ~p", [ParameterError], RD), + Resp = riak_kv_wm_ts_util:set_error_message("parameter error: ~p", [ParameterError], RD), {true, Resp, Ctx} end. 
@@ -188,7 +188,7 @@ resource_exists(RD, #ctx{mod=Mod} = Ctx) -> true -> resource_exists(wrq:path_tokens(RD), wrq:method(RD), RD, Ctx); false -> - Resp = set_error_message("table ~p not created", [Mod], RD), + Resp = riak_kv_wm_ts_util:set_error_message("table ~p not created", [Mod], RD), {false, Resp, Ctx} end. @@ -209,14 +209,14 @@ resource_exists(Path, 'GET', RD, {error, notfound} -> {{halt, 404}, RD, Ctx}; {error, InternalReason} -> - InternalResp = set_error_message("Internal error: ~p", [InternalReason], RD), + InternalResp = riak_kv_wm_ts_util:set_error_message("Internal error: ~p", [InternalReason], RD), {{halt, 500}, InternalResp, Ctx} end catch _:Reason -> - Resp = set_error_message("lookup on ~p failed due to ~p", - [Path, Reason], - RD), + Resp = riak_kv_wm_ts_util:set_error_message("lookup on ~p failed due to ~p", + [Path, Reason], + RD), {false, Resp, Ctx} end; resource_exists(Path, 'DELETE', RD, #ctx{mod=Mod}=Ctx) -> @@ -228,9 +228,9 @@ resource_exists(Path, 'DELETE', RD, #ctx{mod=Mod}=Ctx) -> {true, RD, Ctx#ctx{key=Key}} catch _:Reason -> - Resp = set_error_message("lookup on ~p failed due to ~p", - [Path, Reason], - RD), + Resp = riak_kv_wm_ts_util:set_error_message("lookup on ~p failed due to ~p", + [Path, Reason], + RD), {false, Resp, Ctx} end. @@ -253,23 +253,23 @@ process_post(RD, #ctx{mod=Mod, case riak_kv_ts_api:put_data(Records, Table, Mod) of ok -> Json = result_to_json(ok), - Resp = set_json_response(Json, RD), + Resp = riak_kv_wm_ts_util:set_json_response(Json, RD), {true, Resp, Ctx}; {error, {some_failed, ErrorCount}} -> - Resp = set_error_message("failed some puts ~p ~p", - [ErrorCount, Table], - RD), + Resp = riak_kv_wm_ts_util:set_error_message("failed some puts ~p ~p", + [ErrorCount, Table], + RD), {{halt, 400}, Resp, Ctx} end; BadRowIdxs when is_list(BadRowIdxs) -> - Resp = set_error_message("invalid data: ~p", - [BadRowIdxs], - RD), + Resp = riak_kv_wm_ts_util:set_error_message("invalid data: ~p", + [BadRowIdxs], + RD), {{halt, 400}, Resp, Ctx} end catch throw:{data_problem,Reason} -> - Resp = set_error_message("wrong body: ~p", [Reason], RD), + Resp = riak_kv_wm_ts_util:set_error_message("wrong body: ~p", [Reason], RD), {{halt, 400}, Resp, Ctx} end. @@ -281,16 +281,16 @@ delete_resource(RD, #ctx{table=Table, try riak_kv_ts_api:delete_data(Key, Table, Mod, Options) of ok -> Json = result_to_json(ok), - Resp = set_json_response(Json, RD), - {true, Resp, Ctx}; - {error, notfound} -> - {{halt, 404}, RD, Ctx} - catch - _:Reason -> - lager:log(info, self(), "delete_resource failed: ~p", Reason), - Resp = set_error_message("Internal error: ~p", [Reason], RD), - {{halt, 500}, Resp, Ctx} - end. + Resp = riak_kv_wm_ts_util:set_json_response(Json, RD), + {true, Resp, Ctx}; + {error, notfound} -> + {{halt, 404}, RD, Ctx} + catch + _:Reason -> + lager:log(info, self(), "delete_resource failed: ~p", Reason), + Resp = riak_kv_wm_ts_util:set_error_message("Internal error: ~p", [Reason], RD), + {{halt, 500}, Resp, Ctx} + end. -spec to_json(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(iolist()|halt()). to_json(RD, #ctx{api_call=get, object=Object}=Ctx) -> @@ -299,7 +299,7 @@ to_json(RD, #ctx{api_call=get, object=Object}=Ctx) -> {Json, RD, Ctx} catch _:Reason -> - Resp = set_error_message("object error ~p", [Reason], RD), + Resp = riak_kv_wm_ts_util:set_error_message("object error ~p", [Reason], RD), {{halt, 500}, Resp, Ctx} end. 
@@ -328,10 +328,10 @@ extract_params([{"timeout", TimeoutStr}], Ctx) ->
                 options = [{timeout, Timeout}]}
     catch
         _:_ ->
-            throw(flat_format("timeout not an integer value: ~s", [TimeoutStr]))
+            throw(riak_kv_wm_ts_util:flat_format("timeout not an integer value: ~s", [TimeoutStr]))
     end;
 extract_params(Params, _Ctx) ->
-    throw(flat_format("incorrect paramters: ~p", [Params])).
+    throw(riak_kv_wm_ts_util:flat_format("incorrect parameters: ~p", [Params])).

 validate_key(Path, Mod) ->
     UnquotedPath = lists:map(fun mochiweb_util:unquote/1, Path),
     path_elements(Mod, UnquotedPath).
@@ -441,13 +441,6 @@ ddl_fields_and_types(Mod) ->
     #ddl_v1{fields=Fields} = Mod:get_ddl(),
     [ {Name, Type} || #riak_field_v1{name=Name, type=Type} <- Fields ].

-%% @private
-table(RD) ->
-    utf8_to_binary(
-        mochiweb_util:unquote(
-            wrq:path_info(table, RD))).
-
 %% @private
 api_call([] , 'POST') -> put;
 api_call(_KeyInURL, 'GET') -> get;
@@ -455,22 +448,4 @@ api_call(_KeyInURL, 'DELETE') -> delete.

 %% @private
 result_to_json(ok) ->
-    mochijson2:encode([{success, true}]).
-
-%% move to util module.
-utf8_to_binary(S) ->
-    unicode:characters_to_binary(S, utf8, utf8).
-
-flat_format(Format, Args) ->
-    lists:flatten(io_lib:format(Format, Args)).
-
-set_text_resp_header(IoList, RD) ->
-    wrq:set_resp_header(
-        "Content-Type", "text/plain", wrq:append_to_response_body(IoList,RD)).
-
-set_error_message(Format, Args, RD) ->
-    set_text_resp_header(flat_format(Format, Args), RD).
-
-set_json_response(Json, RD) ->
-    wrq:set_resp_header("Content-Type", "application/json",
-                        wrq:append_to_response_body(Json, RD)).
+    mochijson2:encode([{success, true}]).
\ No newline at end of file
From 344ad14a26eb9c24046aacac6d128c5e3031fd9a Mon Sep 17 00:00:00 2001
From: andytill
Date: Thu, 10 Mar 2016 14:44:26 +0000
Subject: [PATCH 028/122] Increment query start keys if they are not start
 inclusive, to hash to the correct partition for the first key we want to
 return.

---
 src/riak_kv_qry_compiler.erl | 40 ++++++++++++++++++++++++------------
 1 file changed, 27 insertions(+), 13 deletions(-)

diff --git a/src/riak_kv_qry_compiler.erl b/src/riak_kv_qry_compiler.erl
index f8ac62b150..c6f2517054 100644
--- a/src/riak_kv_qry_compiler.erl
+++ b/src/riak_kv_qry_compiler.erl
@@ -477,7 +477,7 @@ find_quantum_field_index_in_key2([_|Tail], Index) ->
     find_quantum_field_index_in_key2(Tail, Index+1).

 %%
-hash_timestamp_to_quanta(QField, QSize, QUnit, QIndex, MaxSubQueries, Where) ->
+hash_timestamp_to_quanta(QField, QSize, QUnit, QIndex, MaxSubQueries, Where1) ->
     GetMaxMinFun = fun({startkey, List}, {_S, E}) ->
                            {element(3, lists:nth(QIndex, List)), E};
                       ({endkey, List}, {S, _E}) ->
@@ -485,23 +485,37 @@ hash_timestamp_to_quanta(QField, QSize, QUnit, QIndex, MaxSubQueries, Where1) ->
                       (_, {S, E}) ->
                            {S, E}
                    end,
-    {Min, Max} = lists:foldl(GetMaxMinFun, {"", ""}, Where),
-    EffMin = case proplists:get_value(start_inclusive, Where, true) of
-                 true -> Min;
-                 false -> Min + 1
-             end,
-    EffMax = case proplists:get_value(end_inclusive, Where, false) of
-                 true -> Max + 1;
-                 false -> Max
-             end,
+    {Min1, Max1} = lists:foldl(GetMaxMinFun, {"", ""}, Where1),
+    %% if the start range is not inclusive then add one and remove the
+    %% start_inclusive flag. This is so that the query start key hashes to the
+    %% correct quanta when it is on the boundary since the start_inclusive flag
+    %% is not taken into account in the partition hashing. 
For example, given a
+    %% one second quantum, `mytime > 1999` should return keys with mytime of
+    %% 2000 or greater, but 1999 hashes to the quantum before 2000 and would
+    %% receive no results from it.
+    case lists:keytake(start_inclusive, 1, Where1) of
+        {value, {start_inclusive, false}, WhereX} ->
+            Where2 = WhereX,
+            Min2 = Min1 + 1;
+        _ ->
+            Where2 = Where1,
+            Min2 = Min1
+    end,
+    Max2 =
+        case proplists:get_value(end_inclusive, Where2, false) of
+            true  -> Max1 + 1;
+            false -> Max1
+        end,
     {NoSubQueries, Boundaries} =
-        riak_ql_quanta:quanta(EffMin, EffMax, QSize, QUnit),
+        riak_ql_quanta:quanta(Min2, Max2, QSize, QUnit),
     if
         NoSubQueries == 1 ->
-            [Where];
+            [Where2];
         NoSubQueries > 1 andalso (MaxSubQueries == undefined
                                   orelse NoSubQueries =< MaxSubQueries) ->
-            make_wheres(Where, QField, Min, Max, Boundaries);
+            %% use the maximum value that has not been incremented; we still use
+            %% the end_inclusive flag because the end key is not used to hash
+            make_wheres(Where2, QField, Min2, Max1, Boundaries);
         NoSubQueries > MaxSubQueries ->
             {error, {too_many_subqueries, NoSubQueries}}
     end.
From 63bdb3be4d47cbe64c0b0a33ce9eec264f8af829 Mon Sep 17 00:00:00 2001
From: Torben Hoffmann
Date: Thu, 10 Mar 2016 15:58:25 +0100
Subject: [PATCH 029/122] Use common authorize function in riak_kv_wm_timeseries
 and riak_kv_wm_timeseries_listkeys

---
 src/riak_kv_wm_timeseries.erl          | 25 ++++----------
 src/riak_kv_wm_timeseries_listkeys.erl | 48 +++++++++++++-------------
 2 files changed, 31 insertions(+), 42 deletions(-)

diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl
index 5362975463..7a35823f97 100644
--- a/src/riak_kv_wm_timeseries.erl
+++ b/src/riak_kv_wm_timeseries.erl
@@ -111,24 +111,13 @@ service_available(RD, #ctx{riak = RiakProps}=Ctx) ->

 is_authorized(RD, #ctx{table=Table}=Ctx) ->
     Call = api_call(wrq:path_tokens(RD), wrq:method(RD)),
-    case riak_api_web_security:is_authorized(RD) of
-        false ->
-            {"Basic realm=\"Riak\"", RD, Ctx};
-        {true, undefined} -> %% @todo: why is this returned during testing?
+    case riak_kv_wm_ts_util:authorize(Call, Table, RD) of
+        ok ->
             {true, RD, Ctx#ctx{api_call=Call}};
-        {true, SecContext} ->
-            case riak_core_security:check_permission(
-                   {riak_kv_ts_util:api_call_to_perm(Call), Table}, SecContext) of
-                {false, Error, _} ->
-                    {riak_kv_wm_ts_util:utf8_to_binary(Error), RD, Ctx};
-                _ ->
-                    {true, RD, Ctx#ctx{api_call=Call}}
-            end;
-        insecure ->
-            ErrorMsg = "Security is enabled and Riak does not" ++
-                " accept credentials over HTTP. Try HTTPS instead.",
-            Resp = riak_kv_wm_ts_util:set_text_resp_header(ErrorMsg, RD),
-            {{halt, 426}, Resp, Ctx}
+        {error, ErrorMsg} ->
+            {ErrorMsg, RD, Ctx};
+        {insecure, Halt, Resp} ->
+            {Halt, Resp, Ctx}
     end.

 -spec forbidden(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()).
diff --git a/src/riak_kv_wm_timeseries_listkeys.erl b/src/riak_kv_wm_timeseries_listkeys.erl
index 6460b531fa..52adf0e34f 100644
--- a/src/riak_kv_wm_timeseries_listkeys.erl
+++ b/src/riak_kv_wm_timeseries_listkeys.erl
@@ -50,11 +50,11 @@
 -include("riak_kv_wm_raw.hrl").
 -include_lib("webmachine/include/webmachine.hrl").

--record(ctx, {api_version,
-              riak,
+-record(ctx, {riak,
               security,
               client,
-              table :: undefined | binary()
+              table :: undefined | binary(),
+              mod :: module()
              }).

-define(CB_RV_SPEC, {boolean()|atom()|tuple(), #wm_reqdata{}, #ctx{}}). 
@@ -65,9 +65,7 @@ %% @doc Initialize this resource. This function extracts the %% 'prefix' and 'riak' properties from the dispatch args. init(Props) -> - {ok, #ctx{api_version = proplists:get_value(api_version, Props), - riak = proplists:get_value(riak, Props), - table = proplists:get_value(table, Props)}}. + {ok, #ctx{riak = proplists:get_value(riak, Props)}}. -spec service_available(#wm_reqdata{}, #ctx{}) -> {boolean(), #wm_reqdata{}, #ctx{}}. @@ -77,29 +75,29 @@ service_available(RD, Ctx = #ctx{riak = RiakProps}) -> case riak_kv_wm_utils:get_riak_client( RiakProps, riak_kv_wm_utils:get_client_id(RD)) of {ok, C} -> + Table = riak_kv_wm_ts_util:table_from_request(RD), + Mod = riak_ql_ddl:make_module_name(Table), {true, RD, - Ctx#ctx{api_version = wrq:path_info(api_version, RD), - client = C, - table = utf8_to_binary( - mochiweb_util:unquote( - wrq:path_info(table, RD))) - }}; + Ctx#ctx{client = C, + table = Table, + mod = Mod}}; {error, Reason} -> handle_error({riak_client_error, Reason}, RD, Ctx) end. -is_authorized(RD, Ctx) -> - case riak_api_web_security:is_authorized(RD) of - false -> - {"Basic realm=\"Riak\"", RD, Ctx}; - {true, SecContext} -> - {true, RD, Ctx#ctx{security = SecContext}}; - insecure -> - handle_error(insecure_connection, RD, Ctx) +is_authorized(RD, #ctx{table=Table}=Ctx) -> + case riak_kv_wm_ts_util:authorize(listkeys, Table, RD) of + ok -> + {true, RD, Ctx}; + {error, ErrorMsg} -> + {ErrorMsg, RD, Ctx}; + {insecure, Halt, Resp} -> + {Halt, Resp, Ctx} end. + -spec forbidden(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. forbidden(RD, Ctx) -> case riak_kv_wm_utils:is_forbidden(RD) of @@ -130,10 +128,12 @@ check_permissions(RD, Ctx = #ctx{security = Security, -spec malformed_request(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -malformed_request(RD, Ctx = #ctx{api_version = "v1"}) -> - {false, RD, Ctx}; -malformed_request(RD, Ctx = #ctx{api_version = UnsupportedVersion}) -> - handle_error({unsupported_version, UnsupportedVersion}, RD, Ctx). +malformed_request(RD, Ctx) -> + {false, RD, Ctx}. +%% malformed_request(RD, Ctx = #ctx{api_version = "v1"}) -> +%% {false, RD, Ctx}; +%% malformed_request(RD, Ctx = #ctx{api_version = UnsupportedVersion}) -> +%% handle_error({unsupported_version, UnsupportedVersion}, RD, Ctx). -spec allowed_methods(#wm_reqdata{}, #ctx{}) -> From 68b9cd81a66c02fe911df17c40be4aae79468cca Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Thu, 10 Mar 2016 15:58:57 +0100 Subject: [PATCH 030/122] Initial commit of riak_kv_wm_ts_util --- src/riak_kv_wm_ts_util.erl | 80 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 src/riak_kv_wm_ts_util.erl diff --git a/src/riak_kv_wm_ts_util.erl b/src/riak_kv_wm_ts_util.erl new file mode 100644 index 0000000000..71a6c5de54 --- /dev/null +++ b/src/riak_kv_wm_ts_util.erl @@ -0,0 +1,80 @@ +%% ------------------------------------------------------------------- +%% +%% riak_kv_wm_ts_util: utility functions for riak_kv_wm_timeseries* resources. +%% +%% Copyright (c) 2016 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. 
See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- +-module(riak_kv_wm_ts_util). + + +-export([table_from_request/1]). +-export([utf8_to_binary/1]). +-export([flat_format/2]). +-export([set_text_resp_header/2]). +-export([set_error_message/3]). +-export([set_json_response/2]). + +-export([authorize/3]). + + +%% @private +table_from_request(RD) -> + utf8_to_binary( + mochiweb_util:unquote( + wrq:path_info(table, RD))). + +%% move to util module. +utf8_to_binary(S) -> + unicode:characters_to_binary(S, utf8, utf8). + +flat_format(Format, Args) -> + lists:flatten(io_lib:format(Format, Args)). + +set_text_resp_header(IoList, RD) -> + wrq:set_resp_header( + "Content-Type", "text/plain", wrq:append_to_response_body(IoList,RD)). + +set_error_message(Format, Args, RD) -> + set_text_resp_header(flat_format(Format, Args), RD). + +set_json_response(Json, RD) -> + wrq:set_resp_header("Content-Type", "application/json", + wrq:append_to_response_body(Json, RD)). + + + +authorize(Call, Table, RD) -> + case riak_api_web_security:is_authorized(RD) of + false -> + {error, "Basic realm=\"Riak\""}; + {true, undefined} -> %% @todo: why is this returned during testing? + ok; + {true, SecContext} -> + case riak_core_security:check_permission( + {riak_kv_ts_util:api_call_to_perm(Call), Table}, SecContext) of + {false, Error, _} -> + {error, utf8_to_binary(Error)}; + _ -> + ok + end; + insecure -> + ErrorMsg = "Security is enabled and Riak does not" ++ + " accept credentials over HTTP. Try HTTPS instead.", + Resp = set_text_resp_header(ErrorMsg, RD), + {insecure, {halt, 426}, Resp} + end. \ No newline at end of file From 826e641f2e005bfcbf6476ec262f1cccf195bd70 Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Thu, 10 Mar 2016 16:11:36 +0100 Subject: [PATCH 031/122] riak_kv_wm_timeseries_listkey refactorings * removed malformed_request * changed CB_RV_SPEC to cb_rv_spec(T) type * simplified forbidden/2 * removed check_permissions/2 as it is no longer needed (is_authorized/2 handles that job now) * removed utf8_to_binary/1 as it is no longer used (riak_kv_wm_ts_util has it for those functions who needs it). --- src/riak_kv_wm_timeseries_listkeys.erl | 54 ++++---------------------- 1 file changed, 8 insertions(+), 46 deletions(-) diff --git a/src/riak_kv_wm_timeseries_listkeys.erl b/src/riak_kv_wm_timeseries_listkeys.erl index 52adf0e34f..a16fc9d4e9 100644 --- a/src/riak_kv_wm_timeseries_listkeys.erl +++ b/src/riak_kv_wm_timeseries_listkeys.erl @@ -40,7 +40,6 @@ allowed_methods/2, is_authorized/2, forbidden/2, - malformed_request/2, resource_exists/2, content_types_provided/2, encodings_provided/2, @@ -57,7 +56,7 @@ mod :: module() }). --define(CB_RV_SPEC, {boolean()|atom()|tuple(), #wm_reqdata{}, #ctx{}}). +-type cb_rv_spec(T) :: {T, #wm_reqdata{}, #ctx{}}. -define(DEFAULT_TIMEOUT, 60000). @@ -98,53 +97,21 @@ is_authorized(RD, #ctx{table=Table}=Ctx) -> --spec forbidden(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +-spec forbidden(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). forbidden(RD, Ctx) -> - case riak_kv_wm_utils:is_forbidden(RD) of - true -> - {true, RD, Ctx}; - false -> - case check_permissions(RD, Ctx) of - {true, RD1, Ctx1} -> - {false, RD1, Ctx1}; - ErrorAlreadyReported -> - ErrorAlreadyReported - end - end. - --spec check_permissions(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. 
-check_permissions(RD, Ctx = #ctx{security = undefined}) -> - {true, RD, Ctx}; -check_permissions(RD, Ctx = #ctx{security = Security, - table = Table}) -> - case riak_core_security:check_permission( - {riak_kv_ts_util:api_call_to_perm(listkeys), Table}, Security) of - {false, Error, _} -> - handle_error( - {not_permitted, utf8_to_binary(Error)}, RD, Ctx); - _ -> - {true, RD, Ctx} - end. + Result = riak_kv_wm_utils:is_forbidden(RD), + {Result, RD, Ctx}. --spec malformed_request(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -malformed_request(RD, Ctx) -> - {false, RD, Ctx}. -%% malformed_request(RD, Ctx = #ctx{api_version = "v1"}) -> -%% {false, RD, Ctx}; -%% malformed_request(RD, Ctx = #ctx{api_version = UnsupportedVersion}) -> -%% handle_error({unsupported_version, UnsupportedVersion}, RD, Ctx). --spec allowed_methods(#wm_reqdata{}, #ctx{}) -> - {[atom()], #wm_reqdata{}, #ctx{}}. +-spec allowed_methods(#wm_reqdata{}, #ctx{}) -> cb_rv_spec([atom()]). %% @doc Get the list of methods this resource supports. allowed_methods(RD, Ctx) -> {['GET'], RD, Ctx}. --spec resource_exists(#wm_reqdata{}, #ctx{}) -> - {boolean(), #wm_reqdata{}, #ctx{}}. +-spec resource_exists(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). resource_exists(RD, #ctx{table = Table} = Ctx) -> Mod = riak_ql_ddl:make_module_name(Table), case catch Mod:get_ddl() of @@ -156,16 +123,14 @@ resource_exists(RD, #ctx{table = Table} = Ctx) -> -spec encodings_provided(#wm_reqdata{}, #ctx{}) -> - {[{Encoding::string(), Producer::function()}], - #wm_reqdata{}, #ctx{}}. + cb_rv_spec([{Encoding::string(), Producer::function()}]). %% @doc List the encodings available for representing this resource. %% "identity" and "gzip" are available. encodings_provided(RD, Ctx) -> {riak_kv_wm_utils:default_encodings(), RD, Ctx}. -spec content_types_provided(#wm_reqdata{}, #ctx{}) -> - {[{ContentType::string(), Producer::atom()}], - #wm_reqdata{}, #ctx{}}. + cb_rv_spec([{ContentType::string(), Producer::atom()}]). %% @doc List the content types available for representing this resource. content_types_provided(RD, Ctx) -> {[{"application/json", produce_doc_body}], RD, Ctx}. @@ -235,6 +200,3 @@ handle_error(Error, RD, Ctx) -> flat_format(Format, Args) -> lists:flatten(io_lib:format(Format, Args)). - -utf8_to_binary(S) -> - unicode:characters_to_binary(S, utf8, utf8). From eff0b47b6c6c895df7e41a63a8583cdd4cdab81a Mon Sep 17 00:00:00 2001 From: "John R. Daily" Date: Thu, 10 Mar 2016 10:38:36 -0500 Subject: [PATCH 032/122] Test feedback from Andrei on efficiency --- src/riak_kv_pb_timeseries.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/riak_kv_pb_timeseries.erl b/src/riak_kv_pb_timeseries.erl index 87f3b7fbcd..eb5ad67632 100644 --- a/src/riak_kv_pb_timeseries.erl +++ b/src/riak_kv_pb_timeseries.erl @@ -402,7 +402,7 @@ create_batches(Rows, MaxSize) -> create_batches(Rows, MaxSize, MaxSize, [], []). create_batches([], _Counter, _Max, ThisBatch, AllBatches) -> - AllBatches ++ [ThisBatch]; + [ThisBatch|AllBatches]; create_batches(Rows, 0, Max, ThisBatch, AllBatches) -> create_batches(Rows, Max, Max, [], AllBatches ++ [ThisBatch]); create_batches([H|T], Counter, Max, ThisBatch, AllBatches) -> From 6577fd5b745bf9b8893f41f146bf608ad74524bf Mon Sep 17 00:00:00 2001 From: Jon Meredith Date: Wed, 3 Feb 2016 09:45:59 -0700 Subject: [PATCH 033/122] Proof of concept for INSERT statement. Adds support in the PB endpoint for supporting INSERT statements. There are a couple of open issues 1) DDL should be updated to find field indices. 
2) As we add new statements/capabilities we should reduce duplication
between different endpoints (PB, HTTP, shell), so we need to refactor
how describe and insert work into a common place.
---
 src/riak_kv_pb_timeseries.erl | 93 ++++++++++++++++++++++++++++++++++-
 1 file changed, 92 insertions(+), 1 deletion(-)

diff --git a/src/riak_kv_pb_timeseries.erl b/src/riak_kv_pb_timeseries.erl
index ccef05265d..65d97d6db3 100644
--- a/src/riak_kv_pb_timeseries.erl
+++ b/src/riak_kv_pb_timeseries.erl
@@ -184,7 +184,10 @@ process(M = ?SQL_SELECT{'FROM' = Table}, State) ->
     check_table_and_call(Table, fun sub_tsqueryreq/4, M, State);

 process(M = #riak_sql_describe_v1{'DESCRIBE' = Table}, State) ->
-    check_table_and_call(Table, fun sub_tsqueryreq/4, M, State).
+    check_table_and_call(Table, fun sub_tsqueryreq/4, M, State);
+
+process(M = #riak_sql_insert_v1{table =Table}, State) ->
+    check_table_and_call(Table, fun sub_insert/4, M, State).

 %% There is no two-tuple variants of process_stream for tslistkeysresp
@@ -255,6 +258,94 @@ wait_until_active(Table, State, Seconds) ->
 %% functions called from check_table_and_call, one per ts* request
 %% ---------------------------------------------------

+
+%%
+%% INSERT statements, called from check_table_and_call.
+%%
+sub_insert(Mod, DDL, #riak_sql_insert_v1{table = Table, fields = Fields, values = Values}, State) ->
+    case lookup_insert_fields(DDL, Fields) of
+        {error, FieldReason} ->
+            {reply, make_rpberrresp(?E_BAD_QUERY, FieldReason), State};
+        {ok, FieldDesc} ->
+            Empty = empty(DDL),
+            case make_putdata(Values, FieldDesc, Empty) of
+                {error, ValueReason} ->
+                    {reply, make_rpberrresp(?E_BAD_QUERY, ValueReason), State};
+                {ok, Data} ->
+                    sub_putreq_common(Mod, Table, Data, State)
+            end
+    end.
+
+%%
+%% Return an all-null empty row ready to be populated by the values
+%%
+empty(#ddl_v1{fields = Fields}) ->
+    list_to_tuple(lists:duplicate(length(Fields), undefined)).
+
+%%
+%% Lookup the index of the field names selected to insert. If no field names are given
+%% use the positions defined in the DDL. This *requires* that once schema changes
+%% take place the DDL fields are left in order.
+%%
+%% TODO: Field position lookup should be implemented as part of the DDL module rather
+%% than digging through #ddl_v1{} records to make this future proof through upgrades.
+%%
+lookup_insert_fields(#ddl_v1{fields = Fields}, undefined) ->
+    {ok, [Pos || #riak_field_v1{position = Pos} <- Fields]};
+lookup_insert_fields(#ddl_v1{fields = Fields}, FieldIdentifiers) ->
+    case lists:foldl(
+           fun({identifier, FieldName}, {Good, Bad}) ->
+                   case lists:keyfind(FieldName, #riak_field_v1.name, Fields) of
+                       false ->
+                           {Good, [flat_format("undefined field ~s", [FieldName]) | Bad]};
+                       #riak_field_v1{position = Pos} ->
+                           {[Pos | Good], Bad}
+                   end
+           end, {[], []}, FieldIdentifiers)
+    of
+        {Pos, []} ->
+            {ok, lists:reverse(Pos)};
+        {_, Errors} ->
+            %% Only returns the first error, could investigate returning multiple.
+            {error, hd(lists:reverse(Errors))}
+    end.
+
+%%
+%% Map the list of values from statement order into the correct place in the tuple.
+%% If fewer values are given than there are fields, the NULLs will carry through
+%% and the general validation rules should pick that up.
+%% If there are too many values given for the fields it returns an error. 
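+%% e.g. (sketch) for a three-column table, INSERT INTO t (a, c) VALUES
+%% ('x', 2) arrives here with FieldsPos = [1, 3] and the all-null row
+%% {undefined, undefined, undefined}; make_row/3 below then produces
+%% {<<"x">>, undefined, 2}.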
+%%
+make_putdata(Values, FieldsPos, Empty) ->
+    case lists:foldl(
+           fun(RowVals, {Good, Bad, RowNum}) ->
+                   case make_row(RowVals, FieldsPos, Empty) of
+                       {ok, Row} ->
+                           {[Row | Good], Bad, RowNum + 1};
+                       {error, Reason} ->
+                           Reason1 = flat_format("~s in row index ~b",
+                                                 [Reason, RowNum]),
+                           {Good, [Reason1 | Bad], RowNum + 1}
+                   end
+           end, {[], [], 1}, Values) of
+        {PutData, [], _} ->
+            {ok, lists:reverse(PutData)};
+        {_, Errors, _} ->
+            %% Only returns the first error, could investigate returning multiple.
+            {error, lists:last(Errors)}
+    end.
+
+make_row([], _FieldsPos, Row) ->
+    %% Out of entries in the value - row is populated with default values
+    %% so if run out of data for implicit/explicit fieldnames can just return
+    {ok, Row};
+make_row(_, [], _Row) ->
+    %% Too many values for the field list
+    {error, "too many values"};
+make_row([{_Type, Val} | Fields], [Pos | FieldsPos], Row) ->
+    make_row(Fields, FieldsPos, setelement(Pos, Row, Val)).
+
+
 %% -----------
 %% put
 %% -----------
From 21889eafa76ff14b13b665db220053e8c1a485eb Mon Sep 17 00:00:00 2001
From: Torben Hoffmann
Date: Thu, 10 Mar 2016 17:12:03 +0100
Subject: [PATCH 034/122] Moved table_module_exists to util module.

Simpler resource_exists for riak_kv_wm_timeseries_listkeys.
---
 src/riak_kv_wm_timeseries.erl          | 16 +---------------
 src/riak_kv_wm_timeseries_listkeys.erl | 10 ++--------
 src/riak_kv_wm_ts_util.erl             | 18 +++++++++++++++++-
 3 files changed, 20 insertions(+), 24 deletions(-)

diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl
index 7a35823f97..27c3a2c27f 100644
--- a/src/riak_kv_wm_timeseries.erl
+++ b/src/riak_kv_wm_timeseries.erl
@@ -173,7 +173,7 @@ content_types_accepted(_, RD, Ctx) ->
 -spec resource_exists(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean() | halt()).
 resource_exists(RD, #ctx{mod=Mod} = Ctx) ->
-    case table_module_exists(Mod) of
+    case riak_kv_wm_ts_util:table_module_exists(Mod) of
         true ->
             resource_exists(wrq:path_tokens(RD), wrq:method(RD), RD, Ctx);
         false ->
@@ -292,20 +292,6 @@ to_json(RD, #ctx{api_call=get, object=Object}=Ctx) ->
         {{halt, 500}, Resp, Ctx}
     end.

-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%% helper functions
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%% @todo: this should be in riak_ql_ddl and should probably check deeper.
--spec table_module_exists(module()) -> boolean().
-table_module_exists(Mod) ->
-    try Mod:get_ddl() of
-        #ddl_v1{} ->
-            true
-    catch
-        _:_ ->
-            false
-    end.
-
 -spec extract_params([{string(), string()}], #ctx{}) -> #ctx{} .
diff --git a/src/riak_kv_wm_timeseries_listkeys.erl b/src/riak_kv_wm_timeseries_listkeys.erl
index a16fc9d4e9..37416e29c7 100644
--- a/src/riak_kv_wm_timeseries_listkeys.erl
+++ b/src/riak_kv_wm_timeseries_listkeys.erl
@@ -112,14 +112,8 @@ allowed_methods(RD, Ctx) ->

 -spec resource_exists(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()).
-resource_exists(RD, #ctx{table = Table} = Ctx) ->
-    Mod = riak_ql_ddl:make_module_name(Table),
-    case catch Mod:get_ddl() of
-        {_, {undef, _}} ->
-            handle_error({no_such_table, Table}, RD, Ctx);
-        _ ->
-            {true, RD, Ctx}
-    end.
+resource_exists(RD, #ctx{mod=Mod} = Ctx) ->
+    {riak_kv_wm_ts_util:table_module_exists(Mod), RD, Ctx}. 
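+%% a false return here is turned into a plain 404 by webmachine, which
+%% replaces the old handle_error({no_such_table, ...}) response.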
 -spec encodings_provided(#wm_reqdata{}, #ctx{}) ->
diff --git a/src/riak_kv_wm_ts_util.erl b/src/riak_kv_wm_ts_util.erl
index 71a6c5de54..6265502085 100644
--- a/src/riak_kv_wm_ts_util.erl
+++ b/src/riak_kv_wm_ts_util.erl
@@ -31,6 +31,8 @@

 -export([authorize/3]).

+-export([table_module_exists/1]).
+

 %% @private
 table_from_request(RD) ->
@@ -77,4 +79,18 @@ authorize(Call, Table, RD) ->
                 " accept credentials over HTTP. Try HTTPS instead.",
             Resp = set_text_resp_header(ErrorMsg, RD),
             {insecure, {halt, 426}, Resp}
-    end.
\ No newline at end of file
+    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% helper functions
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% @todo: this should be in riak_ql_ddl and should probably check deeper.
+-spec table_module_exists(module()) -> boolean().
+table_module_exists(Mod) ->
+    try Mod:get_ddl() of
+        _DDL -> %#ddl_v1{} ->
+            true
+    catch
+        _:_ ->
+            false
+    end.
\ No newline at end of file

From f2fc0bdec2f134f30eee524d01cfd5250d654eae Mon Sep 17 00:00:00 2001
From: Torben Hoffmann
Date: Thu, 10 Mar 2016 17:17:45 +0100
Subject: [PATCH 035/122] timeseries_listkeys: service_available simplified and
 unused functions removed.

* error_out, handle_error and flat_format removed as they are no longer used.
---
 src/riak_kv_wm_timeseries_listkeys.erl | 35 ++------------------------
 1 file changed, 2 insertions(+), 33 deletions(-)

diff --git a/src/riak_kv_wm_timeseries_listkeys.erl b/src/riak_kv_wm_timeseries_listkeys.erl
index 37416e29c7..7e6ef6e7b2 100644
--- a/src/riak_kv_wm_timeseries_listkeys.erl
+++ b/src/riak_kv_wm_timeseries_listkeys.erl
@@ -81,7 +81,8 @@ service_available(RD, Ctx = #ctx{riak = RiakProps}) ->
                       table = Table,
                       mod = Mod}};
         {error, Reason} ->
-            handle_error({riak_client_error, Reason}, RD, Ctx)
+            Resp = riak_kv_wm_ts_util:set_error_message("Unable to connect to Riak: ~p", [Reason], RD),
+            {false, Resp, Ctx}
     end.


@@ -162,35 +163,3 @@ ts_keys_to_json(Keys) ->
     KeysTerm = [tuple_to_list(sext:decode(A))
                 || A <- Keys, A /= []],
     mochijson2:encode({struct, [{<<"keys">>, KeysTerm}]}).
-
-
-error_out(Type, Fmt, Args, RD, Ctx) ->
-    {Type,
-     wrq:set_resp_header(
-       "Content-Type", "text/plain", wrq:append_to_response_body(
-                                       flat_format(Fmt, Args), RD)),
-     Ctx}.
-
--spec handle_error(atom()|tuple(), #wm_reqdata{}, #ctx{}) -> {tuple(), #wm_reqdata{}, #ctx{}}.
-handle_error(Error, RD, Ctx) ->
-    case Error of
-        {riak_client_error, Reason} ->
-            error_out(false,
-                      "Unable to connect to Riak: ~p", [Reason], RD, Ctx);
-        insecure_connection ->
-            error_out({halt, 426},
-                      "Security is enabled and Riak does not"
-                      " accept credentials over HTTP. Try HTTPS instead.", [], RD, Ctx);
-        {unsupported_version, BadVersion} ->
-            error_out({halt, 412},
-                      "Unsupported API version ~s", [BadVersion], RD, Ctx);
-        {not_permitted, Table} ->
-            error_out({halt, 401},
-                      "Access to table ~ts not allowed", [Table], RD, Ctx);
-        {no_such_table, Table} ->
-            error_out({halt, 404},
-                      "Table \"~ts\" does not exist", [Table], RD, Ctx)
-    end.
-
-flat_format(Format, Args) ->
-    lists:flatten(io_lib:format(Format, Args)).
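
Note (illustrative sketch, not part of the patch series): the relocated
riak_kv_wm_ts_util:table_module_exists/1 helper leans on the convention used
throughout these patches that every created table has a generated DDL helper
module (named via riak_ql_ddl:make_module_name/1) exporting get_ddl/0; if
that module is absent, the call raises undef inside the try and the catch-all
maps it to false. A minimal usage sketch, where the wrapper function name and
table value are hypothetical:

    exists_example(Table) when is_binary(Table) ->
        Mod = riak_ql_ddl:make_module_name(Table),
        case riak_kv_wm_ts_util:table_module_exists(Mod) of
            %% helper module compiled and loaded: the table is usable
            true  -> {ok, Mod};
            %% no such module (or no get_ddl/0): treat as a missing table
            false -> {error, {no_such_table, Table}}
        end.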
From 3c436226fc998df1a78ea5145e936afba9b192cf Mon Sep 17 00:00:00 2001 From: Brett Hazen Date: Wed, 2 Mar 2016 16:28:09 -0700 Subject: [PATCH 036/122] Integrate Jon's POC and get INSERT statement working --- include/riak_kv_ts.hrl | 11 +- src/riak_kv_pb_timeseries.erl | 191 +++++++++++++++++++++++----------- src/riak_kv_qry.erl | 5 +- src/riak_kv_ts_util.erl | 15 ++- 4 files changed, 155 insertions(+), 67 deletions(-) diff --git a/include/riak_kv_ts.hrl b/include/riak_kv_ts.hrl index 3b3c9816ee..97e8d338fc 100644 --- a/include/riak_kv_ts.hrl +++ b/include/riak_kv_ts.hrl @@ -26,7 +26,7 @@ %% For dialyzer types -include_lib("riak_ql/include/riak_ql_ddl.hrl"). -%% the result type of a query, rows means to return all mataching rows, aggregate +%% the result type of a query, rows means to return all matching rows, aggregate %% returns one row calculated from the result set for the query. -type select_result_type() :: rows | aggregate. @@ -59,7 +59,14 @@ -record(riak_sql_describe_v1, { - 'DESCRIBE' = <<>> :: binary() + 'DESCRIBE' = <<>> :: binary() + }). + +-record(riak_sql_insert_v1, + { + 'INSERT' = <<>> :: binary(), + fields :: [{identifier, binary()}], + values :: list(list(term())) }). -define(SQL_SELECT, #riak_select_v1). diff --git a/src/riak_kv_pb_timeseries.erl b/src/riak_kv_pb_timeseries.erl index 65d97d6db3..c72897d3ff 100644 --- a/src/riak_kv_pb_timeseries.erl +++ b/src/riak_kv_pb_timeseries.erl @@ -74,7 +74,8 @@ -type ts_responses() :: #tsputresp{} | #tsdelresp{} | #tsgetresp{} | #tslistkeysresp{} | #tsqueryresp{} | #rpberrorresp{}. --type ts_query_types() :: #ddl_v1{} | ?SQL_SELECT{} | #riak_sql_describe_v1{}. +-type ts_query_types() :: #ddl_v1{} | ?SQL_SELECT{} | #riak_sql_describe_v1{} | + #riak_sql_insert_v1{}. -type process_retval() :: {reply, RpbOrTsMessage::tuple(), #state{}}. @@ -89,7 +90,7 @@ decode(Code, Bin) -> Msg = riak_pb_codec:decode(Code, Bin), case Msg of #tsqueryreq{query = Q, cover_context = Cover} -> - %% convert error returns to ok's, this menas it will be passed into + %% convert error returns to ok's, this means it will be passed into %% process which will not process it and return the error. case catch decode_query(Q, Cover) of {ok, DecodedQuery} -> @@ -128,6 +129,8 @@ decode_query(#tsinterpolation{ base = BaseQuery }, Cover) -> riak_kv_ts_util:build_sql_record(select, SQL, Cover); {describe, SQL} -> riak_kv_ts_util:build_sql_record(describe, SQL, Cover); + {insert, SQL} -> + riak_kv_ts_util:build_sql_record(insert, SQL, Cover); {ddl, DDL} -> {ok, DDL}; Other -> @@ -141,8 +144,9 @@ decode_query_permissions(#ddl_v1{table = NewBucketType}) -> decode_query_permissions(?SQL_SELECT{'FROM' = Table}) -> {"riak_kv.ts_query", Table}; decode_query_permissions(#riak_sql_describe_v1{'DESCRIBE' = Table}) -> - {"riak_kv.ts_describe", Table}. - + {"riak_kv.ts_describe", Table}; +decode_query_permissions(#riak_sql_insert_v1{'INSERT' = Table}) -> + {"riak_kv.ts_insert", Table}. -spec encode(tuple()) -> {ok, iolist()}. @@ -186,9 +190,8 @@ process(M = ?SQL_SELECT{'FROM' = Table}, State) -> process(M = #riak_sql_describe_v1{'DESCRIBE' = Table}, State) -> check_table_and_call(Table, fun sub_tsqueryreq/4, M, State); -process(M = #riak_sql_insert_v1{table =Table}, State) -> - check_table_and_call(Table, fun sub_insert/4, M, State). - +process(M = #riak_sql_insert_v1{'INSERT' = Table}, State) -> + check_table_and_call(Table, fun sub_tsqueryreq/4, M, State). %% There is no two-tuple variants of process_stream for tslistkeysresp %% as TS list_keys senders always use backpressure. 
@@ -262,49 +265,57 @@ wait_until_active(Table, State, Seconds) -> %% %% INSERT statements, called from check_table_and_call. %% -sub_insert(Mod, DDL, #riak_sql_insert_v1{table = Table, fields = Fields, values = Values}, State) -> - case lookup_insert_fields(DDL, Fields) of +-spec make_insert_response(module(), #riak_sql_insert_v1{}) -> + #tsqueryresp{} | #rpberrorresp{}. +make_insert_response(Mod, #riak_sql_insert_v1{'INSERT' = Table, fields = Fields, values = Values}) -> + case lookup_field_positions(Mod, Fields) of {error, FieldReason} -> - {reply, make_rpberrresp(?E_BAD_QUERY, FieldReason), State}; - {ok, FieldDesc} -> - Empty = empty(DDL), - case make_putdata(Values, FieldDesc, Empty) of - {error, ValueReason} -> - {reply, make_rpberrresp(?E_BAD_QUERY, ValueReason), State}; - {ok, Data} -> - sub_putreq_common(Mod, Table, Data, State) + make_rpberrresp(?E_BAD_QUERY, FieldReason); + {ok, Positions} -> + Empty = make_empty_row(Mod), + case xlate_insert_to_putdata(Values, Positions, Empty) of + {error, ValueReason} -> + make_rpberrresp(?E_BAD_QUERY, ValueReason); + {ok, Data} -> + Response = sub_putreq_common(Mod, Table, Data, #state{}), + xlate_put_to_qry_resp(Response) end end. +xlate_put_to_qry_resp({reply, #tsputresp{}, _State}) -> + #tsqueryresp{}; +xlate_put_to_qry_resp({reply, Err, _State}) -> + Err. + + %% %% Return an all-null empty row ready to be populated by the values %% -empty(#ddl_v1{fields = Fields}) -> - list_to_tuple(lists:duplicate(length(Fields), undefined)). +-spec make_empty_row(module()) -> tuple(undefined). +make_empty_row(Mod) -> + Positions = Mod:get_field_positions(), + list_to_tuple(lists:duplicate(length(Positions), undefined)). %% -%% Lookup the index of the field names selected to insert. If no field names are given -%% use the positions defined in the DDL. This *requires* that once schema changes -%% take place the DDL fields are left in order. +%% Lookup the index of the field names selected to insert. %% -%% TODO: Field position lookup should be implemented as part of the DDL module rather -%% than digging through #ddl_v1{} records to make this future proof through upgrades. +%% This *requires* that once schema changes take place the DDL fields are left in order. %% -lookup_insert_fields(#ddl_v1{fields = Fields}, undefined) -> - {ok, [Pos || #riak_field_v1{position = Pos} <- Fields]}; -lookup_insert_fields(#ddl_v1{fields = Fields}, FieldIdentifiers) -> +-spec lookup_field_positions(module(), [identifier()]) -> + {ok, [pos_integer()]} | {error, string()}. +lookup_field_positions(Mod, FieldIdentifiers) -> case lists:foldl( fun({identifier, FieldName}, {Good, Bad}) -> - case lists:keyfind(FieldName, #riak_field_v1.name, Fields) of - false -> - {Good, [flat_format("undefined field ~s", [FieldName]) | Bad]}; - #riak_field_v1{position = Pos} -> - {[Pos | Good], Bad} + case Mod:is_field_valid(FieldName) of + false -> + {Good, [flat_format("undefined field ~s", [FieldName]) | Bad]}; + true -> + {[Mod:get_field_position(FieldName) | Good], Bad} end end, {[], []}, FieldIdentifiers) of - {Pos, []} -> - {ok, lists:reverse(Pos)}; + {Positions, []} -> + {ok, lists:reverse(Positions)}; {_, Errors} -> %% Only returns the first error, could investigate returning multiple. {error, hd(lists:reverse(Errors))} @@ -316,34 +327,40 @@ lookup_insert_fields(#ddl_v1{fields = Fields}, FieldIdentifiers) -> %% and the general validation rules should pick that up. %% If there are too many values given for the fields it returns an error. 
%% -make_putdata(Values, FieldsPos, Empty) -> - case lists:foldl( - fun(RowVals, {Good, Bad, RowNum}) -> - case make_row(RowVals, FieldsPos, Empty) of - {ok, Row} -> - {[Row | Good], Bad, RowNum + 1}; - {error, Reason} -> - Reason1 = flat_format("~s in row index ~b", - [Reason, RowNum]), - {Good, [Reason1 | Bad], RowNum + 1} - end - end, {[], [], 1}, Values) of - {PutData, [], _} -> - {ok, lists:reverse(PutData)}; - {_, Errors, _} -> - %% Only returns the first error, could investigate returning multiple. - {error, lists:last(Errors)} +-spec xlate_insert_to_putdata([[riak_ql_ddl:data_value()]], [pos_integer()], tuple(undefined)) -> + {ok, [tuple()]} | {error, string()}. +xlate_insert_to_putdata(Values, Positions, Empty) -> + ConvFn = fun(RowVals, {Good, Bad, RowNum}) -> + case make_insert_row(RowVals, Positions, Empty) of + {ok, Row} -> + {[Row | Good], Bad, RowNum + 1}; + {error, Reason} -> + Reason1 = flat_format("~s in row index ~b", + [Reason, RowNum]), + {Good, [Reason1 | Bad], RowNum + 1} + end + end, + Converted = lists:foldl(ConvFn, {[], [], 1}, Values), + case Converted of + {PutData, [], _} -> + {ok, lists:reverse(PutData)}; + {_, Errors, _} -> + %% Only returns the first error, could investigate returning multiple. + {error, lists:last(Errors)} end. -make_row([], _FieldsPos, Row) -> +-spec make_insert_row([] | [riak_ql_ddl:data_value()], [pos_integer()], tuple()) -> + {ok, tuple()} | {error, string()}. +make_insert_row([], _Positions, Row) -> %% Out of entries in the value - row is populated with default values - %% so if run out of data for implicit/explicit fieldnames can just return + %% so if we run out of data for implicit/explicit fieldnames can just return {ok, Row}; -make_row(_, [], _Row) -> +make_insert_row(_, [], _Row) -> %% Too many values for the field {error, "too many values"}; -make_row([{_Type, Val} | Fields], [Pos | FieldsPos], Row) -> - make_row(Fields, FieldsPos, setelement(Pos, Row, Val)). +%% Make sure the types match +make_insert_row([{_Type, Val} | Values], [Pos | Positions], Row) -> + make_insert_row(Values, Positions, setelement(Pos, Row, Val)). %% ----------- @@ -743,12 +760,14 @@ compile(Mod, {ok, ?SQL_SELECT{}=SQL}) -> %% query %% -sub_tsqueryreq(_Mod, DDL, SQL, State) -> +-spec sub_tsqueryreq(module(), #ddl_v1{}, + ?SQL_SELECT{} | #riak_sql_describe_v1{} | #riak_sql_insert_v1{}, + #state{}) -> + {reply, #tsqueryresp{} | #rpberrorresp{}, #state{}}. +sub_tsqueryreq(Mod, DDL, SQL, State) -> case riak_kv_qry:submit(SQL, DDL) of - {ok, Data} when element(1, SQL) =:= ?SQL_SELECT_RECORD_NAME -> - {reply, make_tsqueryresp(Data), State}; - {ok, Data} when element(1, SQL) =:= riak_sql_describe_v1 -> - {reply, make_describe_response(Data), State}; + {ok, Data} -> + {reply, make_tsquery_resp(Mod, SQL, Data), State}; %% parser messages have a tuple for Reason: {error, {E, Reason}} when is_atom(E), is_binary(Reason) -> @@ -769,6 +788,12 @@ sub_tsqueryreq(_Mod, DDL, SQL, State) -> {reply, make_rpberrresp(?E_SUBMIT, to_string(Reason)), State} end. +make_tsquery_resp(_Mod, ?SQL_SELECT{}, Data) -> + make_tsqueryresp(Data); +make_tsquery_resp(_Mod, #riak_sql_describe_v1{}, Data) -> + make_describe_response(Data); +make_tsquery_resp(Mod, SQL = #riak_sql_insert_v1{}, _Data) -> + make_insert_response(Mod, SQL). %% --------------------------------------------------- %% local functions @@ -1009,4 +1034,48 @@ validate_rows_error_response_2_test() -> validate_rows_error_response(["1", "2", "3"]) ). 
+validate_make_insert_row_basic_test() -> + Data = [{integer,4}, {binary,<<"bamboozle">>}, {float, 3.14}], + Positions = [3, 1, 2], + Row = {undefined, undefined, undefined}, + Result = make_insert_row(Data, Positions, Row), + ?assertEqual( + {ok, {<<"bamboozle">>, 3.14, 4}}, + Result + ). + +validate_make_insert_row_too_many_test() -> + Data = [{integer,4}, {binary,<<"bamboozle">>}, {float, 3.14}, {integer, 8}], + Positions = [3, 1, 2], + Row = {undefined, undefined, undefined}, + Result = make_insert_row(Data, Positions, Row), + ?assertEqual( + {error, "too many values"}, + Result + ). + + +validate_xlate_insert_to_putdata_ok_test() -> + Empty = list_to_tuple(lists:duplicate(5, undefined)), + Values = [[{integer, 4}, {binary, <<"babs">>}, {float, 5.67}, {binary, <<"bingo">>}], + [{integer, 8}, {binary, <<"scat">>}, {float, 7.65}, {binary, <<"yolo!">>}]], + Positions = [5, 3, 1, 2, 4], + Result = xlate_insert_to_putdata(Values, Positions, Empty), + ?assertEqual( + {ok,[{5.67,<<"bingo">>,<<"babs">>,undefined,4}, + {7.65,<<"yolo!">>,<<"scat">>,undefined,8}]}, + Result + ). + +validate_xlate_insert_to_putdata_too_many_values_test() -> + Empty = list_to_tuple(lists:duplicate(5, undefined)), + Values = [[{integer, 4}, {binary, <<"babs">>}, {float, 5.67}, {binary, <<"bingo">>}, {integer, 7}], + [{integer, 8}, {binary, <<"scat">>}, {float, 7.65}, {binary, <<"yolo!">>}]], + Positions = [3, 1, 2, 4], + Result = xlate_insert_to_putdata(Values, Positions, Empty), + ?assertEqual( + {error,"too many values in row index 1"}, + Result + ). + -endif. diff --git a/src/riak_kv_qry.erl b/src/riak_kv_qry.erl index 8eb397e703..c3d1bad379 100644 --- a/src/riak_kv_qry.erl +++ b/src/riak_kv_qry.erl @@ -33,7 +33,7 @@ -include("riak_kv_ts.hrl"). %% No coverage plan for parallel requests --spec submit(string() | ?SQL_SELECT{} | #riak_sql_describe_v1{}, #ddl_v1{}) -> +-spec submit(string() | ?SQL_SELECT{} | #riak_sql_describe_v1{} | #riak_sql_insert_v1{}, #ddl_v1{}) -> {ok, any()} | {error, any()}. %% @doc Parse, validate against DDL, and submit a query for execution. %% To get the results of running the query, use fetch/1. @@ -49,6 +49,9 @@ submit(SQLString, DDL) when is_list(SQLString) -> submit(#riak_sql_describe_v1{}, DDL) -> describe_table_columns(DDL); +submit(SQL = #riak_sql_insert_v1{}, _DDL) -> + {ok, SQL}; + submit(SQL = ?SQL_SELECT{}, DDL) -> maybe_submit_to_queue(SQL, DDL). diff --git a/src/riak_kv_ts_util.erl b/src/riak_kv_ts_util.erl index a8e3376fe6..c507eb8d93 100644 --- a/src/riak_kv_ts_util.erl +++ b/src/riak_kv_ts_util.erl @@ -79,7 +79,15 @@ build_sql_record(select, SQL, Cover) -> end; build_sql_record(describe, SQL, _Cover) -> D = proplists:get_value(identifier, SQL), - {ok, #riak_sql_describe_v1{'DESCRIBE' = D}}. + {ok, #riak_sql_describe_v1{'DESCRIBE' = D}}; +build_sql_record(insert, SQL, _Cover) -> + T = proplists:get_value(table, SQL), + F = proplists:get_value(fields, SQL), + V = proplists:get_value(values, SQL), + {ok, #riak_sql_insert_v1{'INSERT' = T, + fields = F, + values = V + }}. %% Useful key extractors for functions (e.g., in get or delete code @@ -100,10 +108,11 @@ table_to_bucket(Table) when is_binary(Table) -> {Table, Table}. --spec queried_table(#riak_sql_describe_v1{} | ?SQL_SELECT{}) -> binary(). +-spec queried_table(#riak_sql_describe_v1{} | ?SQL_SELECT{} | #riak_sql_insert_v1{}) -> binary(). %% Extract table name from various sql records. queried_table(#riak_sql_describe_v1{'DESCRIBE' = Table}) -> Table; -queried_table(?SQL_SELECT{'FROM' = Table}) -> Table. 
+queried_table(?SQL_SELECT{'FROM' = Table}) -> Table; +queried_table(#riak_sql_insert_v1{'INSERT' = Table}) -> Table. -spec get_table_ddl(binary()) -> From 2a8677d31970013a31a83cdde2954d401ddfce22 Mon Sep 17 00:00:00 2001 From: "John R. Daily" Date: Thu, 10 Mar 2016 16:45:40 -0500 Subject: [PATCH 037/122] Use lists:split instead of recursively touching each record, and fix tests for new reversed order per the suggestion from Andrei --- src/riak_kv_pb_timeseries.erl | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/src/riak_kv_pb_timeseries.erl b/src/riak_kv_pb_timeseries.erl index eb5ad67632..c2dd4d089b 100644 --- a/src/riak_kv_pb_timeseries.erl +++ b/src/riak_kv_pb_timeseries.erl @@ -393,20 +393,16 @@ estimated_row_count(SampleRowSize, MaxBatchSize) -> RowSizeFudged = (SampleRowSize * 10) div 9, MaxBatchSize div RowSizeFudged. -create_batches(Rows, MaxSize) when length(Rows) =< MaxSize -> - [Rows]; -%% May be a more efficient way to do this. Take a list of arbitrary -%% data (expected to be a list of lists for this use case) and create -%% a list of MaxSize lists. create_batches(Rows, MaxSize) -> - create_batches(Rows, MaxSize, MaxSize, [], []). - -create_batches([], _Counter, _Max, ThisBatch, AllBatches) -> - [ThisBatch|AllBatches]; -create_batches(Rows, 0, Max, ThisBatch, AllBatches) -> - create_batches(Rows, Max, Max, [], AllBatches ++ [ThisBatch]); -create_batches([H|T], Counter, Max, ThisBatch, AllBatches) -> - create_batches(T, Counter-1, Max, ThisBatch ++ [H], AllBatches). + create_batches(Rows, MaxSize, []). + +create_batches([], _MaxSize, Accum) -> + Accum; +create_batches(Rows, MaxSize, Accum) when length(Rows) < MaxSize -> + [Rows|Accum]; +create_batches(Rows, MaxSize, Accum) -> + {First, Rest} = lists:split(MaxSize, Rows), + create_batches(Rest, MaxSize, [First|Accum]). %%%%%%%% add_preflists(PartitionedData, NVal, UpNodes) -> @@ -980,15 +976,15 @@ validate_rows_error_response_2_test() -> ). batch_1_test() -> - ?assertEqual([[1, 2, 3, 4], [5, 6, 7, 8], [9]], + ?assertEqual(lists:reverse([[1, 2, 3, 4], [5, 6, 7, 8], [9]]), create_batches([1, 2, 3, 4, 5, 6, 7, 8, 9], 4)). batch_2_test() -> - ?assertEqual([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10]], + ?assertEqual(lists:reverse([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10]]), create_batches([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 4)). batch_3_test() -> - ?assertEqual([[1, 2, 3], [4, 5, 6], [7, 8, 9]], + ?assertEqual(lists:reverse([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), create_batches([1, 2, 3, 4, 5, 6, 7, 8, 9], 3)). batch_undersized1_test() -> @@ -1000,6 +996,6 @@ batch_undersized2_test() -> create_batches([1, 2, 3, 4, 5, 6], 7)). batch_almost_undersized_test() -> - ?assertEqual([[1, 2, 3, 4, 5], [6]], + ?assertEqual(lists:reverse([[1, 2, 3, 4, 5], [6]]), create_batches([1, 2, 3, 4, 5, 6], 5)). -endif. 
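
Note (illustrative sketch, not part of the patch series): after the
lists:split/2 rewrite above, create_batches/2 accumulates batches head-first,
so the result comes back in reverse order with any final short batch at the
head; this is why the updated tests wrap their expected values in
lists:reverse/1. Assuming the function exactly as defined in this patch:

    %% 9 rows in batches of 4: the leftover batch [9] ends up first.
    create_batches([1, 2, 3, 4, 5, 6, 7, 8, 9], 4).
    %% => [[9], [5, 6, 7, 8], [1, 2, 3, 4]]

    %% Callers that need the original submission order must reverse:
    lists:reverse(create_batches([1, 2, 3, 4, 5, 6, 7, 8, 9], 4)).
    %% => [[1, 2, 3, 4], [5, 6, 7, 8], [9]]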
From 803f0e64647ec145edbf9fc0f9ba63fe2aedff5a Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Fri, 11 Mar 2016 12:17:25 +0100 Subject: [PATCH 038/122] moved local_key(Mod) to wm_ts_util --- src/riak_kv_wm_timeseries.erl | 25 +++---------------------- src/riak_kv_wm_ts_util.erl | 33 +++++++++++++++++++++++++++++---- 2 files changed, 32 insertions(+), 26 deletions(-) diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl index 27c3a2c27f..02a36f290a 100644 --- a/src/riak_kv_wm_timeseries.erl +++ b/src/riak_kv_wm_timeseries.erl @@ -104,8 +104,7 @@ service_available(RD, #ctx{riak = RiakProps}=Ctx) -> Mod = riak_ql_ddl:make_module_name(Table), {true, RD, Ctx#ctx{table=Table, mod=Mod}}; {error, Reason} -> - ErrorMsg = riak_kv_wm_ts_util:flat_format("Unable to connect to Riak: ~p", [Reason]), - Resp = riak_kv_wm_ts_util:set_text_resp_header(ErrorMsg, RD), + Resp = riak_kv_wm_ts_util:set_error_message("Unable to connect to Riak: ~p", [Reason], RD), {false, Resp, Ctx} end. @@ -276,7 +275,7 @@ delete_resource(RD, #ctx{table=Table, {{halt, 404}, RD, Ctx} catch _:Reason -> - lager:log(info, self(), "delete_resource failed: ~p", Reason), + lager:log(info, self(), "delete_resource failed: ~p", [Reason]), Resp = riak_kv_wm_ts_util:set_error_message("Internal error: ~p", [Reason], RD), {{halt, 500}, Resp, Ctx} end. @@ -320,15 +319,9 @@ validate_key(Path, Mod) -> -spec path_elements(module(), [string()]) -> [riak_pb_ts_codec:ldbvalue()]. path_elements(Mod, Path) -> - KeyTypes = local_key_fields_and_types(Mod), + KeyTypes = riak_kv_wm_ts_util:local_key_fields_and_types(Mod), match_path(Path, KeyTypes). -local_key_fields_and_types(Mod) -> - LK = local_key(Mod), - Types = [Mod:get_field_type([F]) || F <- LK ], - LKStr = [ binary_to_list(F) || F <- LK ], - lists:zip(LKStr, Types). - match_path([], []) -> []; match_path([F,V|Path], [{F, Type}|KeyTypes]) -> @@ -386,18 +379,6 @@ extract_field_value({Name, Type}, FVList) -> check_field_value(Type, Value) end. -local_key(Mod) -> - ddl_local_key(Mod:get_ddl()). - -%% this should be in the DDL helper module. --spec ddl_local_key(#ddl_v1{}) -> [binary()]. -ddl_local_key(#ddl_v1{local_key=LK}) -> - #key_v1{ast=Ast} = LK, - [ param_name(P) || P <- Ast]. - -param_name(#param_v1{name=[Name]}) -> - Name. - %% @todo: might be better if the DDL helper module had a %% valid_field_value(Field, Value) -> boolean() function. check_field_value(varchar, V) when is_binary(V) -> V; diff --git a/src/riak_kv_wm_ts_util.erl b/src/riak_kv_wm_ts_util.erl index 6265502085..c11615e026 100644 --- a/src/riak_kv_wm_ts_util.erl +++ b/src/riak_kv_wm_ts_util.erl @@ -33,6 +33,15 @@ -export([table_module_exists/1]). +-export([local_key/1]). + +-export([local_key_fields_and_types/1]). + +-include_lib("webmachine/include/webmachine.hrl"). +-include_lib("riak_ql/include/riak_ql_ddl.hrl"). +-include("riak_kv_wm_raw.hrl"). +-include("riak_kv_ts.hrl"). + %% @private table_from_request(RD) -> @@ -81,9 +90,6 @@ authorize(Call, Table, RD) -> {insecure, {halt, 426}, Resp} end. -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% helper functions -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% @todo: this should be in riak_ql_ddl and should probably check deeper. -spec table_module_exists(module()) -> boolean(). table_module_exists(Mod) -> @@ -93,4 +99,23 @@ table_module_exists(Mod) -> catch _:_ -> false - end. \ No newline at end of file + end. + + +local_key(Mod) -> + ddl_local_key(Mod:get_ddl()). 
+ +%% this should be in the DDL helper module. +-spec ddl_local_key(#ddl_v1{}) -> [binary()]. +ddl_local_key(#ddl_v1{local_key=LK}) -> + #key_v1{ast=Ast} = LK, + [ param_name(P) || P <- Ast]. + +param_name(#param_v1{name=[Name]}) -> + Name. + +local_key_fields_and_types(Mod) -> + LK = local_key(Mod), + Types = [Mod:get_field_type([F]) || F <- LK ], + LKStr = [ binary_to_list(F) || F <- LK ], + lists:zip(LKStr, Types). \ No newline at end of file From af62807f0b5022e3dce65b7de3480d4b9019f668 Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Fri, 11 Mar 2016 12:18:09 +0100 Subject: [PATCH 039/122] timeseries_listkeys refactored and produces html --- src/riak_kv_wm_timeseries_listkeys.erl | 86 ++++++++++++++++---------- 1 file changed, 53 insertions(+), 33 deletions(-) diff --git a/src/riak_kv_wm_timeseries_listkeys.erl b/src/riak_kv_wm_timeseries_listkeys.erl index 7e6ef6e7b2..69e33f443e 100644 --- a/src/riak_kv_wm_timeseries_listkeys.erl +++ b/src/riak_kv_wm_timeseries_listkeys.erl @@ -42,9 +42,10 @@ forbidden/2, resource_exists/2, content_types_provided/2, - encodings_provided/2, - produce_doc_body/2 - ]). + encodings_provided/2]). + +%% webmachine body-producing functions +-export([produce_doc_body/2]). -include("riak_kv_wm_raw.hrl"). -include_lib("webmachine/include/webmachine.hrl"). @@ -85,7 +86,6 @@ service_available(RD, Ctx = #ctx{riak = RiakProps}) -> {false, Resp, Ctx} end. - is_authorized(RD, #ctx{table=Table}=Ctx) -> case riak_kv_wm_ts_util:authorize(listkeys, Table, RD) of ok -> @@ -96,26 +96,19 @@ is_authorized(RD, #ctx{table=Table}=Ctx) -> {Halt, Resp, Ctx} end. - - -spec forbidden(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). forbidden(RD, Ctx) -> Result = riak_kv_wm_utils:is_forbidden(RD), {Result, RD, Ctx}. - - - -spec allowed_methods(#wm_reqdata{}, #ctx{}) -> cb_rv_spec([atom()]). %% @doc Get the list of methods this resource supports. allowed_methods(RD, Ctx) -> {['GET'], RD, Ctx}. - -spec resource_exists(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). resource_exists(RD, #ctx{mod=Mod} = Ctx) -> - {riak_kv_wm_utils:table_module_exists(Mod), RD, Ctx}. - + {riak_kv_wm_ts_util:table_module_exists(Mod), RD, Ctx}. -spec encodings_provided(#wm_reqdata{}, #ctx{}) -> cb_rv_spec([{Encoding::string(), Producer::function()}]). @@ -128,38 +121,65 @@ encodings_provided(RD, Ctx) -> cb_rv_spec([{ContentType::string(), Producer::atom()}]). %% @doc List the content types available for representing this resource. content_types_provided(RD, Ctx) -> - {[{"application/json", produce_doc_body}], RD, Ctx}. + {[{"text/html", produce_doc_body}], RD, Ctx}. - -produce_doc_body(RD, Ctx = #ctx{table = Table, +produce_doc_body(RD, Ctx = #ctx{table = Table, mod=Mod, client = Client}) -> - F = fun() -> - {ok, ReqId} = riak_client:stream_list_keys( - {Table, Table}, undefined, Client), - stream_keys(ReqId) - end, - {{stream, {<<>>, F}}, RD, Ctx}. - -stream_keys(ReqId) -> + {ok, ReqId} = riak_client:stream_list_keys( + {Table, Table}, undefined, Client), + lager:log(info, self(), "in produce_doc_body ~p", [Table]), + {{halt, 200}, wrq:set_resp_body({stream, prepare_stream(ReqId, Table, Mod)}, RD), Ctx}. + +prepare_stream(ReqId, Table, Mod) -> + {<<"">>, fun() -> stream_keys(ReqId, Table, Mod) end}. 
+
+stream_keys(ReqId, Table, Mod) ->
     receive
         %% skip empty shipments
         {ReqId, {keys, []}} ->
-            stream_keys(ReqId);
+            stream_keys(ReqId, Table, Mod);
         {ReqId, From, {keys, []}} ->
             _ = riak_kv_keys_fsm:ack_keys(From),
-            stream_keys(ReqId);
+            stream_keys(ReqId, Table, Mod);
         {ReqId, From, {keys, Keys}} ->
             _ = riak_kv_keys_fsm:ack_keys(From),
-            {ts_keys_to_json(Keys), fun() -> stream_keys(ReqId) end};
+            {ts_keys_to_html(Keys, Table, Mod), fun() -> stream_keys(ReqId, Table, Mod) end};
         {ReqId, {keys, Keys}} ->
-            {ts_keys_to_json(Keys), fun() -> stream_keys(ReqId) end};
+            {ts_keys_to_html(Keys, Table, Mod), fun() -> stream_keys(ReqId, Table, Mod) end};
         {ReqId, done} ->
-            {<<>>, done};
+            {<<"">>, done};
         {ReqId, {error, timeout}} ->
-            {mochijson2:encode({struct, [{error, timeout}]}), done}
+            {mochijson2:encode({struct, [{error, timeout}]}), done};
+        Weird ->
+            lager:log(info, self(), "stream_keys got totally Weird=~p", [Weird]),
+            stream_keys(ReqId, Table, Mod)
     end.

-ts_keys_to_json(Keys) ->
-    KeysTerm = [tuple_to_list(sext:decode(A))
-                || A <- Keys, A /= []],
-    mochijson2:encode({struct, [{<<"keys">>, KeysTerm}]}).
+ts_keys_to_html(EncodedKeys, Table, Mod) ->
+    BaseUrl = base_url(Table),
+    Keys = decode_keys(EncodedKeys),
+    KeyTypes = riak_kv_wm_ts_util:local_key_fields_and_types(Mod),
+    URLs = [io_lib:format("~s~s", [BaseUrl, key_to_string(Key, KeyTypes)])
+            || Key <- Keys],
+    Hrefs = [ io_lib:format("<a href=\"~s\">~s</a>", [URL, URL])
+              || URL <- URLs],
+    list_to_binary(lists:flatten(Hrefs)).
+
+decode_keys(Keys) ->
+    [tuple_to_list(sext:decode(A))
+     || A <- Keys, A /= []].
+
+key_to_string([], []) ->
+    "";
+key_to_string([Key|Keys], [{Field, Type}|KeyTypes]) ->
+    Field ++ "/" ++ value_to_url_string(Key, Type) ++ "/" ++ key_to_string(Keys, KeyTypes).
+
+value_to_url_string(V, varchar) ->
+    binary_to_list(V);
+value_to_url_string(V, timestamp) ->
+    erlang:integer_to_list(V).
+
+base_url(Table) ->
+    {ok, [{Server, Port}]} = application:get_env(riak_api, http),
+    io_lib:format("http://~s:~B/ts/v1/tables/~s/keys/",
+                  [Server, Port, Table]).

From 32935784cee790c939f0518dc8bb03f651991ef1 Mon Sep 17 00:00:00 2001
From: Torben Hoffmann
Date: Sun, 13 Mar 2016 20:32:31 +0100
Subject: [PATCH 040/122] Uniform dispatch routes for timeseries in riak_kv_web

---
 src/riak_kv_web.erl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/riak_kv_web.erl b/src/riak_kv_web.erl
index ae73a8563f..fc794f567e 100644
--- a/src/riak_kv_web.erl
+++ b/src/riak_kv_web.erl
@@ -129,7 +129,7 @@ raw_dispatch(Name) ->
     %% decide if we want to dispatch to separate resource modules or handle
     %% the different versions inside the same resource handler module.
     [{["ts", api_version, "tables", table, "list_keys"], riak_kv_wm_timeseries_listkeys, Props},
-     {["ts", "v1", "tables", table, "keys", '*'], riak_kv_wm_timeseries, Props},
+     {["ts", api_version, "tables", table, "keys", '*'], riak_kv_wm_timeseries, Props},
      {["ts", api_version, "query"], riak_kv_wm_timeseries_query, Props}
     ] || {_Prefix, Props} <- Props2]).

From 68a1d1c17421d0dc32282d08a5500a71917dee35 Mon Sep 17 00:00:00 2001
From: Torben Hoffmann
Date: Sun, 13 Mar 2016 20:33:12 +0100
Subject: [PATCH 041/122] WIP.
TS query now works for create table --- src/riak_kv_wm_timeseries_query.erl | 642 +++++++++++++--------------- 1 file changed, 294 insertions(+), 348 deletions(-) diff --git a/src/riak_kv_wm_timeseries_query.erl b/src/riak_kv_wm_timeseries_query.erl index 3ba77d937e..5074aa2e29 100644 --- a/src/riak_kv_wm_timeseries_query.erl +++ b/src/riak_kv_wm_timeseries_query.erl @@ -37,16 +37,17 @@ init/1, service_available/2, is_authorized/2, + malformed_request/2, forbidden/2, allowed_methods/2, + post_is_create/2, process_post/2, - malformed_request/2, content_types_accepted/2, - resource_exists/2, content_types_provided/2, - encodings_provided/2, - produce_doc_body/2, - accept_doc_body/2 + encodings_provided/2 + ]). + +-export([produce_doc_body/2 ]). -include_lib("webmachine/include/webmachine.hrl"). @@ -54,23 +55,25 @@ -include("riak_kv_wm_raw.hrl"). -include("riak_kv_ts.hrl"). --record(ctx, {api_version, - method :: atom(), - prefix, %% string() - prefix for resource uris - timeout, %% integer() - passed-in timeout value in ms - security, %% security context - client, %% riak_client() - the store client - riak, %% local | {node(), atom()} - params for riak client - query :: undefined | string(), - compiled_query :: undefined | #ddl_v1{} | #riak_sql_describe_v1{} | ?SQL_SELECT{}, - result :: undefined | ok | {Headers::[binary()], Rows::[ts_rec()]} | - [{entry, proplists:proplist()}] - }). +-record(ctx, { + table :: 'undefined' | string(), + mod :: 'undefined' | module(), + method :: atom(), + prefix, %% string() - prefix for resource uris + timeout, %% integer() - passed-in timeout value in ms + security, %% security context + riak, %% local | {node(), atom()} - params for riak client + sql_type, + compiled_query :: undefined | #ddl_v1{} | #riak_sql_describe_v1{} | #riak_select_v1{}, + result :: undefined | ok | {Headers::[binary()], Rows::[ts_rec()]} | + [{entry, proplists:proplist()}] + }). -define(DEFAULT_TIMEOUT, 60000). -define(TABLE_ACTIVATE_WAIT, 30). %% wait until table's bucket type is activated --define(CB_RV_SPEC, {boolean()|atom()|tuple(), #wm_reqdata{}, #ctx{}}). +-type cb_rv_spec(T) :: {T, #wm_reqdata{}, #ctx{}}. +-type halt() :: {'halt', 200..599} | {'error' , term()}. -type ts_rec() :: [riak_pb_ts_codec:ldbvalue()]. @@ -81,42 +84,62 @@ init(Props) -> {ok, #ctx{prefix = proplists:get_value(prefix, Props), riak = proplists:get_value(riak, Props)}}. --spec service_available(#wm_reqdata{}, #ctx{}) -> - {boolean(), #wm_reqdata{}, #ctx{}}. +-spec service_available(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). %% @doc Determine whether or not a connection to Riak -%% can be established. This function also takes this -%% opportunity to extract the 'bucket' and 'key' path -%% bindings from the dispatch, as well as any vtag -%% query parameter. +%% can be established. service_available(RD, Ctx = #ctx{riak = RiakProps}) -> + checkpoint("service_available: RD=~p", [RD]), case riak_kv_wm_utils:get_riak_client( RiakProps, riak_kv_wm_utils:get_client_id(RD)) of - {ok, C} -> - {true, RD, - Ctx#ctx{api_version = wrq:path_info(api_version, RD), - method = wrq:method(RD), - client = C - }}; + {ok, _C} -> + {true, RD, Ctx}; {error, Reason} -> - handle_error({riak_client_error, Reason}, RD, Ctx) + Resp = riak_kv_wm_ts_util:set_error_message("Unable to connect to Riak: ~p", + [Reason], RD), + {false, Resp, Ctx} end. 
-is_authorized(RD, Ctx) -> - case riak_api_web_security:is_authorized(RD) of - false -> - {"Basic realm=\"Riak\"", RD, Ctx}; - {true, SecContext} -> - {true, RD, Ctx#ctx{security = SecContext}}; - insecure -> - %% XXX 301 may be more appropriate here, but since the http and - %% https port are different and configurable, it is hard to figure - %% out the redirect URL to serve. - handle_error(insecure_connection, RD, Ctx) +malformed_request(RD, Ctx) -> + try + {SqlType, SQL} = query_from_request(RD), + Table = table_from_sql(SQL), + Mod = riak_ql_ddl:make_module_name(Table), + {false, RD, Ctx#ctx{sql_type=SqlType, + compiled_query=SQL, + table=Table, + mod=Mod}} + catch + throw:{query, Reason} -> + lager:log(info, self(), "try in malformed_request backfired: ~p", [Reason]), + Response = riak_kv_wm_ts_util:set_error_message("bad query: ~p", [Reason], RD), + {true, Response, Ctx} + end. + +-spec is_authorized(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()|halt()). +is_authorized(RD, #ctx{sql_type=SqlType, table=Table}=Ctx) -> + checkpoint("is_authorized", RD), + Call = call_from_sql_type(SqlType), + lager:log(info, self(), "is_authorized type:~p", [SqlType]), + case riak_kv_wm_ts_util:authorize(Call, Table, RD) of + ok -> + {true, RD, Ctx}; + {error, ErrorMsg} -> + {ErrorMsg, RD, Ctx}; + {insecure, Halt, Resp} -> + {Halt, Resp, Ctx} end. --spec forbidden(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +%% method_to_intended_api_call('POST') -> +%% query_create_table; +%% method_to_intended_api_call('PUT') -> +%% query_select; +%% method_to_intended_api_call('GET') -> +%% query_describe. + + +-spec forbidden(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). forbidden(RD, Ctx) -> case riak_kv_wm_utils:is_forbidden(RD) of true -> @@ -126,305 +149,224 @@ forbidden(RD, Ctx) -> %% for now {false, RD, Ctx} end. -%% Because webmachine chooses to (not) call certain callbacks -%% depending on request method used, sometimes accept_doc_body is not -%% called at all, and we arrive at produce_doc_body empty-handed. -%% This is the case when curl is executed with -G and --data. --spec allowed_methods(#wm_reqdata{}, #ctx{}) -> - {[atom()], #wm_reqdata{}, #ctx{}}. +-spec allowed_methods(#wm_reqdata{}, #ctx{}) -> cb_rv_spec([atom()]). allowed_methods(RD, Ctx) -> {['GET', 'POST'], RD, Ctx}. --spec malformed_request(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -malformed_request(RD, Ctx) -> - %% this is plugged because requests are validated against - %% effective query contained in the body (and hence, we need - %% accept_doc_body to parse and extract things out of JSON first) - {false, RD, Ctx}. - --spec preexec(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -%% * extract query from request body or, failing that, from -%% POST k=v items, try to compile it; -%% * check API version; -%% * validate query type against HTTP method; -%% * check permissions on the query type. -preexec(RD, Ctx) -> - case validate_request(RD, Ctx) of - {true, RD1, Ctx1} -> - case check_permissions(RD1, Ctx1) of - {true, RD2, Ctx2} -> - call_api_function(RD2, Ctx2); - FalseWithDetails -> - FalseWithDetails - end; - FalseWithDetails -> - FalseWithDetails - end. - --spec validate_request(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -validate_request(RD, Ctx) -> - case wrq:path_info(api_version, RD) of - "v1" -> - validate_request_v1(RD, Ctx); - BadVersion -> - handle_error({unsupported_version, BadVersion}, RD, Ctx) - end. - --spec validate_request_v1(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. 
-validate_request_v1(RD, Ctx = #ctx{method = Method}) -> - Json = extract_json(RD), - case {Method, extract_query(Json), extract_cover_context(Json)} of - {Method, Query, CoverContext} - when (Method == 'GET' orelse Method == 'POST') - andalso is_list(Query) -> - case riak_ql_parser:ql_parse( - riak_ql_lexer:get_tokens(Query)) of - {error, Reason} -> - handle_error({query_parse_error, Reason}, RD, Ctx); - {ddl, DDL} -> - valid_params( - RD, Ctx#ctx{api_version = "v1", - query = Query, - compiled_query = DDL}); - {Type, Compiled} when Type == select; - Type == describe -> - {ok, SQL} = riak_kv_ts_util:build_sql_record( - Type, Compiled, CoverContext), - valid_params( - RD, Ctx#ctx{api_version = "v1", - query = Query, - compiled_query = SQL}) - end; - _Invalid -> - handle_error({malformed_request, Method}, RD, Ctx) - end. +query_from_request(RD) -> + QueryStr = query_string_from_request(RD), + lager:log(info, self(), "query_from_request: ~p", [QueryStr]), + compile_query(QueryStr). - --spec valid_params(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -valid_params(RD, Ctx) -> - %% no params currently for query - {true, RD, Ctx}. - -%% This is a special case for curl -G. `curl -G host --data $data` -%% will send the $data in URL instead of in the body, so we try to -%% look for it in req_qs. -extract_json(RD) -> - case proplists:get_value("json", RD#wm_reqdata.req_qs) of +query_string_from_request(RD) -> + case wrq:get_qs_value("query", RD) of undefined -> - %% if it was a PUT or POST, data is in body - binary_to_list(wrq:req_body(RD)); - BodyInPost -> - BodyInPost + throw({query, "no query key in query string"}); + Str -> + Str end. --spec extract_query(binary()) -> term(). -extract_query(Json) -> - try mochijson2:decode(Json) of - {struct, Decoded} when is_list(Decoded) -> - validate_ts_query( - proplists:get_value(<<"query">>, Decoded)) - catch - _:_ -> - undefined +compile_query(QueryStr) -> + case riak_ql_parser:ql_parse( + riak_ql_lexer:get_tokens(QueryStr)) of + {error, Reason} -> + ErrorMsg = lists:flatten(io_lib:format("parse error: ~p", [Reason])), + throw({query, ErrorMsg}); + ValidRes -> + ValidRes end. --spec extract_cover_context(binary()) -> term(). -extract_cover_context(Json) -> - try mochijson2:decode(Json) of - {struct, Decoded} when is_list(Decoded) -> - validate_ts_cover_context( - proplists:get_value(<<"coverage_context">>, Decoded)) - catch - _:_ -> - undefined - end. -validate_ts_query(Q) when is_binary(Q) -> - binary_to_list(Q); -validate_ts_query(_) -> - undefined. - -validate_ts_cover_context(C) when is_binary(C) -> - C; -validate_ts_cover_context(_) -> - undefined. - - --spec check_permissions(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -check_permissions(RD, Ctx = #ctx{security = undefined}) -> - {true, RD, Ctx}; -check_permissions(RD, Ctx = #ctx{security = Security, - compiled_query = CompiledQry}) -> - case riak_core_security:check_permission( - decode_query_permissions(CompiledQry), Security) of - {false, Error, _} -> - handle_error( - {not_permitted, utf8_to_binary(Error)}, RD, Ctx); - _ -> - {true, RD, Ctx} - end. +%% @todo: should really be in riak_ql somewhere +table_from_sql(#ddl_v1{table=Table}) -> Table; +table_from_sql(#riak_select_v1{'FROM'=Table}) -> Table; +table_from_sql(#riak_sql_describe_v1{'DESCRIBE'=Table}) -> Table. + +call_from_sql_type(ddl) -> query_create_table; +call_from_sql_type(select) -> query_select; +call_from_sql_type(describe) -> query_describe. 
-decode_query_permissions(#ddl_v1{table = NewBucketType}) -> - {riak_kv_ts_util:api_call_to_perm(query_create_table), NewBucketType}; -decode_query_permissions(?SQL_SELECT{'FROM' = Table}) -> - {riak_kv_ts_util:api_call_to_perm(query_select), Table}; -decode_query_permissions(#riak_sql_describe_v1{'DESCRIBE' = Table}) -> - {riak_kv_ts_util:api_call_to_perm(query_describe), Table}. -spec content_types_provided(#wm_reqdata{}, #ctx{}) -> - {[{ContentType::string(), Producer::atom()}], - #wm_reqdata{}, #ctx{}}. + cb_rv_spec([{ContentType::string(), Producer::atom()}]). content_types_provided(RD, Ctx) -> {[{"application/json", produce_doc_body}], RD, Ctx}. -spec encodings_provided(#wm_reqdata{}, #ctx{}) -> - {[{Encoding::string(), Producer::function()}], - #wm_reqdata{}, #ctx{}}. + cb_rv_spec([{Encoding::string(), Producer::function()}]). encodings_provided(RD, Ctx) -> {riak_kv_wm_utils:default_encodings(), RD, Ctx}. -spec content_types_accepted(#wm_reqdata{}, #ctx{}) -> - {[{ContentType::string(), Acceptor::atom()}], - #wm_reqdata{}, #ctx{}}. + cb_rv_spec([{ContentType::string(), Acceptor::atom()}]). content_types_accepted(RD, Ctx) -> - {[{"application/json", accept_doc_body}], RD, Ctx}. +% {[{"application/json", accept_doc_body}], RD, Ctx}. +%% @todo: if we end up without a body in the request this function should be deleted. + {[], RD, Ctx}. --spec resource_exists(#wm_reqdata{}, #ctx{}) -> - {boolean(), #wm_reqdata{}, #ctx{}}. -resource_exists(RD0, Ctx0) -> - case preexec(RD0, Ctx0) of - {true, RD, Ctx} -> - call_api_function(RD, Ctx); - FalseWithDetails -> - FalseWithDetails - end. - --spec process_post(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -%% @doc Pass through requests to allow POST to function -%% as PUT for clients that do not support PUT. -process_post(RD, Ctx) -> - accept_doc_body(RD, Ctx). - --spec accept_doc_body(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -accept_doc_body(RD0, Ctx0) -> - case preexec(RD0, Ctx0) of - {true, RD, Ctx} -> - call_api_function(RD, Ctx); - FalseWithDetails -> - FalseWithDetails - end. +-spec post_is_create(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). +post_is_create(RD, Ctx) -> + {false, RD, Ctx}. --spec call_api_function(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -call_api_function(RD, Ctx = #ctx{result = Result}) - when Result /= undefined -> - lager:debug("Function already executed", []), - {true, RD, Ctx}; -call_api_function(RD, Ctx = #ctx{method = Method, - compiled_query = CompiledQry}) -> - case CompiledQry of - SQL = ?SQL_SELECT{} when Method == 'GET' -> - %% inject coverage context - process_query(SQL, RD, Ctx); - Other when (is_record(Other, ddl_v1) andalso Method == 'POST') orelse - (is_record(Other, riak_sql_describe_v1) andalso Method == 'GET') -> - process_query(Other, RD, Ctx); - _Other -> - handle_error({inappropriate_sql_for_method, Method}, RD, Ctx) +-spec process_post(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). +process_post(RD, #ctx{sql_type=ddl, compiled_query=SQL}=Ctx) -> + case create_table(SQL) of + ok -> + Result = [{success, true}], %% represents ok + Json = to_json(Result), + {true, wrq:append_to_response_body(Json, RD), Ctx}; + {error, Reason} -> + Resp = riak_kv_wm_ts_util:set_error_message("query error: ~p", + [Reason], + RD), + {{halt, 500}, Resp, Ctx} end. - -process_query(DDL = #ddl_v1{table = Table}, RD, Ctx) -> +%% -spec accept_doc_body(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). +%% accept_doc_body(RD0, Ctx0) -> +%% {true, RD0, Ctx0}. + +%% -spec call_api_function(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). 
+%% call_api_function(RD, Ctx = #ctx{result = Result}) +%% when Result /= undefined -> +%% lager:debug("Function already executed", []), +%% {true, RD, Ctx}; +%% call_api_function(RD, Ctx = #ctx{method = Method, +%% compiled_query = CompiledQry}) -> +%% case CompiledQry of +%% SQL = #riak_select_v1{} when Method == 'GET' -> +%% %% inject coverage context +%% process_query(SQL, RD, Ctx); +%% Other when (is_record(Other, ddl_v1) andalso Method == 'POST') orelse +%% (is_record(Other, riak_sql_describe_v1) andalso Method == 'GET') -> +%% process_query(Other, RD, Ctx); +%% _Other -> +%% handle_error({inappropriate_sql_for_method, Method}, RD, Ctx) +%% end. + + +create_table(DDL = #ddl_v1{table = Table}) -> + %% would be better to use a function to get the table out. {ok, Props1} = riak_kv_ts_util:apply_timeseries_bucket_props(DDL, []), Props2 = [riak_kv_wm_utils:erlify_bucket_prop(P) || P <- Props1], - %% TODO: let's not bother collecting user properties from (say) - %% sidecar object in body JSON: when #ddl_v2 work is merged, we - %% will have a way to collect those bespoke table properties from - %% WITH clause. case riak_core_bucket_type:create(Table, Props2) of ok -> - wait_until_active(Table, RD, Ctx, ?TABLE_ACTIVATE_WAIT); + wait_until_active(Table, ?TABLE_ACTIVATE_WAIT); {error, Reason} -> - handle_error({table_create_fail, Table, Reason}, RD, Ctx) - end; - -process_query(SQL = ?SQL_SELECT{'FROM' = Table}, RD, Ctx0 = #ctx{}) -> - Mod = riak_ql_ddl:make_module_name(Table), - case catch Mod:get_ddl() of - {_, {undef, _}} -> - handle_error({no_such_table, Table}, RD, Ctx0); - DDL -> - case riak_kv_ts_api:query(SQL, DDL) of - {ok, Data} -> - {ColumnNames, _ColumnTypes, Rows} = Data, - Ctx = Ctx0#ctx{result = {ColumnNames, Rows}}, - prepare_data_in_body(RD, Ctx); - %% the following timeouts are known and distinguished: - {error, qry_worker_timeout} -> - %% the eleveldb process didn't send us any response after - %% 10 sec (hardcoded in riak_kv_qry), and probably died - handle_error(query_worker_timeout, RD, Ctx0); - {error, backend_timeout} -> - %% the eleveldb process did manage to send us a timeout - %% response - handle_error(backend_timeout, RD, Ctx0); - - {error, Reason} -> - handle_error({query_exec_error, Reason}, RD, Ctx0) - end - end; - -process_query(SQL = #riak_sql_describe_v1{'DESCRIBE' = Table}, RD, Ctx0 = #ctx{}) -> - Mod = riak_ql_ddl:make_module_name(Table), - case catch Mod:get_ddl() of - {_, {undef, _}} -> - handle_error({no_such_table, Table}, RD, Ctx0); - DDL -> - case riak_kv_ts_api:query(SQL, DDL) of - {ok, Data} -> - ColumnNames = [<<"Column">>, <<"Type">>, <<"Is Null">>, - <<"Primary Key">>, <<"Local Key">>], - Ctx = Ctx0#ctx{result = {ColumnNames, Data}}, - prepare_data_in_body(RD, Ctx); - {error, Reason} -> - handle_error({query_exec_error, Reason}, RD, Ctx0) - end + {error,{table_create_fail, Table, Reason}} end. - -wait_until_active(Table, RD, Ctx, 0) -> - handle_error({table_activate_fail, Table}, RD, Ctx); -wait_until_active(Table, RD, Ctx, Seconds) -> +%% process_query(DDL = #ddl_v1{table = Table}, RD, Ctx) -> +%% {ok, Props1} = riak_kv_ts_util:apply_timeseries_bucket_props(DDL, []), +%% Props2 = [riak_kv_wm_utils:erlify_bucket_prop(P) || P <- Props1], +%% %% TODO: let's not bother collecting user properties from (say) +%% %% sidecar object in body JSON: when #ddl_v2 work is merged, we +%% %% will have a way to collect those bespoke table properties from +%% %% WITH clause. 
+%% case riak_core_bucket_type:create(Table, Props2) of +%% ok -> +%% wait_until_active(Table, RD, Ctx, ?TABLE_ACTIVATE_WAIT); +%% {error, Reason} -> +%% handle_error({table_create_fail, Table, Reason}, RD, Ctx) +%% end; + +%% process_query(SQL = #riak_select_v1{'FROM' = Table}, RD, Ctx0 = #ctx{}) -> +%% Mod = riak_ql_ddl:make_module_name(Table), +%% case catch Mod:get_ddl() of +%% {_, {undef, _}} -> +%% handle_error({no_such_table, Table}, RD, Ctx0); +%% DDL -> +%% case riak_kv_ts_api:query(SQL, DDL) of +%% {ok, Data} -> +%% {ColumnNames, _ColumnTypes, Rows} = Data, +%% Ctx = Ctx0#ctx{result = {ColumnNames, Rows}}, +%% prepare_data_in_body(RD, Ctx); +%% %% the following timeouts are known and distinguished: +%% {error, qry_worker_timeout} -> +%% %% the eleveldb process didn't send us any response after +%% %% 10 sec (hardcoded in riak_kv_qry), and probably died +%% handle_error(query_worker_timeout, RD, Ctx0); +%% {error, backend_timeout} -> +%% %% the eleveldb process did manage to send us a timeout +%% %% response +%% handle_error(backend_timeout, RD, Ctx0); + +%% {error, Reason} -> +%% handle_error({query_exec_error, Reason}, RD, Ctx0) +%% end +%% end; + +%% process_query(SQL = #riak_sql_describe_v1{'DESCRIBE' = Table}, RD, Ctx0 = #ctx{}) -> +%% Mod = riak_ql_ddl:make_module_name(Table), +%% case catch Mod:get_ddl() of +%% {_, {undef, _}} -> +%% handle_error({no_such_table, Table}, RD, Ctx0); +%% DDL -> +%% case riak_kv_ts_api:query(SQL, DDL) of +%% {ok, Data} -> +%% ColumnNames = [<<"Column">>, <<"Type">>, <<"Is Null">>, +%% <<"Primary Key">>, <<"Local Key">>], +%% Ctx = Ctx0#ctx{result = {ColumnNames, Data}}, +%% prepare_data_in_body(RD, Ctx); +%% {error, Reason} -> +%% handle_error({query_exec_error, Reason}, RD, Ctx0) +%% end +%% end. + + +wait_until_active(Table, 0) -> + {error, {table_activate_fail, Table}}; +wait_until_active(Table, Seconds) -> case riak_core_bucket_type:activate(Table) of ok -> - prepare_data_in_body(RD, Ctx#ctx{result = {[],[]}}); - %% a way for CREATE TABLE queries to return 'ok' on success + ok; {error, not_ready} -> timer:sleep(1000), - wait_until_active(Table, RD, Ctx, Seconds - 1); + wait_until_active(Table, Seconds - 1); {error, undefined} -> %% this is inconceivable because create(Table) has %% just succeeded, so it's here mostly to pacify %% the dialyzer (and of course, for the odd chance %% of Erlang imps crashing nodes between create %% and activate calls) - handle_error({table_created_missing, Table}, RD, Ctx) + {error, {table_created_missing, Table}} end. -prepare_data_in_body(RD0, Ctx0) -> - {Json, RD1, Ctx1} = produce_doc_body(RD0, Ctx0), - {true, wrq:append_to_response_body(Json, RD1), Ctx1}. - --spec produce_doc_body(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +%% wait_until_active(Table, RD, Ctx, 0) -> +%% handle_error({table_activate_fail, Table}, RD, Ctx); +%% wait_until_active(Table, RD, Ctx, Seconds) -> +%% case riak_core_bucket_type:activate(Table) of +%% ok -> +%% prepare_data_in_body(RD, Ctx#ctx{result = {[],[]}}); +%% %% a way for CREATE TABLE queries to return 'ok' on success +%% {error, not_ready} -> +%% timer:sleep(1000), +%% wait_until_active(Table, RD, Ctx, Seconds - 1); +%% {error, undefined} -> +%% %% this is inconceivable because create(Table) has +%% %% just succeeded, so it's here mostly to pacify +%% %% the dialyzer (and of course, for the odd chance +%% %% of Erlang imps crashing nodes between create +%% %% and activate calls) +%% handle_error({table_created_missing, Table}, RD, Ctx) +%% end. 
+ +%% prepare_data_in_body(RD0, Ctx0) -> +%% {Json, RD1, Ctx1} = produce_doc_body(RD0, Ctx0), +%% {true, wrq:append_to_response_body(Json, RD1), Ctx1}. + + +-spec produce_doc_body(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(iolist()). %% @doc Extract the value of the document, and place it in the %% response body of the request. produce_doc_body(RD, Ctx = #ctx{result = {Columns, Rows}}) -> @@ -433,64 +375,68 @@ produce_doc_body(RD, Ctx = #ctx{result = {Columns, Rows}}) -> {<<"rows">>, Rows}]}), RD, Ctx}. - -error_out(Type, Fmt, Args, RD, Ctx) -> - {Type, - wrq:set_resp_header( - "Content-Type", "text/plain", wrq:append_to_response_body( - flat_format(Fmt, Args), RD)), - Ctx}. - --spec handle_error(atom()|tuple(), #wm_reqdata{}, #ctx{}) -> {tuple(), #wm_reqdata{}, #ctx{}}. -handle_error(Error, RD, Ctx) -> - case Error of - {riak_client_error, Reason} -> - error_out(false, - "Unable to connect to Riak: ~p", [Reason], RD, Ctx); - insecure_connection -> - error_out({halt, 426}, - "Security is enabled and Riak does not" - " accept credentials over HTTP. Try HTTPS instead.", [], RD, Ctx); - {unsupported_version, BadVersion} -> - error_out({halt, 412}, - "Unsupported API version ~s", [BadVersion], RD, Ctx); - {not_permitted, Table} -> - error_out({halt, 401}, - "Access to table ~ts not allowed", [Table], RD, Ctx); - {malformed_request, Method} -> - error_out({halt, 400}, - "Malformed ~s request", [Method], RD, Ctx); - {no_such_table, Table} -> - error_out({halt, 404}, - "Table \"~ts\" does not exist", [Table], RD, Ctx); - {query_parse_error, Detailed} -> - error_out({halt, 400}, - "Malformed query: ~ts", [Detailed], RD, Ctx); - {table_create_fail, Table, Reason} -> - error_out({halt, 500}, - "Failed to create table \"~ts\": ~p", [Table, Reason], RD, Ctx); - query_worker_timeout -> - error_out({halt, 503}, - "Query worker timeout", [], RD, Ctx); - backend_timeout -> - error_out({halt, 503}, - "Storage backend timeout", [], RD, Ctx); - {query_exec_error, Detailed} -> - error_out({halt, 400}, - "Query execution failed: ~ts", [Detailed], RD, Ctx); - {table_activate_fail, Table} -> - error_out({halt, 500}, - "Failed to activate bucket type for table \"~ts\"", [Table], RD, Ctx); - {table_created_missing, Table} -> - error_out({halt, 500}, - "Bucket type for table \"~ts\" disappeared suddenly before activation", [Table], RD, Ctx); - {inappropriate_sql_for_method, Method} -> - error_out({halt, 400}, - "Inappropriate method ~s for SQL query type", [Method], RD, Ctx) - end. - -flat_format(Format, Args) -> - lists:flatten(io_lib:format(Format, Args)). - -utf8_to_binary(S) -> - unicode:characters_to_binary(S, utf8, utf8). +to_json({Columns, Rows}) when is_list(Columns), is_list(Rows) -> + mochijson2:encode( + {struct, [{<<"columns">>, Columns}, + {<<"rows">>, Rows}]}); +to_json(Other) -> + mochijson2:encode(Other). + + + +%% -spec handle_error(atom()|tuple(), #wm_reqdata{}, #ctx{}) -> {tuple(), #wm_reqdata{}, #ctx{}}. +%% handle_error(Error, RD, Ctx) -> +%% case Error of +%% {riak_client_error, Reason} -> +%% error_out(false, +%% "Unable to connect to Riak: ~p", [Reason], RD, Ctx); +%% insecure_connection -> +%% error_out({halt, 426}, +%% "Security is enabled and Riak does not" +%% " accept credentials over HTTP. 
Try HTTPS instead.", [], RD, Ctx); +%% {unsupported_version, BadVersion} -> +%% error_out({halt, 412}, +%% "Unsupported API version ~s", [BadVersion], RD, Ctx); +%% {not_permitted, Table} -> +%% error_out({halt, 401}, +%% "Access to table ~ts not allowed", [Table], RD, Ctx); +%% {malformed_request, Method} -> +%% error_out({halt, 400}, +%% "Malformed ~s request", [Method], RD, Ctx); +%% {no_such_table, Table} -> +%% error_out({halt, 404}, +%% "Table \"~ts\" does not exist", [Table], RD, Ctx); +%% {query_parse_error, Detailed} -> +%% error_out({halt, 400}, +%% "Malformed query: ~ts", [Detailed], RD, Ctx); +%% {table_create_fail, Table, Reason} -> +%% error_out({halt, 500}, +%% "Failed to create table \"~ts\": ~p", [Table, Reason], RD, Ctx); +%% query_worker_timeout -> +%% error_out({halt, 503}, +%% "Query worker timeout", [], RD, Ctx); +%% backend_timeout -> +%% error_out({halt, 503}, +%% "Storage backend timeout", [], RD, Ctx); +%% {query_exec_error, Detailed} -> +%% error_out({halt, 400}, +%% "Query execution failed: ~ts", [Detailed], RD, Ctx); +%% {table_activate_fail, Table} -> +%% error_out({halt, 500}, +%% "Failed to activate bucket type for table \"~ts\"", [Table], RD, Ctx); +%% {table_created_missing, Table} -> +%% error_out({halt, 500}, +%% "Bucket type for table \"~ts\" disappeared suddenly before activation", [Table], RD, Ctx); +%% {inappropriate_sql_for_method, Method} -> +%% error_out({halt, 400}, +%% "Inappropriate method ~s for SQL query type", [Method], RD, Ctx) +%% end. + +%% flat_format(Format, Args) -> +%% lists:flatten(io_lib:format(Format, Args)). + +%% utf8_to_binary(S) -> +%% unicode:characters_to_binary(S, utf8, utf8). + +checkpoint(Format, Args) -> + lager:log(info, self(), Format, Args). From 5690c59ed9411156c4acda5a0f0b4da733fe97f6 Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Sun, 13 Mar 2016 21:00:15 +0100 Subject: [PATCH 042/122] WIP. timeseries describe query now works. --- src/riak_kv_wm_timeseries_query.erl | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/src/riak_kv_wm_timeseries_query.erl b/src/riak_kv_wm_timeseries_query.erl index 5074aa2e29..6078fc3c29 100644 --- a/src/riak_kv_wm_timeseries_query.erl +++ b/src/riak_kv_wm_timeseries_query.erl @@ -103,6 +103,7 @@ service_available(RD, Ctx = #ctx{riak = RiakProps}) -> malformed_request(RD, Ctx) -> try {SqlType, SQL} = query_from_request(RD), + checkpoint("malformed_request SqlType=~p, SQL=~p", [SqlType, SQL]), Table = table_from_sql(SQL), Mod = riak_ql_ddl:make_module_name(Table), {false, RD, Ctx#ctx{sql_type=SqlType, @@ -176,8 +177,12 @@ compile_query(QueryStr) -> {error, Reason} -> ErrorMsg = lists:flatten(io_lib:format("parse error: ~p", [Reason])), throw({query, ErrorMsg}); - ValidRes -> - ValidRes + {ddl, _ } = Res -> + Res; + {Type, Compiled} when Type==select; Type==describe -> + {ok, SQL} = riak_kv_ts_util:build_sql_record( + Type, Compiled, undefined), + {Type, SQL} end. 
@@ -229,8 +234,24 @@ process_post(RD, #ctx{sql_type=ddl, compiled_query=SQL}=Ctx) -> [Reason], RD), {{halt, 500}, Resp, Ctx} + end; +process_post(RD, #ctx{sql_type=describe, + compiled_query=SQL, + mod=Mod}=Ctx) -> + DDL = Mod:get_ddl(), %% might be faster to store this earlier on + case riak_kv_ts_api:query(SQL, DDL) of + {ok, Data} -> + ColumnNames = [<<"Column">>, <<"Type">>, <<"Is Null">>, + <<"Primary Key">>, <<"Local Key">>], + Json = to_json({ColumnNames, Data}), + {true, wrq:append_to_response_body(Json, RD), Ctx}; + {error, Reason} -> + Resp = riak_kv_wm_ts_util:set_error_message( + "describe failed: ~p", [Reason], RD), + {{halt, 500}, Resp, Ctx} end. + %% -spec accept_doc_body(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). %% accept_doc_body(RD0, Ctx0) -> %% {true, RD0, Ctx0}. From 7de1579de81445976d3bf377209796282c20a2bb Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Sun, 13 Mar 2016 21:14:20 +0100 Subject: [PATCH 043/122] timeseries select query works. --- src/riak_kv_wm_timeseries_query.erl | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/riak_kv_wm_timeseries_query.erl b/src/riak_kv_wm_timeseries_query.erl index 6078fc3c29..d85636215b 100644 --- a/src/riak_kv_wm_timeseries_query.erl +++ b/src/riak_kv_wm_timeseries_query.erl @@ -249,9 +249,37 @@ process_post(RD, #ctx{sql_type=describe, Resp = riak_kv_wm_ts_util:set_error_message( "describe failed: ~p", [Reason], RD), {{halt, 500}, Resp, Ctx} + end; +process_post(RD, #ctx{sql_type=select, + compiled_query=SQL, + mod=Mod}=Ctx) -> + DDL = Mod:get_ddl(), %% might be faster to store this earlier on + case riak_kv_ts_api:query(SQL, DDL) of + {ok, Data} -> + {ColumnNames, _ColumnTypes, Rows} = Data, + Json = to_json({ColumnNames, Rows}), + {true, wrq:append_to_response_body(Json, RD), Ctx}; + %% the following timeouts are known and distinguished: + {error, qry_worker_timeout} -> + %% the eleveldb process didn't send us any response after + %% 10 sec (hardcoded in riak_kv_qry), and probably died + Resp = riak_kv_wm_ts_util:set_error_message( + "qry_worker_timeout", [], RD), + {false, Resp, Ctx}; + {error, backend_timeout} -> + %% the eleveldb process did manage to send us a timeout + %% response + Resp = riak_kv_wm_ts_util:set_error_message( + "backend_timeout", [], RD), + {false, Resp, Ctx}; + {error, Reason} -> + Resp = riak_kv_wm_ts_util:set_error_message( + "select query execution error: ~p", [Reason], RD), + {false, Resp, Ctx} end. + %% -spec accept_doc_body(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). %% accept_doc_body(RD0, Ctx0) -> %% {true, RD0, Ctx0}. From bc2a24d80af21d81e07268dad70194ce44ce47f2 Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Sun, 13 Mar 2016 21:26:50 +0100 Subject: [PATCH 044/122] riak_kv_vm_timeseries remove dead code --- src/riak_kv_wm_timeseries_query.erl | 177 ---------------------------- 1 file changed, 177 deletions(-) diff --git a/src/riak_kv_wm_timeseries_query.erl b/src/riak_kv_wm_timeseries_query.erl index d85636215b..b21a6fefe7 100644 --- a/src/riak_kv_wm_timeseries_query.erl +++ b/src/riak_kv_wm_timeseries_query.erl @@ -131,15 +131,6 @@ is_authorized(RD, #ctx{sql_type=SqlType, table=Table}=Ctx) -> {Halt, Resp, Ctx} end. - -%% method_to_intended_api_call('POST') -> -%% query_create_table; -%% method_to_intended_api_call('PUT') -> -%% query_select; -%% method_to_intended_api_call('GET') -> -%% query_describe. - - -spec forbidden(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). 
forbidden(RD, Ctx) -> case riak_kv_wm_utils:is_forbidden(RD) of @@ -156,8 +147,6 @@ forbidden(RD, Ctx) -> allowed_methods(RD, Ctx) -> {['GET', 'POST'], RD, Ctx}. - - query_from_request(RD) -> QueryStr = query_string_from_request(RD), lager:log(info, self(), "query_from_request: ~p", [QueryStr]), @@ -185,8 +174,6 @@ compile_query(QueryStr) -> {Type, SQL} end. - - %% @todo: should really be in riak_ql somewhere table_from_sql(#ddl_v1{table=Table}) -> Table; table_from_sql(#riak_select_v1{'FROM'=Table}) -> Table; @@ -278,31 +265,6 @@ process_post(RD, #ctx{sql_type=select, {false, Resp, Ctx} end. - - -%% -spec accept_doc_body(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). -%% accept_doc_body(RD0, Ctx0) -> -%% {true, RD0, Ctx0}. - -%% -spec call_api_function(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). -%% call_api_function(RD, Ctx = #ctx{result = Result}) -%% when Result /= undefined -> -%% lager:debug("Function already executed", []), -%% {true, RD, Ctx}; -%% call_api_function(RD, Ctx = #ctx{method = Method, -%% compiled_query = CompiledQry}) -> -%% case CompiledQry of -%% SQL = #riak_select_v1{} when Method == 'GET' -> -%% %% inject coverage context -%% process_query(SQL, RD, Ctx); -%% Other when (is_record(Other, ddl_v1) andalso Method == 'POST') orelse -%% (is_record(Other, riak_sql_describe_v1) andalso Method == 'GET') -> -%% process_query(Other, RD, Ctx); -%% _Other -> -%% handle_error({inappropriate_sql_for_method, Method}, RD, Ctx) -%% end. - - create_table(DDL = #ddl_v1{table = Table}) -> %% would be better to use a function to get the table out. {ok, Props1} = riak_kv_ts_util:apply_timeseries_bucket_props(DDL, []), @@ -314,64 +276,6 @@ create_table(DDL = #ddl_v1{table = Table}) -> {error,{table_create_fail, Table, Reason}} end. -%% process_query(DDL = #ddl_v1{table = Table}, RD, Ctx) -> -%% {ok, Props1} = riak_kv_ts_util:apply_timeseries_bucket_props(DDL, []), -%% Props2 = [riak_kv_wm_utils:erlify_bucket_prop(P) || P <- Props1], -%% %% TODO: let's not bother collecting user properties from (say) -%% %% sidecar object in body JSON: when #ddl_v2 work is merged, we -%% %% will have a way to collect those bespoke table properties from -%% %% WITH clause. 
-%% case riak_core_bucket_type:create(Table, Props2) of -%% ok -> -%% wait_until_active(Table, RD, Ctx, ?TABLE_ACTIVATE_WAIT); -%% {error, Reason} -> -%% handle_error({table_create_fail, Table, Reason}, RD, Ctx) -%% end; - -%% process_query(SQL = #riak_select_v1{'FROM' = Table}, RD, Ctx0 = #ctx{}) -> -%% Mod = riak_ql_ddl:make_module_name(Table), -%% case catch Mod:get_ddl() of -%% {_, {undef, _}} -> -%% handle_error({no_such_table, Table}, RD, Ctx0); -%% DDL -> -%% case riak_kv_ts_api:query(SQL, DDL) of -%% {ok, Data} -> -%% {ColumnNames, _ColumnTypes, Rows} = Data, -%% Ctx = Ctx0#ctx{result = {ColumnNames, Rows}}, -%% prepare_data_in_body(RD, Ctx); -%% %% the following timeouts are known and distinguished: -%% {error, qry_worker_timeout} -> -%% %% the eleveldb process didn't send us any response after -%% %% 10 sec (hardcoded in riak_kv_qry), and probably died -%% handle_error(query_worker_timeout, RD, Ctx0); -%% {error, backend_timeout} -> -%% %% the eleveldb process did manage to send us a timeout -%% %% response -%% handle_error(backend_timeout, RD, Ctx0); - -%% {error, Reason} -> -%% handle_error({query_exec_error, Reason}, RD, Ctx0) -%% end -%% end; - -%% process_query(SQL = #riak_sql_describe_v1{'DESCRIBE' = Table}, RD, Ctx0 = #ctx{}) -> -%% Mod = riak_ql_ddl:make_module_name(Table), -%% case catch Mod:get_ddl() of -%% {_, {undef, _}} -> -%% handle_error({no_such_table, Table}, RD, Ctx0); -%% DDL -> -%% case riak_kv_ts_api:query(SQL, DDL) of -%% {ok, Data} -> -%% ColumnNames = [<<"Column">>, <<"Type">>, <<"Is Null">>, -%% <<"Primary Key">>, <<"Local Key">>], -%% Ctx = Ctx0#ctx{result = {ColumnNames, Data}}, -%% prepare_data_in_body(RD, Ctx); -%% {error, Reason} -> -%% handle_error({query_exec_error, Reason}, RD, Ctx0) -%% end -%% end. - - wait_until_active(Table, 0) -> {error, {table_activate_fail, Table}}; wait_until_active(Table, Seconds) -> @@ -390,31 +294,6 @@ wait_until_active(Table, Seconds) -> {error, {table_created_missing, Table}} end. - -%% wait_until_active(Table, RD, Ctx, 0) -> -%% handle_error({table_activate_fail, Table}, RD, Ctx); -%% wait_until_active(Table, RD, Ctx, Seconds) -> -%% case riak_core_bucket_type:activate(Table) of -%% ok -> -%% prepare_data_in_body(RD, Ctx#ctx{result = {[],[]}}); -%% %% a way for CREATE TABLE queries to return 'ok' on success -%% {error, not_ready} -> -%% timer:sleep(1000), -%% wait_until_active(Table, RD, Ctx, Seconds - 1); -%% {error, undefined} -> -%% %% this is inconceivable because create(Table) has -%% %% just succeeded, so it's here mostly to pacify -%% %% the dialyzer (and of course, for the odd chance -%% %% of Erlang imps crashing nodes between create -%% %% and activate calls) -%% handle_error({table_created_missing, Table}, RD, Ctx) -%% end. - -%% prepare_data_in_body(RD0, Ctx0) -> -%% {Json, RD1, Ctx1} = produce_doc_body(RD0, Ctx0), -%% {true, wrq:append_to_response_body(Json, RD1), Ctx1}. - - -spec produce_doc_body(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(iolist()). %% @doc Extract the value of the document, and place it in the %% response body of the request. @@ -431,61 +310,5 @@ to_json({Columns, Rows}) when is_list(Columns), is_list(Rows) -> to_json(Other) -> mochijson2:encode(Other). - - -%% -spec handle_error(atom()|tuple(), #wm_reqdata{}, #ctx{}) -> {tuple(), #wm_reqdata{}, #ctx{}}. 
-%% handle_error(Error, RD, Ctx) -> -%% case Error of -%% {riak_client_error, Reason} -> -%% error_out(false, -%% "Unable to connect to Riak: ~p", [Reason], RD, Ctx); -%% insecure_connection -> -%% error_out({halt, 426}, -%% "Security is enabled and Riak does not" -%% " accept credentials over HTTP. Try HTTPS instead.", [], RD, Ctx); -%% {unsupported_version, BadVersion} -> -%% error_out({halt, 412}, -%% "Unsupported API version ~s", [BadVersion], RD, Ctx); -%% {not_permitted, Table} -> -%% error_out({halt, 401}, -%% "Access to table ~ts not allowed", [Table], RD, Ctx); -%% {malformed_request, Method} -> -%% error_out({halt, 400}, -%% "Malformed ~s request", [Method], RD, Ctx); -%% {no_such_table, Table} -> -%% error_out({halt, 404}, -%% "Table \"~ts\" does not exist", [Table], RD, Ctx); -%% {query_parse_error, Detailed} -> -%% error_out({halt, 400}, -%% "Malformed query: ~ts", [Detailed], RD, Ctx); -%% {table_create_fail, Table, Reason} -> -%% error_out({halt, 500}, -%% "Failed to create table \"~ts\": ~p", [Table, Reason], RD, Ctx); -%% query_worker_timeout -> -%% error_out({halt, 503}, -%% "Query worker timeout", [], RD, Ctx); -%% backend_timeout -> -%% error_out({halt, 503}, -%% "Storage backend timeout", [], RD, Ctx); -%% {query_exec_error, Detailed} -> -%% error_out({halt, 400}, -%% "Query execution failed: ~ts", [Detailed], RD, Ctx); -%% {table_activate_fail, Table} -> -%% error_out({halt, 500}, -%% "Failed to activate bucket type for table \"~ts\"", [Table], RD, Ctx); -%% {table_created_missing, Table} -> -%% error_out({halt, 500}, -%% "Bucket type for table \"~ts\" disappeared suddenly before activation", [Table], RD, Ctx); -%% {inappropriate_sql_for_method, Method} -> -%% error_out({halt, 400}, -%% "Inappropriate method ~s for SQL query type", [Method], RD, Ctx) -%% end. - -%% flat_format(Format, Args) -> -%% lists:flatten(io_lib:format(Format, Args)). - -%% utf8_to_binary(S) -> -%% unicode:characters_to_binary(S, utf8, utf8). - checkpoint(Format, Args) -> lager:log(info, self(), Format, Args). From 91798d0323a04b96d700d7c65bcdda5b0ef48d66 Mon Sep 17 00:00:00 2001 From: Brett Hazen Date: Mon, 14 Mar 2016 04:22:15 +0000 Subject: [PATCH 045/122] Address some of @hmmr's concerns --- include/riak_kv_ts.hrl | 6 ++-- src/riak_kv_pb_timeseries.erl | 62 +++++++++++++++++++---------------- src/riak_kv_ts_util.erl | 20 +++++++---- 3 files changed, 51 insertions(+), 37 deletions(-) diff --git a/include/riak_kv_ts.hrl b/include/riak_kv_ts.hrl index 97e8d338fc..e4f951488b 100644 --- a/include/riak_kv_ts.hrl +++ b/include/riak_kv_ts.hrl @@ -29,6 +29,7 @@ %% the result type of a query, rows means to return all matching rows, aggregate %% returns one row calculated from the result set for the query. -type select_result_type() :: rows | aggregate. +-type value_type() :: binary() | float() | integer() | boolean(). -record(riak_sel_clause_v1, { @@ -65,8 +66,9 @@ -record(riak_sql_insert_v1, { 'INSERT' = <<>> :: binary(), - fields :: [{identifier, binary()}], - values :: list(list(term())) + fields :: [identifier()], + values :: [[value_type()]], + helper_mod :: atom() }). -define(SQL_SELECT, #riak_select_v1). diff --git a/src/riak_kv_pb_timeseries.erl b/src/riak_kv_pb_timeseries.erl index c72897d3ff..dca9e49beb 100644 --- a/src/riak_kv_pb_timeseries.erl +++ b/src/riak_kv_pb_timeseries.erl @@ -269,24 +269,30 @@ wait_until_active(Table, State, Seconds) -> #tsqueryresp{} | #rpberrorresp{}. 
make_insert_response(Mod, #riak_sql_insert_v1{'INSERT' = Table, fields = Fields, values = Values}) -> case lookup_field_positions(Mod, Fields) of - {error, FieldReason} -> - make_rpberrresp(?E_BAD_QUERY, FieldReason); - {ok, Positions} -> + {ok, Positions} -> Empty = make_empty_row(Mod), case xlate_insert_to_putdata(Values, Positions, Empty) of {error, ValueReason} -> make_rpberrresp(?E_BAD_QUERY, ValueReason); {ok, Data} -> - Response = sub_putreq_common(Mod, Table, Data, #state{}), - xlate_put_to_qry_resp(Response) - end + insert_putreqs(Mod, Table, Data) + end; + {error, FieldReason} -> + make_rpberrresp(?E_BAD_QUERY, FieldReason) end. -xlate_put_to_qry_resp({reply, #tsputresp{}, _State}) -> - #tsqueryresp{}; -xlate_put_to_qry_resp({reply, Err, _State}) -> - Err. - +insert_putreqs(Mod, Table, Data) -> + case catch validate_rows(Mod, Data) of + [] -> + case put_data(Data, Table, Mod) of + 0 -> + #tsqueryresp{}; + ErrorCount -> + failed_put_response(ErrorCount) + end; + BadRowIdxs when is_list(BadRowIdxs) -> + validate_rows_error_response(BadRowIdxs) + end. %% %% Return an all-null empty row ready to be populated by the values @@ -301,24 +307,24 @@ make_empty_row(Mod) -> %% %% This *requires* that once schema changes take place the DDL fields are left in order. %% --spec lookup_field_positions(module(), [identifier()]) -> +-spec lookup_field_positions(module(), [field_identifier()]) -> {ok, [pos_integer()]} | {error, string()}. lookup_field_positions(Mod, FieldIdentifiers) -> case lists:foldl( fun({identifier, FieldName}, {Good, Bad}) -> case Mod:is_field_valid(FieldName) of false -> - {Good, [flat_format("undefined field ~s", [FieldName]) | Bad]}; + {Good, [FieldName | Bad]}; true -> {[Mod:get_field_position(FieldName) | Good], Bad} end end, {[], []}, FieldIdentifiers) of - {Positions, []} -> - {ok, lists:reverse(Positions)}; - {_, Errors} -> - %% Only returns the first error, could investigate returning multiple. - {error, hd(lists:reverse(Errors))} + {Positions, []} -> + {ok, lists:reverse(Positions)}; + {_, Errors} -> + {error, flat_format("undefined fields: ~s", + [string:join(lists:reverse(Errors), ", ")])} end. %% @@ -334,10 +340,8 @@ xlate_insert_to_putdata(Values, Positions, Empty) -> case make_insert_row(RowVals, Positions, Empty) of {ok, Row} -> {[Row | Good], Bad, RowNum + 1}; - {error, Reason} -> - Reason1 = flat_format("~s in row index ~b", - [Reason, RowNum]), - {Good, [Reason1 | Bad], RowNum + 1} + {error, _Reason} -> + {Good, [integer_to_list(RowNum) | Bad], RowNum + 1} end end, Converted = lists:foldl(ConvFn, {[], [], 1}, Values), @@ -345,21 +349,21 @@ xlate_insert_to_putdata(Values, Positions, Empty) -> {PutData, [], _} -> {ok, lists:reverse(PutData)}; {_, Errors, _} -> - %% Only returns the first error, could investigate returning multiple. - {error, lists:last(Errors)} + {error, flat_format("too many values in row index(es) ~s", + [string:join(lists:reverse(Errors), ", ")])} end. --spec make_insert_row([] | [riak_ql_ddl:data_value()], [pos_integer()], tuple()) -> +-spec make_insert_row([] | [riak_ql_ddl:data_value()], [] | [pos_integer()], tuple()) -> {ok, tuple()} | {error, string()}. 
-make_insert_row([], _Positions, Row) -> +make_insert_row([], _Positions, Row) when is_tuple(Row) -> %% Out of entries in the value - row is populated with default values %% so if we run out of data for implicit/explicit fieldnames can just return {ok, Row}; -make_insert_row(_, [], _Row) -> +make_insert_row(_, [], Row) when is_tuple(Row) -> %% Too many values for the field {error, "too many values"}; %% Make sure the types match -make_insert_row([{_Type, Val} | Values], [Pos | Positions], Row) -> +make_insert_row([{_Type, Val} | Values], [Pos | Positions], Row) when is_tuple(Row) -> make_insert_row(Values, Positions, setelement(Pos, Row, Val)). @@ -1074,7 +1078,7 @@ validate_xlate_insert_to_putdata_too_many_values_test() -> Positions = [3, 1, 2, 4], Result = xlate_insert_to_putdata(Values, Positions, Empty), ?assertEqual( - {error,"too many values in row index 1"}, + {error,"too many values in row index(es) 1"}, Result ). diff --git a/src/riak_kv_ts_util.erl b/src/riak_kv_ts_util.erl index c507eb8d93..03d25a9213 100644 --- a/src/riak_kv_ts_util.erl +++ b/src/riak_kv_ts_util.erl @@ -82,12 +82,20 @@ build_sql_record(describe, SQL, _Cover) -> {ok, #riak_sql_describe_v1{'DESCRIBE' = D}}; build_sql_record(insert, SQL, _Cover) -> T = proplists:get_value(table, SQL), - F = proplists:get_value(fields, SQL), - V = proplists:get_value(values, SQL), - {ok, #riak_sql_insert_v1{'INSERT' = T, - fields = F, - values = V - }}. + case is_binary(T) of + true -> + Mod = riak_ql_ddl:make_module_name(T), + %% If columns are not specified, all columns are implied + F = riak_ql_ddl:insert_sql_columns(Mod, proplists:get_value(fields, SQL, [])), + V = proplists:get_value(values, SQL), + {ok, #riak_sql_insert_v1{'INSERT' = T, + fields = F, + values = V, + helper_mod = Mod + }}; + false -> + {error, <<"Must provide exactly one table name">>} + end. %% Useful key extractors for functions (e.g., in get or delete code From 0645f79febb8637ffc087dacd83de59db21c5cac Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Mon, 14 Mar 2016 12:54:37 +0100 Subject: [PATCH 046/122] Trying to humour dialyzer, but is not yielding fully. --- src/riak_kv_wm_timeseries_listkeys.erl | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/riak_kv_wm_timeseries_listkeys.erl b/src/riak_kv_wm_timeseries_listkeys.erl index 69e33f443e..2122143284 100644 --- a/src/riak_kv_wm_timeseries_listkeys.erl +++ b/src/riak_kv_wm_timeseries_listkeys.erl @@ -159,12 +159,23 @@ ts_keys_to_html(EncodedKeys, Table, Mod) -> BaseUrl = base_url(Table), Keys = decode_keys(EncodedKeys), KeyTypes = riak_kv_wm_ts_util:local_key_fields_and_types(Mod), - URLs = [io_lib:format("~s~s", [BaseUrl, key_to_string(Key, KeyTypes)]) + URLs = [format_url(BaseUrl, KeyTypes, Key) || Key <- Keys], - Hrefs = [ io_lib:format("~s", [URL, URL]) - || URL <- URLs], + %% Dialyzer does not like the list_comprehension, if you want to avoid the + %% dialyzer error you have to write it like this: + %% URLs = lists:map(fun(Key) -> + %% format_url(BaseUrl, KeyTypes, Key) + %% end, + %% Keys), + Hrefs = [format_href(URL) || URL <- URLs], list_to_binary(lists:flatten(Hrefs)). +format_href(URL) -> + io_lib:format("~s", [URL, URL]). + +format_url(BaseUrl, KeyTypes, Key) -> + io_lib:format("~s~s", [BaseUrl, key_to_string(Key, KeyTypes)]). + decode_keys(Keys) -> [tuple_to_list(sext:decode(A)) || A <- Keys, A /= []]. 
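%% Note that format_href/1 above is called with a two-element argument
%% list [URL, URL], which implies a two-slot format string; given the
%% module renders "HTML URLs", the presumable intent is an anchor tag
%% (the markup here is an assumption, not taken from this diff):
%%   format_href(URL) ->
%%       io_lib:format("<a href=\"~s\">~s</a>", [URL, URL]).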
@@ -181,5 +192,5 @@ value_to_url_string(V, timestamp) -> base_url(Table) -> {ok, [{Server, Port}]} = application:get_env(riak_api, http), - io_lib:format("http://~s:~B/ts/v1/tables/~s/keys/", - [Server, Port, Table]). + lists:flatten(io_lib:format("http://~s:~B/ts/v1/tables/~s/keys/", + [Server, Port, Table])). From fdeba228ce838379f2fce0fcf117c1a57346433b Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Mon, 14 Mar 2016 12:55:27 +0100 Subject: [PATCH 047/122] riak_kv_wm_timeseries_query reorganised to have the callbacks in the right order. And all helper functions at the end of the module. --- src/riak_kv_wm_timeseries_query.erl | 93 +++++++++++++++-------------- 1 file changed, 47 insertions(+), 46 deletions(-) diff --git a/src/riak_kv_wm_timeseries_query.erl b/src/riak_kv_wm_timeseries_query.erl index b21a6fefe7..0c4c657bdd 100644 --- a/src/riak_kv_wm_timeseries_query.erl +++ b/src/riak_kv_wm_timeseries_query.erl @@ -56,7 +56,7 @@ -include("riak_kv_ts.hrl"). -record(ctx, { - table :: 'undefined' | string(), + table :: 'undefined' | binary(), mod :: 'undefined' | module(), method :: atom(), prefix, %% string() - prefix for resource uris @@ -99,7 +99,11 @@ service_available(RD, Ctx = #ctx{riak = RiakProps}) -> {false, Resp, Ctx} end. +-spec allowed_methods(#wm_reqdata{}, #ctx{}) -> cb_rv_spec([atom()]). +allowed_methods(RD, Ctx) -> + {['GET', 'POST'], RD, Ctx}. +-spec malformed_request(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). malformed_request(RD, Ctx) -> try {SqlType, SQL} = query_from_request(RD), @@ -117,16 +121,16 @@ malformed_request(RD, Ctx) -> {true, Response, Ctx} end. --spec is_authorized(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()|halt()). +-spec is_authorized(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()|string()|halt()). is_authorized(RD, #ctx{sql_type=SqlType, table=Table}=Ctx) -> - checkpoint("is_authorized", RD), Call = call_from_sql_type(SqlType), lager:log(info, self(), "is_authorized type:~p", [SqlType]), case riak_kv_wm_ts_util:authorize(Call, Table, RD) of ok -> {true, RD, Ctx}; {error, ErrorMsg} -> - {ErrorMsg, RD, Ctx}; + ErrorStr = lists:flatten(io_lib:format("~p", [ErrorMsg])), + {ErrorStr, RD, Ctx}; {insecure, Halt, Resp} -> {Halt, Resp, Ctx} end. @@ -143,45 +147,6 @@ forbidden(RD, Ctx) -> end. --spec allowed_methods(#wm_reqdata{}, #ctx{}) -> cb_rv_spec([atom()]). -allowed_methods(RD, Ctx) -> - {['GET', 'POST'], RD, Ctx}. - -query_from_request(RD) -> - QueryStr = query_string_from_request(RD), - lager:log(info, self(), "query_from_request: ~p", [QueryStr]), - compile_query(QueryStr). - -query_string_from_request(RD) -> - case wrq:get_qs_value("query", RD) of - undefined -> - throw({query, "no query key in query string"}); - Str -> - Str - end. - -compile_query(QueryStr) -> - case riak_ql_parser:ql_parse( - riak_ql_lexer:get_tokens(QueryStr)) of - {error, Reason} -> - ErrorMsg = lists:flatten(io_lib:format("parse error: ~p", [Reason])), - throw({query, ErrorMsg}); - {ddl, _ } = Res -> - Res; - {Type, Compiled} when Type==select; Type==describe -> - {ok, SQL} = riak_kv_ts_util:build_sql_record( - Type, Compiled, undefined), - {Type, SQL} - end. - -%% @todo: should really be in riak_ql somewhere -table_from_sql(#ddl_v1{table=Table}) -> Table; -table_from_sql(#riak_select_v1{'FROM'=Table}) -> Table; -table_from_sql(#riak_sql_describe_v1{'DESCRIBE'=Table}) -> Table. - -call_from_sql_type(ddl) -> query_create_table; -call_from_sql_type(select) -> query_select; -call_from_sql_type(describe) -> query_describe. 
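%% The new layout mirrors the usual webmachine decision order (an
%% approximation -- the exact flow is webmachine's, not this module's):
%%   service_available -> allowed_methods -> malformed_request
%%     -> is_authorized -> forbidden -> content_types_* -> process_post
%% so the callbacks now read top-to-bottom roughly in the order they
%% fire, with the helpers gathered after process_post.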
@@ -200,7 +165,6 @@ encodings_provided(RD, Ctx) -> -spec content_types_accepted(#wm_reqdata{}, #ctx{}) -> cb_rv_spec([{ContentType::string(), Acceptor::atom()}]). content_types_accepted(RD, Ctx) -> -% {[{"application/json", accept_doc_body}], RD, Ctx}. %% @todo: if we end up without a body in the request this function should be deleted. {[], RD, Ctx}. @@ -265,6 +229,45 @@ process_post(RD, #ctx{sql_type=select, {false, Resp, Ctx} end. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Helper functions +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +query_from_request(RD) -> + QueryStr = query_string_from_request(RD), + lager:log(info, self(), "query_from_request: ~p", [QueryStr]), + compile_query(QueryStr). + +query_string_from_request(RD) -> + case wrq:get_qs_value("query", RD) of + undefined -> + throw({query, "no query key in query string"}); + Str -> + Str + end. + +compile_query(QueryStr) -> + case riak_ql_parser:ql_parse( + riak_ql_lexer:get_tokens(QueryStr)) of + {error, Reason} -> + ErrorMsg = lists:flatten(io_lib:format("parse error: ~p", [Reason])), + throw({query, ErrorMsg}); + {ddl, _ } = Res -> + Res; + {Type, Compiled} when Type==select; Type==describe -> + {ok, SQL} = riak_kv_ts_util:build_sql_record( + Type, Compiled, undefined), + {Type, SQL} + end. + +%% @todo: should really be in riak_ql somewhere +table_from_sql(#ddl_v1{table=Table}) -> Table; +table_from_sql(#riak_select_v1{'FROM'=Table}) -> Table; +table_from_sql(#riak_sql_describe_v1{'DESCRIBE'=Table}) -> Table. + +call_from_sql_type(ddl) -> query_create_table; +call_from_sql_type(select) -> query_select; +call_from_sql_type(describe) -> query_describe. + create_table(DDL = #ddl_v1{table = Table}) -> %% would be better to use a function to get the table out. {ok, Props1} = riak_kv_ts_util:apply_timeseries_bucket_props(DDL, []), @@ -295,8 +298,6 @@ wait_until_active(Table, Seconds) -> end. -spec produce_doc_body(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(iolist()). -%% @doc Extract the value of the document, and place it in the -%% response body of the request. produce_doc_body(RD, Ctx = #ctx{result = {Columns, Rows}}) -> {mochijson2:encode( {struct, [{<<"columns">>, Columns}, From bc80ebb6a65d53c8d4fa977e9d835360adbfe711 Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Mon, 14 Mar 2016 13:17:16 +0100 Subject: [PATCH 048/122] Comments in the top of the files aligned with what the code does. --- src/riak_kv_wm_timeseries.erl | 3 ++- src/riak_kv_wm_timeseries_listkeys.erl | 5 ++--- src/riak_kv_wm_timeseries_query.erl | 3 +-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl index 02a36f290a..f08ea38161 100644 --- a/src/riak_kv_wm_timeseries.erl +++ b/src/riak_kv_wm_timeseries.erl @@ -37,7 +37,8 @@ %% Request body is expected to be a JSON containing a struct or structs for the %% POST. GET and DELETE have no body. %% -%% Response is a JSON containing full records. +%% Response is a JSON containing full records or {"success": true} for POST and +%% DELETE. %% -module(riak_kv_wm_timeseries). diff --git a/src/riak_kv_wm_timeseries_listkeys.erl b/src/riak_kv_wm_timeseries_listkeys.erl index 2122143284..f172d1ce05 100644 --- a/src/riak_kv_wm_timeseries_listkeys.erl +++ b/src/riak_kv_wm_timeseries_listkeys.erl @@ -24,11 +24,10 @@ %% @doc Resource for Riak TS operations over HTTP. 
%% %% ``` -%% GET /ts/v1/table/Table/keys list_keys +%% GET /ts/v1/table/Table/list_keys %% ''' %% -%% Request body is expected to be a JSON containing key and/or value(s). -%% Response is a JSON containing data rows with column headers. +%% Response is HTML URLs for the entries in the table. %% -module(riak_kv_wm_timeseries_listkeys). diff --git a/src/riak_kv_wm_timeseries_query.erl b/src/riak_kv_wm_timeseries_query.erl index 0c4c657bdd..6f2584966b 100644 --- a/src/riak_kv_wm_timeseries_query.erl +++ b/src/riak_kv_wm_timeseries_query.erl @@ -23,10 +23,9 @@ %% @doc Resource for Riak TS operations over HTTP. %% %% ``` -%% GET/POST /ts/v1/query execute SQL query +%% POST /ts/v1/query?query="query string" execute SQL query %% ''' %% -%% Request body is expected to be a JSON containing key and/or value(s). %% Response is a JSON containing data rows with column headers. %% From 20ef44ee1fa55bc5b2bbaa5ff9128a74106a024c Mon Sep 17 00:00:00 2001 From: Brett Hazen Date: Mon, 14 Mar 2016 23:15:02 +0000 Subject: [PATCH 049/122] Correct data type declaration in #riak_sql_insert_v1{} to fix dialyzer --- include/riak_kv_ts.hrl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/riak_kv_ts.hrl b/include/riak_kv_ts.hrl index e4f951488b..05d9ee430c 100644 --- a/include/riak_kv_ts.hrl +++ b/include/riak_kv_ts.hrl @@ -66,8 +66,8 @@ -record(riak_sql_insert_v1, { 'INSERT' = <<>> :: binary(), - fields :: [identifier()], - values :: [[value_type()]], + fields :: [field_identifier()], + values :: [[riak_ql_ddl:data_value()]], helper_mod :: atom() }). From dbc872ca77cb7fe4397b8c8991f48de1caa776f4 Mon Sep 17 00:00:00 2001 From: Brett Hazen Date: Tue, 15 Mar 2016 03:52:07 +0000 Subject: [PATCH 050/122] Move some types from QL and use exported types from riak_ql_ddl --- include/riak_kv_ts.hrl | 11 +++++------ src/riak_kv_pb_timeseries.erl | 2 +- src/riak_kv_qry_compiler.erl | 30 +++++++++++++++++++++--------- 3 files changed, 27 insertions(+), 16 deletions(-) diff --git a/include/riak_kv_ts.hrl b/include/riak_kv_ts.hrl index 05d9ee430c..b734770558 100644 --- a/include/riak_kv_ts.hrl +++ b/include/riak_kv_ts.hrl @@ -29,13 +29,12 @@ %% the result type of a query, rows means to return all matching rows, aggregate %% returns one row calculated from the result set for the query. -type select_result_type() :: rows | aggregate. --type value_type() :: binary() | float() | integer() | boolean(). -record(riak_sel_clause_v1, { calc_type = rows :: select_result_type(), initial_state = [] :: [any()], - col_return_types = [] :: [field_type()], + col_return_types = [] :: [riak_ql_ddl:field_type()], col_names = [] :: [binary()], clause = [] :: [riak_kv_qry_compiler:compiled_select()], finalisers = [] :: [skip | function()] @@ -45,9 +44,9 @@ { 'SELECT' :: #riak_sel_clause_v1{}, 'FROM' = <<>> :: binary() | {list, [binary()]} | {regex, list()}, - 'WHERE' = [] :: [filter()], - 'ORDER BY' = [] :: [sorter()], - 'LIMIT' = [] :: [limit()], + 'WHERE' = [] :: [riak_ql_ddl:filter()], + 'ORDER BY' = [] :: [riak_kv_qry_compiler:sorter()], + 'LIMIT' = [] :: [riak_kv_qry_compiler:limit()], helper_mod :: atom(), %% will include groups when we get that far partition_key = none :: none | #key_v1{}, @@ -66,7 +65,7 @@ -record(riak_sql_insert_v1, { 'INSERT' = <<>> :: binary(), - fields :: [field_identifier()], + fields :: [riak_ql_ddl:field_identifier()], values :: [[riak_ql_ddl:data_value()]], helper_mod :: atom() }). 
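%% For illustration, an INSERT populating the record above -- a sketch
%% using the {identifier, Name} and {Type, Value} shapes that
%% lookup_field_positions/2 and make_insert_row/3 pattern-match on
%% (the table name and the value's type tag are hypothetical):
%%   #riak_sql_insert_v1{'INSERT' = <<"GeoCheckin">>,
%%                       fields = [{identifier, <<"time">>}],
%%                       values = [[{integer, 10}]],
%%                       helper_mod = riak_ql_ddl:make_module_name(<<"GeoCheckin">>)}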
diff --git a/src/riak_kv_pb_timeseries.erl b/src/riak_kv_pb_timeseries.erl index dca9e49beb..46aa0bd775 100644 --- a/src/riak_kv_pb_timeseries.erl +++ b/src/riak_kv_pb_timeseries.erl @@ -307,7 +307,7 @@ make_empty_row(Mod) -> %% %% This *requires* that once schema changes take place the DDL fields are left in order. %% --spec lookup_field_positions(module(), [field_identifier()]) -> +-spec lookup_field_positions(module(), [riak_ql_ddl:field_identifier()]) -> {ok, [pos_integer()]} | {error, string()}. lookup_field_positions(Mod, FieldIdentifiers) -> case lists:foldl( diff --git a/src/riak_kv_qry_compiler.erl b/src/riak_kv_qry_compiler.erl index f8ac62b150..aa00724cd2 100644 --- a/src/riak_kv_qry_compiler.erl +++ b/src/riak_kv_qry_compiler.erl @@ -38,6 +38,18 @@ {filter, [term()]} | {start_inclusive, boolean()} | {end_inclusive, boolean()}]. +-type combinator() :: [binary()]. +-type limit() :: any(). +-type operator() :: [binary()]. +-type sorter() :: term(). + + +-export_type([ + combinator/0, + limit/0, + operator/0, + sorter/0 +]). -export_type([where_props/0]). %% 3rd argument is undefined if we should not be concerned about the @@ -209,7 +221,7 @@ get_col_names2(_, Name) -> }). %% --spec select_column_clause_folder(#ddl_v1{}, selection(), +-spec select_column_clause_folder(#ddl_v1{}, riak_ql_ddl:selection(), {set(), #riak_sel_clause_v1{}}) -> {set(), #riak_sel_clause_v1{}}. select_column_clause_folder(DDL, ColAST1, @@ -292,7 +304,7 @@ compile_select_col(DDL, Select) -> %% Returns a one arity fun which is stateless for example pulling a field from a %% row. --spec compile_select_col_stateless(#ddl_v1{}, selection()|{Op::atom(), selection(), selection()}|{return_state, integer()}) -> +-spec compile_select_col_stateless(#ddl_v1{}, riak_ql_ddl:selection()|{Op::atom(), riak_ql_ddl:selection(), riak_ql_ddl:selection()}|{return_state, integer()}) -> compiled_select(). compile_select_col_stateless(_, {identifier, [<<"*">>]}) -> fun(Row, _) -> Row end; @@ -317,8 +329,8 @@ compile_select_col_stateless(DDL, {Op, A, B}) -> compile_select_col_stateless2(Op, Arg_a, Arg_b). %% --spec infer_col_type(#ddl_v1{}, selection(), Errors1::[any()]) -> - {Type::simple_field_type() | error, Errors2::[any()]}. +-spec infer_col_type(#ddl_v1{}, riak_ql_ddl:selection(), Errors1::[any()]) -> + {riak_ql_ddl:simple_field_type() | error, Errors2::[any()]}. infer_col_type(_, {Type, _}, Errors) when Type == sint64; Type == varchar; Type == boolean; Type == double -> {Type, Errors}; @@ -364,15 +376,15 @@ pull_from_row(N, Row) -> lists:nth(N, Row). %% --spec extract_stateful_functions(selection(), integer()) -> - {selection() | {return_state, integer()}, [selection_function()]}. +-spec extract_stateful_functions(riak_ql_ddl:selection(), integer()) -> + {riak_ql_ddl:selection() | {return_state, integer()}, [riak_ql_ddl:selection_function()]}. extract_stateful_functions(Selection1, FinaliserLen) when is_integer(FinaliserLen) -> {Selection2, Fns} = extract_stateful_functions2(Selection1, FinaliserLen, []), {Selection2, lists:reverse(Fns)}. %% extract stateful functions from the selection --spec extract_stateful_functions2(selection(), integer(), [selection_function()]) -> - {selection() | {finalise_aggregation, FnName::atom(), integer()}, [selection_function()]}. +-spec extract_stateful_functions2(riak_ql_ddl:selection(), integer(), [riak_ql_ddl:selection_function()]) -> + {riak_ql_ddl:selection() | {finalise_aggregation, FnName::atom(), integer()}, [riak_ql_ddl:selection_function()]}. 
extract_stateful_functions2({Op, ArgA1, ArgB1}, FinaliserLen, Fns1) -> {ArgA2, Fns2} = extract_stateful_functions2(ArgA1, FinaliserLen, Fns1), {ArgB2, Fns3} = extract_stateful_functions2(ArgB1, FinaliserLen, Fns2), @@ -449,7 +461,7 @@ col_index_and_type_of(Fields, ColumnName) -> end. %% --spec expand_where(filter(), #key_v1{}, integer()) -> +-spec expand_where(riak_ql_ddl:filter(), #key_v1{}, integer()) -> [where_props()] | {error, any()}. expand_where(Where, PartitionKey, MaxSubQueries) -> case find_quantum_field_index_in_key(PartitionKey) of From daaffa5874ef916ce27b25851a676c2bf7288445 Mon Sep 17 00:00:00 2001 From: andytill Date: Tue, 15 Mar 2016 16:09:10 +0000 Subject: [PATCH 051/122] Queries that produce one sub query also go through make_wheres to correct the start key. --- src/riak_kv_qry_compiler.erl | 186 +++++++++++++---------------------- 1 file changed, 69 insertions(+), 117 deletions(-) diff --git a/src/riak_kv_qry_compiler.erl b/src/riak_kv_qry_compiler.erl index c6f2517054..83bd8fcbe5 100644 --- a/src/riak_kv_qry_compiler.erl +++ b/src/riak_kv_qry_compiler.erl @@ -509,10 +509,7 @@ hash_timestamp_to_quanta(QField, QSize, QUnit, QIndex, MaxSubQueries, Where1) -> {NoSubQueries, Boundaries} = riak_ql_quanta:quanta(Min2, Max2, QSize, QUnit), if - NoSubQueries == 1 -> - [Where2]; - NoSubQueries > 1 andalso (MaxSubQueries == undefined orelse - NoSubQueries =< MaxSubQueries) -> + MaxSubQueries == undefined orelse NoSubQueries =< MaxSubQueries -> %% use the maximum value that has not been incremented, we still use %% the end_inclusive flag because the end key is not used to hash make_wheres(Where2, QField, Min2, Max1, Boundaries); @@ -1053,19 +1050,14 @@ simplest_test() -> Query = "select weather from GeoCheckin where time > 3000 and time < 5000 and user = 'user_1' and location = 'San Francisco'", {ok, Q} = get_query(Query), true = is_query_valid(DDL, Q), - [Where1] = - test_data_where_clause(<<"San Francisco">>, <<"user_1">>, [{3000, 5000}]), - Where2 = Where1 ++ [{start_inclusive, false}], - PK = get_standard_pk(), - LK = get_standard_lk(), - ?assertMatch( - {ok, [?SQL_SELECT{ is_executable = true, - type = timeseries, - 'WHERE' = Where2, - partition_key = PK, - local_key = LK }]}, - compile(DDL, Q, 5) - ). + [ExpectedWhere] = + test_data_where_clause(<<"San Francisco">>, <<"user_1">>, [{3001, 5000}]), + {ok, [?SQL_SELECT{ 'WHERE' = WhereVal, + partition_key = PK, + local_key = LK }]} = compile(DDL, Q, 5), + ?assertEqual(get_standard_pk(), PK), + ?assertEqual(get_standard_lk(), LK), + ?assertEqual(ExpectedWhere, WhereVal). simple_with_filter_1_test() -> {ok, Q} = get_query( @@ -1077,23 +1069,18 @@ simple_with_filter_1_test() -> DDL = get_standard_ddl(), true = is_query_valid(DDL, Q), [[StartKey, EndKey |_]] = - test_data_where_clause(<<"Scotland">>, <<"user_1">>, [{3000, 5000}]), - Where = [ + test_data_where_clause(<<"Scotland">>, <<"user_1">>, [{3001, 5000}]), + ExpectedWhere = [ StartKey, EndKey, - {filter, {'=', {field, <<"weather">>, varchar}, {const, <<"yankee">>}}}, - {start_inclusive, false} + {filter, {'=', {field, <<"weather">>, varchar}, {const, <<"yankee">>}}} ], - PK = get_standard_pk(), - LK = get_standard_lk(), - ?assertMatch( - {ok, [?SQL_SELECT{ is_executable = true, - type = timeseries, - 'WHERE' = Where, - partition_key = PK, - local_key = LK }]}, - compile(DDL, Q, 5) - ). 
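%% The pattern in these updated tests: an exclusive lower bound such as
%% "time > 3000" is now folded into the start key itself, so (values
%% taken from the assertions above and below)
%%   before: {startkey, [..., {<<"time">>, timestamp, 3000}]}
%%           ... {start_inclusive, false}
%%   after:  {startkey, [..., {<<"time">>, timestamp, 3001}]}
%% and the {start_inclusive, false} flag disappears from the WHERE
%% proplist.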
+ {ok, [?SQL_SELECT{ 'WHERE' = WhereVal, + partition_key = PK, + local_key = LK }]} = compile(DDL, Q, 5), + ?assertEqual(get_standard_pk(), PK), + ?assertEqual(get_standard_lk(), LK), + ?assertEqual(ExpectedWhere, WhereVal). simple_with_filter_2_test() -> {ok, Q} = get_query( @@ -1106,21 +1093,17 @@ simple_with_filter_2_test() -> true = is_query_valid(DDL, Q), [[StartKey, EndKey |_]] = test_data_where_clause(<<"Scotland">>, <<"user_1">>, [{3000, 5000}]), - Where = [ + ExpectedWhere = [ StartKey, EndKey, {filter, {'=', {field, <<"weather">>, varchar}, {const, <<"yankee">>}}} ], - PK = get_standard_pk(), - LK = get_standard_lk(), - ?assertMatch( - {ok, [?SQL_SELECT{ is_executable = true, - type = timeseries, - 'WHERE' = Where, - partition_key = PK, - local_key = LK }]}, - compile(DDL, Q, 5) - ). + {ok, [?SQL_SELECT{ 'WHERE' = WhereVal, + partition_key = PK, + local_key = LK }]} = compile(DDL, Q, 5), + ?assertEqual(get_standard_pk(), PK), + ?assertEqual(get_standard_lk(), LK), + ?assertEqual(ExpectedWhere, WhereVal). simple_with_filter_3_test() -> {ok, Q} = get_query( @@ -1132,24 +1115,21 @@ simple_with_filter_3_test() -> DDL = get_standard_ddl(), true = is_query_valid(DDL, Q), [[StartKey, EndKey |_]] = - test_data_where_clause(<<"Scotland">>, <<"user_1">>, [{3000, 5000}]), + test_data_where_clause(<<"Scotland">>, <<"user_1">>, [{3001, 5000}]), PK = get_standard_pk(), LK = get_standard_lk(), - Where = [ + ExpectedWhere = [ StartKey, EndKey, {filter, {'=', {field, <<"weather">>, varchar}, {const, <<"yankee">>}}}, - {start_inclusive, false}, - {end_inclusive, true} + {end_inclusive, true} ], - ?assertMatch( - {ok, [?SQL_SELECT{ is_executable = true, - type = timeseries, - 'WHERE' = Where, - partition_key = PK, - local_key = LK }]}, - compile(DDL, Q, 5) - ). + {ok, [?SQL_SELECT{ 'WHERE' = WhereVal, + partition_key = PK, + local_key = LK }]} = compile(DDL, Q, 5), + ?assertEqual(get_standard_pk(), PK), + ?assertEqual(get_standard_lk(), LK), + ?assertEqual(ExpectedWhere, WhereVal). simple_with_2_field_filter_test() -> {ok, Q} = get_query( @@ -1162,10 +1142,8 @@ simple_with_2_field_filter_test() -> DDL = get_standard_ddl(), true = is_query_valid(DDL, Q), [[StartKey, EndKey |_]] = - test_data_where_clause(<<"Scotland">>, <<"user_1">>, [{3000, 5000}]), - PK = get_standard_pk(), - LK = get_standard_lk(), - Where = [ + test_data_where_clause(<<"Scotland">>, <<"user_1">>, [{3001, 5000}]), + ExpectedWhere = [ StartKey, EndKey, {filter, @@ -1173,17 +1151,14 @@ simple_with_2_field_filter_test() -> {'=', {field, <<"weather">>, varchar}, {const, <<"yankee">>}}, {'=', {field, <<"temperature">>, varchar}, {const, <<"yelp">>}} } - }, - {start_inclusive, false} + } ], - ?assertMatch( - {ok, [?SQL_SELECT{ is_executable = true, - type = timeseries, - 'WHERE' = Where, - partition_key = PK, - local_key = LK }]}, - compile(DDL, Q, 5) - ). + {ok, [?SQL_SELECT{ 'WHERE' = WhereVal, + partition_key = PK, + local_key = LK }]} = compile(DDL, Q, 5), + ?assertEqual(get_standard_pk(), PK), + ?assertEqual(get_standard_lk(), LK), + ?assertEqual(ExpectedWhere, WhereVal). 
complex_with_4_field_filter_test() -> Query = "select weather from GeoCheckin where time > 3000 and time < 5000 and user = 'user_1' and location = 'Scotland' and extra = 1 and (weather = 'yankee' or (temperature = 'yelp' and geohash = 'erko'))", @@ -1191,8 +1166,8 @@ complex_with_4_field_filter_test() -> DDL = get_long_ddl(), true = is_query_valid(DDL, Q), [[Start, End | _]] = - test_data_where_clause(<<"Scotland">>, <<"user_1">>, [{3000, 5000}]), - Where2 = [ + test_data_where_clause(<<"Scotland">>, <<"user_1">>, [{3001, 5000}]), + ExpectedWhere = [ Start, End, {filter, {and_, @@ -1204,19 +1179,14 @@ complex_with_4_field_filter_test() -> }, {'=', {field, <<"extra">>, sint64}, {const, 1}} } - }, - {start_inclusive, false} + } ], - PK = get_standard_pk(), - LK = get_standard_lk(), - ?assertMatch( - {ok, [?SQL_SELECT{ is_executable = true, - type = timeseries, - 'WHERE' = Where2, - partition_key = PK, - local_key = LK }]}, - compile(DDL, Q, 5) - ). + {ok, [?SQL_SELECT{ 'WHERE' = WhereVal, + partition_key = PK, + local_key = LK }]} = compile(DDL, Q, 5), + ?assertEqual(get_standard_pk(), PK), + ?assertEqual(get_standard_lk(), LK), + ?assertEqual(ExpectedWhere, WhereVal). complex_with_boolean_rewrite_filter_test() -> DDL = get_long_ddl(), @@ -1227,10 +1197,8 @@ complex_with_boolean_rewrite_filter_test() -> "AND (myboolean = False OR myboolean = tRue)"), true = is_query_valid(DDL, Q), [[StartKey, EndKey |_]] = - test_data_where_clause(<<"Scotland">>, <<"user_1">>, [{3000, 5000}]), - PK = get_standard_pk(), - LK = get_standard_lk(), - Where = [ + test_data_where_clause(<<"Scotland">>, <<"user_1">>, [{3001, 5000}]), + ExpectedWhere = [ StartKey, EndKey, {filter, @@ -1238,18 +1206,14 @@ complex_with_boolean_rewrite_filter_test() -> {'=', {field, <<"myboolean">>, boolean}, {const, false}}, {'=', {field, <<"myboolean">>, boolean}, {const, true}} } - }, - {start_inclusive, false} + } ], - ?assertMatch( - {ok, [?SQL_SELECT{ is_executable = true, - type = timeseries, - 'WHERE' = Where, - partition_key = PK, - local_key = LK - }]}, - compile(DDL, Q, 5) - ). + {ok, [?SQL_SELECT{ 'WHERE' = WhereVal, + partition_key = PK, + local_key = LK }]} = compile(DDL, Q, 5), + ?assertEqual(get_standard_pk(), PK), + ?assertEqual(get_standard_lk(), LK), + ?assertEqual(ExpectedWhere, WhereVal). %% got for 3 queries to get partition ordering problems flushed out simple_spanning_boundary_test() -> @@ -1385,23 +1349,12 @@ key_is_all_timestamps_test() -> "SELECT time_a FROM GeoCheckin " "WHERE time_c > 2999 AND time_c < 5000 " "AND time_a = 10 AND time_b = 15"), - ?assertMatch( - {ok, [?SQL_SELECT{ - 'WHERE' = [ - {startkey, [ - {<<"time_a">>, timestamp, 10}, - {<<"time_b">>, timestamp, 15}, - {<<"time_c">>, timestamp, 2999} - ]}, - {endkey, [ - {<<"time_a">>, timestamp, 10}, - {<<"time_b">>, timestamp, 15}, - {<<"time_c">>, timestamp, 5000} - ]}, - {filter, []}, - {start_inclusive, false}] - }]}, - compile(DDL, Q, 5) + {ok, [?SQL_SELECT{ 'WHERE' = Where }]} = compile(DDL, Q, 5), + ?assertEqual( + [{startkey, [{<<"time_a">>,timestamp,10}, {<<"time_b">>,timestamp,15}, {<<"time_c">>,timestamp,3000} ]}, + {endkey, [{<<"time_a">>,timestamp,10}, {<<"time_b">>,timestamp,15}, {<<"time_c">>,timestamp,5000} ]}, + {filter, []} ], + Where ). 
duplicate_lower_bound_filter_not_allowed_test() -> @@ -2058,10 +2011,9 @@ flexible_keys_1_test() -> "SELECT * FROM tab4 WHERE a > 0 AND a < 1000 AND a1 = 1"), {ok, [Select]} = compile(DDL, Q, 100), ?assertEqual( - [{startkey,[{<<"a1">>,sint64,1}, {<<"a">>,timestamp,0}]}, + [{startkey,[{<<"a1">>,sint64,1}, {<<"a">>,timestamp,1}]}, {endkey, [{<<"a1">>,sint64,1}, {<<"a">>,timestamp,1000}]}, - {filter,[]}, - {start_inclusive,false}], + {filter,[]}], Select#riak_select_v1.'WHERE' ). From 5086662b6cd465c0eb7d8af13d43eb6a717ca87a Mon Sep 17 00:00:00 2001 From: Andrei Zavada Date: Thu, 11 Feb 2016 05:40:39 +0200 Subject: [PATCH 052/122] return {1021, notfound} also in get --- src/riak_kv_pb_timeseries.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/riak_kv_pb_timeseries.erl b/src/riak_kv_pb_timeseries.erl index 46aa0bd775..52abe02a8a 100644 --- a/src/riak_kv_pb_timeseries.erl +++ b/src/riak_kv_pb_timeseries.erl @@ -58,7 +58,7 @@ -define(E_BAD_QUERY, 1018). -define(E_TABLE_INACTIVE, 1019). -define(E_PARSE_ERROR, 1020). --define(E_DELETE_NOTFOUND, 1021). +-define(E_NOTFOUND, 1021). -define(FETCH_RETRIES, 10). %% TODO make it configurable in tsqueryreq -define(TABLE_ACTIVATE_WAIT, 30). %% ditto @@ -529,7 +529,7 @@ sub_tsgetreq(Mod, DDL, #tsgetreq{table = Table, {error, {bad_key_length, Got, Need}} -> {reply, key_element_count_mismatch(Got, Need), State}; {error, notfound} -> - {reply, tsgetresp, State}; + {reply, make_rpberrresp(?E_NOTFOUND, "notfound"), State}; {error, Reason} -> {reply, make_rpberrresp(?E_GET, to_string(Reason)), State} end. @@ -581,7 +581,7 @@ sub_tsdelreq(Mod, DDL, #tsdelreq{table = Table, {error, {bad_key_length, Got, Need}} -> {reply, key_element_count_mismatch(Got, Need), State}; {error, notfound} -> - {reply, make_rpberrresp(?E_DELETE_NOTFOUND, "notfound"), State}; + {reply, make_rpberrresp(?E_NOTFOUND, "notfound"), State}; {error, Reason} -> {reply, failed_delete_response(Reason), State} end. From e9846541315e631d144ad7f24443b5d6b9b9a85d Mon Sep 17 00:00:00 2001 From: Andrei Zavada Date: Wed, 9 Mar 2016 14:39:52 +0200 Subject: [PATCH 053/122] fix an eunit test in riak_kv_console after SQL records reorg --- src/riak_kv_console.erl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/riak_kv_console.erl b/src/riak_kv_console.erl index 42894dba48..628ee16a11 100644 --- a/src/riak_kv_console.erl +++ b/src/riak_kv_console.erl @@ -875,6 +875,7 @@ bucket_error_xlate(X) -> -ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). +-include_lib("riak_ql/include/riak_ql_ddl.hrl"). json_props(Props) -> lists:flatten(mochijson2:encode([{props, Props}])). @@ -991,9 +992,10 @@ bucket_type_create_with_timeseries_table_with_two_element_key_test() -> mochijson2:decode(JSON) ), % just assert that this returns a ddl prop + HaveDDL = proplists:get_value(ddl, Result), ?assertMatch( - [{ddl, _}|_], - Result + ?DDL{}, + HaveDDL ). bucket_type_create_with_timeseries_table_error_with_misplaced_quantum_test() -> From fc099b3e0d466fa7c13b81fbba1be638201f46d1 Mon Sep 17 00:00:00 2001 From: Andrei Zavada Date: Fri, 22 Jan 2016 10:27:55 +0200 Subject: [PATCH 054/122] relax property retrieval checks in a riak_kv_console eunit test A proplist should not be relied upon to have properties in a particular order. 
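For instance (hypothetical property list), the old assertion style

    [{ddl, _}, {bucket_type, _} | _] = Props

fails with a badmatch as soon as the properties arrive in a different
order, whereas

    {ddl, _} = lists:keyfind(ddl, 1, Props)

holds regardless of ordering.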
--- src/riak_kv_console.erl | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/src/riak_kv_console.erl b/src/riak_kv_console.erl index 628ee16a11..bb7ebf2f8c 100644 --- a/src/riak_kv_console.erl +++ b/src/riak_kv_console.erl @@ -901,18 +901,33 @@ bucket_type_create_with_timeseries_table_test() -> "user varchar not null, ", "time timestamp not null, ", "PRIMARY KEY ((series, user, quantum(time, 15, m)), " - "series, user, time))">>, - JSON = json_props([{bucket_type, my_type}, - {table_def, TableDef}]), + "series, user, time))" + " with (prop1='woo', prop2 = 42)">>, + JSON = json_props([{bucket_type, my_type}, + {table_def, TableDef}, + {prop2, 41}]), bucket_type_create( - fun(Props) -> put(Ref, Props) end, - <<"my_type">>, - mochijson2:decode(JSON) - ), + fun(Props) -> put(Ref, Props) end, + <<"my_type">>, + mochijson2:decode(JSON) + ), ?assertMatch( - [{ddl, _}, {bucket_type, <<"my_type">>} | _], - get(Ref) - ). + {prop1, <<"woo">>}, + lists:keyfind(prop1, 1, get(Ref)) + ), + ?assertMatch( + {prop2, 42}, %% 42 set in query via 'with' + %% takes precedence over 41 from sidecar properties + lists:keyfind(prop2, 1, get(Ref)) + ), + ?assertMatch( + {ddl, _}, + lists:keyfind(ddl, 1, get(Ref)) + ), + ?assertMatch( + {bucket_type, <<"my_type">>}, + lists:keyfind(bucket_type, 1, get(Ref)) + ). bucket_type_create_with_timeseries_table_is_write_once_test() -> Ref = make_ref(), From 13ac079c144b37f911779e5bcc437b32b966be2e Mon Sep 17 00:00:00 2001 From: Andrei Zavada Date: Tue, 26 Jan 2016 12:29:26 +0200 Subject: [PATCH 055/122] report error messages maybe_parse_table_def as single lines let \n be added by eventual io:format --- src/riak_kv_ts_util.erl | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/riak_kv_ts_util.erl b/src/riak_kv_ts_util.erl index 03d25a9213..dc693a546f 100644 --- a/src/riak_kv_ts_util.erl +++ b/src/riak_kv_ts_util.erl @@ -168,8 +168,10 @@ maybe_parse_table_def(BucketType, Props) -> {ok, DDL} -> ok = assert_type_and_table_name_same(BucketType, DDL), ok = try_compile_ddl(DDL), - ok = assert_write_once_not_false(PropsNoDef), - apply_timeseries_bucket_props(DDL, PropsNoDef); + MergedProps = merge_props_with_preference( + PropsNoDef, WithProps), + ok = assert_write_once_not_false(BucketType, MergedProps), + apply_timeseries_bucket_props(DDL, MergedProps); {'EXIT', {Reason, _}} -> % the lexer throws exceptions, the reason should always be a % binary @@ -186,13 +188,13 @@ maybe_parse_table_def(BucketType, Props) -> %% Time series must always use write_once so throw an error if the write_once %% property is ever set to false. This prevents a user thinking they have %% disabled write_once when it has been set to true internally. -assert_write_once_not_false(Props) -> +assert_write_once_not_false(BucketType, Props) -> case lists:keyfind(<<"write_once">>, 1, Props) of {<<"write_once">>, false} -> throw({error, {write_once, - "Error, the time series bucket type could not be created. " - "The write_once property must be true\n"}}); + flat_format( + "Time series bucket type ~s has write_once == false", [BucketType])}}); _ -> ok end. 
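%% For example (a sketch; flat_format/2 is added at the bottom of this
%% module), the message above renders as a single line:
%%   flat_format("Time series bucket type ~s has write_once == false",
%%               [<<"mytype">>])
%%   %% -> "Time series bucket type mytype has write_once == false"
%% and an eventual io:format("~s~n", ...) at the console supplies the
%% trailing newline.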
@@ -203,9 +205,9 @@ assert_type_and_table_name_same(BucketType, #ddl_v1{table = BucketType}) -> assert_type_and_table_name_same(BucketType1, #ddl_v1{table = BucketType2}) -> throw({error, {table_name, - "The bucket type and table name must be the same\n" - " bucket type was: " ++ binary_to_list(BucketType1) ++ "\n" - " table name was: " ++ binary_to_list(BucketType2) ++ "\n"}}). + flat_format( + "Time series bucket type and table name mismatch (~s != ~s)", + [BucketType1, BucketType2])}}). %% Attempt to compile the DDL but don't do anything with the output, this is %% catch failures as early as possible. Also the error messages are easy to @@ -255,6 +257,7 @@ make_ts_keys(CompoundKey, DDL = #ddl_v1{local_key = #key_v1{ast = LKParams}, encode_typeval_key(TypeVals) -> list_to_tuple([Val || {_Type, Val} <- TypeVals]). + %% Print the query explanation to the shell. explain_query_print(QueryString) -> explain_query_print2(1, explain_query(QueryString)). @@ -343,6 +346,7 @@ op_to_string(Op) -> " " ++ atom_to_list(Op) ++ " ". varchar_quotes(V) -> <<"'", V/binary, "'">>. + %%% %%% TESTS %%% @@ -410,3 +414,6 @@ make_ts_keys_4_test() -> ). -endif. + +flat_format(F, A) -> + lists:flatten(io_lib:format(F, A)). From 5c2eefb75c5ab268c840d4c6ae61e17a715f0e68 Mon Sep 17 00:00:00 2001 From: Andrei Zavada Date: Tue, 26 Jan 2016 12:29:26 +0200 Subject: [PATCH 056/122] support setting bucket props with WITH in CREATE TABLE query Relies on lexer/parser support in riak_ql. Instead of {ok, #ddl_v1{}}, riak_ql_parser:parse/1 now returns {ok, {#ddl_v1{}, Properties::proplist()}}. This introduces some cumbersomeness but has been chosen in favour of adding a new field and letting #ddl_v1{} carry the properties in it. --- src/riak_kv_pb_timeseries.erl | 25 +++++++++-------- src/riak_kv_qry.erl | 4 +-- src/riak_kv_qry_compiler.erl | 52 ++++++++++++++++++----------------- src/riak_kv_ts_util.erl | 21 +++++++++++--- 4 files changed, 60 insertions(+), 42 deletions(-) diff --git a/src/riak_kv_pb_timeseries.erl b/src/riak_kv_pb_timeseries.erl index 52abe02a8a..4333fe7a03 100644 --- a/src/riak_kv_pb_timeseries.erl +++ b/src/riak_kv_pb_timeseries.erl @@ -131,15 +131,15 @@ decode_query(#tsinterpolation{ base = BaseQuery }, Cover) -> riak_kv_ts_util:build_sql_record(describe, SQL, Cover); {insert, SQL} -> riak_kv_ts_util:build_sql_record(insert, SQL, Cover); - {ddl, DDL} -> - {ok, DDL}; + {ddl, DDL, WithProperties} -> + {ok, {DDL, WithProperties}}; Other -> Other end. -spec decode_query_permissions(ts_query_types()) -> {string(), binary()}. 
-decode_query_permissions(#ddl_v1{table = NewBucketType}) -> +decode_query_permissions({?DDL{table = NewBucketType}, _WithProps}) -> {"riak_kv.ts_create_table", NewBucketType}; decode_query_permissions(?SQL_SELECT{'FROM' = Table}) -> {"riak_kv.ts_query", Table}; @@ -180,9 +180,9 @@ process(M = #tscoveragereq{table = Table}, State) -> check_table_and_call(Table, fun sub_tscoveragereq/4, M, State); %% this is tsqueryreq, subdivided per query type in its SQL -process(DDL = #ddl_v1{}, State) -> +process({DDL = ?DDL{}, WithProperties}, State) -> %% the only one that doesn't require an activated table - create_table(DDL, State); + create_table({DDL, WithProperties}, State); process(M = ?SQL_SELECT{'FROM' = Table}, State) -> check_table_and_call(Table, fun sub_tsqueryreq/4, M, State); @@ -226,10 +226,10 @@ process_stream({ReqId, Error}, ReqId, %% create_table, the only function for which we don't do %% check_table_and_call --spec create_table(#ddl_v1{}, #state{}) -> +-spec create_table({?DDL{}, proplists:proplist()}, #state{}) -> {reply, #tsqueryresp{} | #rpberrorresp{}, #state{}}. -create_table(DDL = #ddl_v1{table = Table}, State) -> - {ok, Props1} = riak_kv_ts_util:apply_timeseries_bucket_props(DDL, []), +create_table({DDL = ?DDL{table = Table}, WithProps}, State) -> + {ok, Props1} = riak_kv_ts_util:apply_timeseries_bucket_props(DDL, WithProps), Props2 = [riak_kv_wm_utils:erlify_bucket_prop(P) || P <- Props1], case riak_core_bucket_type:create(Table, Props2) of ok -> @@ -986,13 +986,16 @@ missing_helper_module_test() -> ). test_helper_validate_rows_mod() -> - riak_ql_ddl_compiler:compile_and_load_from_tmp( - riak_ql_parser:parse(riak_ql_lexer:get_tokens( + {ddl, DDL, []} = + riak_ql_parser:ql_parse( + riak_ql_lexer:get_tokens( "CREATE TABLE mytable (" "family VARCHAR NOT NULL," "series VARCHAR NOT NULL," "time TIMESTAMP NOT NULL," - "PRIMARY KEY ((family, series, quantum(time, 1, 'm')), family, series, time))"))). + "PRIMARY KEY ((family, series, quantum(time, 1, 'm'))," + " family, series, time))")), + riak_ql_ddl_compiler:compile_and_load_from_tmp(DDL). validate_rows_empty_test() -> {module, Mod} = test_helper_validate_rows_mod(), diff --git a/src/riak_kv_qry.erl b/src/riak_kv_qry.erl index c3d1bad379..866f0fba9c 100644 --- a/src/riak_kv_qry.erl +++ b/src/riak_kv_qry.erl @@ -139,8 +139,8 @@ format_query_syntax_errors(Errors) -> -include_lib("eunit/include/eunit.hrl"). describe_table_columns_test() -> - {ok, DDL} = - riak_ql_parser:parse( + {ddl, DDL, []} = + riak_ql_parser:ql_parse( riak_ql_lexer:get_tokens( "CREATE TABLE fafa (" " f varchar not null," diff --git a/src/riak_kv_qry_compiler.erl b/src/riak_kv_qry_compiler.erl index a3c0af7e2a..1f0aeff305 100644 --- a/src/riak_kv_qry_compiler.erl +++ b/src/riak_kv_qry_compiler.erl @@ -54,11 +54,11 @@ %% 3rd argument is undefined if we should not be concerned about the %% maximum number of quanta --spec compile(#ddl_v1{}, ?SQL_SELECT{}, 'undefined'|pos_integer()) -> +-spec compile(?DDL{}, ?SQL_SELECT{}, 'undefined'|pos_integer()) -> {ok, [?SQL_SELECT{}]} | {error, any()}. 
-compile(#ddl_v1{}, ?SQL_SELECT{is_executable = true}, _MaxSubQueries) -> +compile(?DDL{}, ?SQL_SELECT{is_executable = true}, _MaxSubQueries) -> {error, 'query is already compiled'}; -compile(#ddl_v1{} = DDL, +compile(?DDL{} = DDL, ?SQL_SELECT{is_executable = false, 'SELECT' = Sel} = Q, MaxSubQueries) -> if Sel#riak_sel_clause_v1.clause == [] -> {error, 'full table scan not implemented'}; @@ -74,7 +74,7 @@ compile(#ddl_v1{} = DDL, %% adding the local key here is a bodge %% should be a helper fun in the generated DDL module but I couldn't %% write that up in time -compile_where_clause(#ddl_v1{} = DDL, +compile_where_clause(?DDL{} = DDL, ?SQL_SELECT{is_executable = false, 'WHERE' = W, cover_context = Cover} = Q, @@ -89,7 +89,7 @@ compile_where_clause(#ddl_v1{} = DDL, end. %% now break out the query on quantum boundaries -expand_query(#ddl_v1{local_key = LK, partition_key = PK}, +expand_query(?DDL{local_key = LK, partition_key = PK}, ?SQL_SELECT{} = Q1, Where1, MaxSubQueries) -> case expand_where(Where1, PK, MaxSubQueries) of @@ -196,7 +196,7 @@ compile_select_clause(DDL, ?SQL_SELECT{'SELECT' = #riak_sel_clause_v1{ clause = end. %% --spec get_col_names(#ddl_v1{}, ?SQL_SELECT{}) -> [binary()]. +-spec get_col_names(?DDL{}, ?SQL_SELECT{}) -> [binary()]. get_col_names(DDL, ?SQL_SELECT{'SELECT' = #riak_sel_clause_v1{clause = Select}}) -> ColNames = riak_ql_to_string:col_names_from_select(Select), %% flatten because * gets expanded to multiple columns @@ -206,7 +206,7 @@ get_col_names(DDL, ?SQL_SELECT{'SELECT' = #riak_sel_clause_v1{clause = Select}}) %% get_col_names2(DDL, "*") -> - [X#riak_field_v1.name || X <- DDL#ddl_v1.fields]; + [X#riak_field_v1.name || X <- DDL?DDL.fields]; get_col_names2(_, Name) -> list_to_binary(Name). @@ -221,7 +221,7 @@ get_col_names2(_, Name) -> }). %% --spec select_column_clause_folder(#ddl_v1{}, riak_ql_ddl:selection(), +-spec select_column_clause_folder(?DDL{}, riak_ql_ddl:selection(), {set(), #riak_sel_clause_v1{}}) -> {set(), #riak_sel_clause_v1{}}. select_column_clause_folder(DDL, ColAST1, @@ -273,7 +273,7 @@ select_column_clause_exploded_folder(DDL, {ColAst, Finaliser}, {TypeSet1, SelCla %% Compile a single selection column into a fun that can extract the cell %% from the row. --spec compile_select_col(DDL::#ddl_v1{}, ColumnSpec::any()) -> +-spec compile_select_col(DDL::?DDL{}, ColumnSpec::any()) -> #single_sel_column{}. compile_select_col(DDL, {{window_agg_fn, FnName}, [FnArg1]}) when is_atom(FnName) -> case riak_ql_window_agg_fns:start_state(FnName) of @@ -304,8 +304,10 @@ compile_select_col(DDL, Select) -> %% Returns a one arity fun which is stateless for example pulling a field from a %% row. --spec compile_select_col_stateless(#ddl_v1{}, riak_ql_ddl:selection()|{Op::atom(), riak_ql_ddl:selection(), riak_ql_ddl:selection()}|{return_state, integer()}) -> - compiled_select(). +-spec compile_select_col_stateless(?DDL{}, riak_ql_ddl:selection() + | {Op::atom(), riak_ql_ddl:selection(), riak_ql_ddl:selection()} + | {return_state, integer()}) -> + compiled_select(). 
compile_select_col_stateless(_, {identifier, [<<"*">>]}) -> fun(Row, _) -> Row end; compile_select_col_stateless(DDL, {negate, ExprToNegate}) -> @@ -320,7 +322,7 @@ compile_select_col_stateless(_, {finalise_aggregation, FnName, N}) -> ColValue = pull_from_row(N, Row), riak_ql_window_agg_fns:finalise(FnName, ColValue) end; -compile_select_col_stateless(#ddl_v1{ fields = Fields }, {identifier, ColumnName}) -> +compile_select_col_stateless(?DDL{ fields = Fields }, {identifier, ColumnName}) -> {Index, _} = col_index_and_type_of(Fields, to_column_name_binary(ColumnName)), fun(Row,_) -> pull_from_row(Index, Row) end; compile_select_col_stateless(DDL, {Op, A, B}) -> @@ -329,7 +331,7 @@ compile_select_col_stateless(DDL, {Op, A, B}) -> compile_select_col_stateless2(Op, Arg_a, Arg_b). %% --spec infer_col_type(#ddl_v1{}, riak_ql_ddl:selection(), Errors1::[any()]) -> +-spec infer_col_type(?DDL{}, riak_ql_ddl:selection(), Errors1::[any()]) -> {riak_ql_ddl:simple_field_type() | error, Errors2::[any()]}. infer_col_type(_, {Type, _}, Errors) when Type == sint64; Type == varchar; Type == boolean; Type == double -> @@ -340,7 +342,7 @@ infer_col_type(_, {integer, _}, Errors) -> {sint64, Errors}; infer_col_type(_, {float, _}, Errors) -> {double, Errors}; -infer_col_type(#ddl_v1{ fields = Fields }, {identifier, ColName1}, Errors) -> +infer_col_type(?DDL{ fields = Fields }, {identifier, ColName1}, Errors) -> case to_column_name_binary(ColName1) of <<"*">> -> Type = [T || #riak_field_v1{ type = T } <- Fields]; @@ -583,7 +585,7 @@ quantum_field_name(DDL) -> end. %% -find_quantum_fields(#ddl_v1{ partition_key = #key_v1{ ast = PKAST } }) -> +find_quantum_fields(?DDL{ partition_key = #key_v1{ ast = PKAST } }) -> [quantum_fn_to_field_name(QuantumFunc) || #hash_fn_v1{ } = QuantumFunc <- PKAST]. %% @@ -592,12 +594,12 @@ quantum_fn_to_field_name(#hash_fn_v1{ mod = riak_ql_quanta, args = [#param_v1{name = [Name]}|_ ] }) -> Name. -check_if_timeseries(#ddl_v1{table = T, partition_key = PK, local_key = LK0} = DDL, +check_if_timeseries(?DDL{table = T, partition_key = PK, local_key = LK0} = DDL, [W]) -> try #key_v1{ast = PartitionKeyAST} = PK, PartitionFields = [X || #param_v1{name = X} <- PartitionKeyAST], - LK = LK0#key_v1{ast = lists:sublist(LK0#key_v1.ast, length(PartitionKeyAST))}, + LK = LK0#key_v1{ast = lists:sublist(LK0#key_v1.ast, length(PartitionKeyAST))}, QuantumFieldName = quantum_field_name(DDL), StrippedW = strip(W, []), {StartW, EndW, Filter} = @@ -634,7 +636,7 @@ check_if_timeseries(#ddl_v1{table = T, partition_key = PK, local_key = LK0} = DD %% debugging {error, {where_not_timeseries, Reason, erlang:get_stacktrace()}} end; -check_if_timeseries(#ddl_v1{ }, []) -> +check_if_timeseries(?DDL{}, []) -> {error, {no_where_clause, ?E_NO_WHERE_CLAUSE}}. %% @@ -877,7 +879,7 @@ modify_where_key(TupleList, Field, NewVal) -> -define(MIN, 60 * 1000). -define(NAME, "time"). -is_query_valid(#ddl_v1{ table = Table } = DDL, Q) -> +is_query_valid(?DDL{table = Table} = DDL, Q) -> Mod = riak_ql_ddl:make_module_name(Table), riak_ql_ddl:is_query_valid(Mod, DDL, riak_kv_ts_util:sql_record_to_tuple(Q)). @@ -915,7 +917,7 @@ get_standard_ddl() -> get_ddl(SQL) -> Lexed = riak_ql_lexer:get_tokens(SQL), - {ok, DDL} = riak_ql_parser:parse(Lexed), + {ddl, DDL, _WithProps} = riak_ql_parser:ql_parse(Lexed), {module, _Module} = riak_ql_ddl_compiler:compile_and_load_from_tmp(DDL), DDL. 
@@ -950,7 +952,7 @@ get_standard_lk() -> %% simple_filter_typing_test() -> - #ddl_v1{table = T} = get_long_ddl(), + ?DDL{table = T} = get_long_ddl(), Mod = riak_ql_ddl:make_module_name(T), Filter = [ {or_, @@ -982,7 +984,7 @@ simple_filter_typing_test() -> %% we have enough info to build a range scan %% simple_rewrite_test() -> - #ddl_v1{table = T} = get_standard_ddl(), + ?DDL{table = T} = get_standard_ddl(), Mod = riak_ql_ddl:make_module_name(T), LK = #key_v1{ast = [ #param_v1{name = [<<"geohash">>]}, @@ -1006,7 +1008,7 @@ simple_rewrite_test() -> %% local key - there is no enough info for a range scan %% simple_rewrite_fail_1_test() -> - #ddl_v1{table = T} = get_standard_ddl(), + ?DDL{table = T} = get_standard_ddl(), Mod = riak_ql_ddl:make_module_name(T), LK = #key_v1{ast = [ #param_v1{name = [<<"geohash">>]}, @@ -1021,7 +1023,7 @@ simple_rewrite_fail_1_test() -> ). simple_rewrite_fail_2_test() -> - #ddl_v1{table = T} = get_standard_ddl(), + ?DDL{table = T} = get_standard_ddl(), Mod = riak_ql_ddl:make_module_name(T), LK = #key_v1{ast = [ #param_v1{name = [<<"geohash">>]}, @@ -1036,7 +1038,7 @@ simple_rewrite_fail_2_test() -> ). simple_rewrite_fail_3_test() -> - #ddl_v1{table = T} = get_standard_ddl(), + ?DDL{table = T} = get_standard_ddl(), Mod = riak_ql_ddl:make_module_name(T), LK = #key_v1{ast = [ #param_v1{name = [<<"geohash">>]}, diff --git a/src/riak_kv_ts_util.erl b/src/riak_kv_ts_util.erl index dc693a546f..4a5d6ef9b3 100644 --- a/src/riak_kv_ts_util.erl +++ b/src/riak_kv_ts_util.erl @@ -150,7 +150,9 @@ get_table_ddl(Table) when is_binary(Table) -> apply_timeseries_bucket_props(DDL, Props1) -> Props2 = lists:keystore( <<"write_once">>, 1, Props1, {<<"write_once">>, true}), - {ok, [{<<"ddl">>, DDL} | Props2]}. + Props3 = lists:keystore( + <<"ddl">>, 1, Props2, {<<"ddl">>, DDL}), + {ok, Props3}. -spec maybe_parse_table_def(BucketType :: binary(), @@ -164,8 +166,9 @@ maybe_parse_table_def(BucketType, Props) -> false -> {ok, Props}; {value, {<<"table_def">>, TableDef}, PropsNoDef} -> - case catch riak_ql_parser:parse(riak_ql_lexer:get_tokens(binary_to_list(TableDef))) of - {ok, DDL} -> + case catch riak_ql_parser:ql_parse( + riak_ql_lexer:get_tokens(binary_to_list(TableDef))) of + {ddl, DDL = ?DDL{}, WithProps} -> ok = assert_type_and_table_name_same(BucketType, DDL), ok = try_compile_ddl(DDL), MergedProps = merge_props_with_preference( @@ -199,6 +202,16 @@ assert_write_once_not_false(BucketType, Props) -> ok end. +-spec merge_props_with_preference(proplists:proplist(), proplists:proplist()) -> + proplists:proplist(). +%% If same keys appear in RpbBucketProps as well as embedded in the +%% query ("CREATE TABLE ... WITH"), we merge the two proplists giving +%% preference to the latter. +merge_props_with_preference(PbProps, WithProps) -> + lists:foldl( + fun({K, _} = P, Acc) -> lists:keystore(K, 1, Acc, P) end, + PbProps, WithProps). + %% Ensure table name in DDL and bucket type are the same. assert_type_and_table_name_same(BucketType, #ddl_v1{table = BucketType}) -> ok; @@ -356,7 +369,7 @@ varchar_quotes(V) -> helper_compile_def_to_module(SQL) -> Lexed = riak_ql_lexer:get_tokens(SQL), - {ok, DDL} = riak_ql_parser:parse(Lexed), + {ok, {DDL, _Props}} = riak_ql_parser:parse(Lexed), {module, Mod} = riak_ql_ddl_compiler:compile_and_load_from_tmp(DDL), {DDL, Mod}. 
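For reference, the WITH-clause precedence implemented by merge_props_with_preference/2 above can be illustrated as follows. This is only a sketch with made-up property values; lists:keystore/4 replaces an existing pair in place, so a key present in both proplists ends up with the WITH-clause value:

    %% sidecar (protobuf) properties lose to WITH-clause properties on a
    %% key collision; unrelated keys are preserved untouched:
    PbProps   = [{<<"n_val">>, 41}, {<<"allow_mult">>, false}],
    WithProps = [{<<"n_val">>, 42}],
    [{<<"n_val">>, 42}, {<<"allow_mult">>, false}] =
        lists:foldl(fun({K, _} = P, Acc) -> lists:keystore(K, 1, Acc, P) end,
                    PbProps, WithProps).

This mirrors the fold in merge_props_with_preference/2 itself, applied here to two tiny illustrative proplists.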
From 12d3fbbc36010d5e6e9f4b455f9da269bb1682b6 Mon Sep 17 00:00:00 2001 From: Andrei Zavada Date: Fri, 11 Mar 2016 03:20:09 +0200 Subject: [PATCH 057/122] consistently refer to #ddl_v1{} as ?DDL{} --- src/riak_kv_pb_timeseries.erl | 10 +++++----- src/riak_kv_qry.erl | 12 ++++++------ src/riak_kv_qry_queue.erl | 2 +- src/riak_kv_ts_newtype.erl | 8 ++++---- src/riak_kv_ts_util.erl | 14 +++++++------- 5 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/riak_kv_pb_timeseries.erl b/src/riak_kv_pb_timeseries.erl index 4333fe7a03..bda0d29827 100644 --- a/src/riak_kv_pb_timeseries.erl +++ b/src/riak_kv_pb_timeseries.erl @@ -74,7 +74,7 @@ -type ts_responses() :: #tsputresp{} | #tsdelresp{} | #tsgetresp{} | #tslistkeysresp{} | #tsqueryresp{} | #rpberrorresp{}. --type ts_query_types() :: #ddl_v1{} | ?SQL_SELECT{} | #riak_sql_describe_v1{} | +-type ts_query_types() :: ?DDL{} | ?SQL_SELECT{} | #riak_sql_describe_v1{} | #riak_sql_insert_v1{}. -type process_retval() :: {reply, RpbOrTsMessage::tuple(), #state{}}. @@ -456,7 +456,7 @@ put_data(Data, Table, Mod) when is_binary(Table) -> -spec partition_data(Data :: list(term()), Bucket :: {binary(), binary()}, BucketProps :: proplists:proplist(), - DDL :: #ddl_v1{}, + DDL :: ?DDL{}, Mod :: module()) -> list(tuple(chash:index(), list(term()))). partition_data(Data, Bucket, BucketProps, DDL, Mod) -> @@ -602,7 +602,7 @@ sub_tslistkeysreq(Mod, DDL, #tslistkeysreq{table = Table, {ok, ReqId} -> ColumnInfo = [Mod:get_field_type(N) - || #param_v1{name = N} <- DDL#ddl_v1.local_key#key_v1.ast], + || #param_v1{name = N} <- DDL?DDL.local_key#key_v1.ast], {reply, {stream, ReqId}, State#state{req = Req, req_ctx = ReqId, column_info = ColumnInfo}}; {error, Reason} -> @@ -804,7 +804,7 @@ make_tsquery_resp(Mod, SQL = #riak_sql_insert_v1{}, _Data) -> %% --------------------------------------------------- -spec check_table_and_call(Table::binary(), - WorkItem::fun((module(), #ddl_v1{}, + WorkItem::fun((module(), ?DDL{}, OrigMessage::tuple(), #state{}) -> process_retval()), OrigMessage::tuple(), @@ -982,7 +982,7 @@ missing_helper_module_not_ts_type_test() -> missing_helper_module_test() -> ?assertMatch( #rpberrorresp{errcode = ?E_MISSING_TS_MODULE }, - missing_helper_module(<<"mytype">>, [{ddl, #ddl_v1{}}]) + missing_helper_module(<<"mytype">>, [{ddl, ?DDL{}}]) ). test_helper_validate_rows_mod() -> diff --git a/src/riak_kv_qry.erl b/src/riak_kv_qry.erl index 866f0fba9c..f043b6b137 100644 --- a/src/riak_kv_qry.erl +++ b/src/riak_kv_qry.erl @@ -33,7 +33,7 @@ -include("riak_kv_ts.hrl"). %% No coverage plan for parallel requests --spec submit(string() | ?SQL_SELECT{} | #riak_sql_describe_v1{} | #riak_sql_insert_v1{}, #ddl_v1{}) -> +-spec submit(string() | ?SQL_SELECT{} | #riak_sql_describe_v1{} | #riak_sql_insert_v1{}, ?DDL{}) -> {ok, any()} | {error, any()}. %% @doc Parse, validate against DDL, and submit a query for execution. %% To get the results of running the query, use fetch/1. @@ -58,11 +58,11 @@ submit(SQL = ?SQL_SELECT{}, DDL) -> %% --------------------- %% local functions --spec describe_table_columns(#ddl_v1{}) -> +-spec describe_table_columns(?DDL{}) -> {ok, [[binary() | boolean() | integer() | undefined]]}. 
-describe_table_columns(#ddl_v1{fields = FieldSpecs, - partition_key = #key_v1{ast = PKSpec}, - local_key = #key_v1{ast = LKSpec}}) -> +describe_table_columns(?DDL{fields = FieldSpecs, + partition_key = #key_v1{ast = PKSpec}, + local_key = #key_v1{ast = LKSpec}}) -> {ok, [[Name, list_to_binary(atom_to_list(Type)), Nullable, column_pk_position_or_blank(Name, PKSpec), @@ -92,7 +92,7 @@ count_to_position(Col, [_ | Rest], Pos) -> count_to_position(Col, Rest, Pos + 1). -maybe_submit_to_queue(SQL, #ddl_v1{table = BucketType} = DDL) -> +maybe_submit_to_queue(SQL, ?DDL{table = BucketType} = DDL) -> Mod = riak_ql_ddl:make_module_name(BucketType), MaxSubQueries = app_helper:get_env(riak_kv, timeseries_query_max_quanta_span), diff --git a/src/riak_kv_qry_queue.erl b/src/riak_kv_qry_queue.erl index 17a998a812..90b9ef1182 100644 --- a/src/riak_kv_qry_queue.erl +++ b/src/riak_kv_qry_queue.erl @@ -67,7 +67,7 @@ %%% API %%%=================================================================== --spec put_on_queue(pid(), [qry()], #ddl_v1{}) -> +-spec put_on_queue(pid(), [qry()], ?DDL{}) -> {ok, query_id()} | {error, term()}. %% @doc Enqueue a prepared query for execution. The query should be %% compatible with the DDL supplied. diff --git a/src/riak_kv_ts_newtype.erl b/src/riak_kv_ts_newtype.erl index 37d0fd2ba4..1d4a008cf0 100644 --- a/src/riak_kv_ts_newtype.erl +++ b/src/riak_kv_ts_newtype.erl @@ -1,6 +1,6 @@ %% ------------------------------------------------------------------- %% -%% riak_kv_ts_newtype +%% riak_kv_ts_newtype %% %% Copyright (c) 2016 Basho Technologies, Inc. All Rights Reserved. %% @@ -43,7 +43,7 @@ -include_lib("riak_ql/include/riak_ql_ddl.hrl"). -%%% +%%% %%% API. %%% @@ -55,7 +55,7 @@ start_link() -> new_type(BucketType) -> gen_server:cast(?MODULE, {new_type, BucketType}). -%%% +%%% %%% gen_server. %%% @@ -117,7 +117,7 @@ do_new_type(BucketType) -> maybe_compile_ddl(_BucketType, NewDDL, NewDDL) -> %% Do nothing; we're seeing a CMD update but the DDL hasn't changed ok; -maybe_compile_ddl(BucketType, NewDDL, _OldDDL) when is_record(NewDDL, ddl_v1) -> +maybe_compile_ddl(BucketType, NewDDL, _OldDDL) when is_record(NewDDL, ?DDL_RECORD_NAME) -> ok = maybe_stop_current_compilation(BucketType), ok = start_compilation(BucketType, NewDDL); maybe_compile_ddl(_BucketType, _NewDDL, _OldDDL) -> diff --git a/src/riak_kv_ts_util.erl b/src/riak_kv_ts_util.erl index 4a5d6ef9b3..b154f411ba 100644 --- a/src/riak_kv_ts_util.erl +++ b/src/riak_kv_ts_util.erl @@ -124,7 +124,7 @@ queried_table(#riak_sql_insert_v1{'INSERT' = Table}) -> Table. -spec get_table_ddl(binary()) -> - {ok, module(), #ddl_v1{}} | + {ok, module(), ?DDL{}} | {error, term()}. %% Check that Table is in good standing and ready for TS operations %% (its bucket type has been activated and it has a DDL in its props) @@ -144,7 +144,7 @@ get_table_ddl(Table) when is_binary(Table) -> %% --spec apply_timeseries_bucket_props(DDL::#ddl_v1{}, +-spec apply_timeseries_bucket_props(DDL::?DDL{}, Props1::[proplists:property()]) -> {ok, Props2::[proplists:property()]}. apply_timeseries_bucket_props(DDL, Props1) -> @@ -213,9 +213,9 @@ merge_props_with_preference(PbProps, WithProps) -> PbProps, WithProps). %% Ensure table name in DDL and bucket type are the same. 
-assert_type_and_table_name_same(BucketType, #ddl_v1{table = BucketType}) -> +assert_type_and_table_name_same(BucketType, ?DDL{table = BucketType}) -> ok; -assert_type_and_table_name_same(BucketType1, #ddl_v1{table = BucketType2}) -> +assert_type_and_table_name_same(BucketType1, ?DDL{table = BucketType2}) -> throw({error, {table_name, flat_format( @@ -231,14 +231,14 @@ try_compile_ddl(DDL) -> ok. --spec make_ts_keys([riak_pb_ts_codec:ldbvalue()], #ddl_v1{}, module()) -> +-spec make_ts_keys([riak_pb_ts_codec:ldbvalue()], ?DDL{}, module()) -> {ok, {binary(), binary()}} | {error, {bad_key_length, integer(), integer()}}. %% Given a list of values (of appropriate types) and a DDL, produce a %% partition and local key pair, which can be used in riak_client:get %% to fetch TS objects. -make_ts_keys(CompoundKey, DDL = #ddl_v1{local_key = #key_v1{ast = LKParams}, - fields = Fields}, Mod) -> +make_ts_keys(CompoundKey, DDL = ?DDL{local_key = #key_v1{ast = LKParams}, + fields = Fields}, Mod) -> %% 1. use elements in Key to form a complete data record: KeyFields = [F || #param_v1{name = [F]} <- LKParams], Got = length(CompoundKey), From 81e7a8db12258b4ba534b5a0f6ca5a553f9ae455 Mon Sep 17 00:00:00 2001 From: Andrei Zavada Date: Fri, 11 Mar 2016 16:31:31 +0200 Subject: [PATCH 058/122] more accurate check for valid bucket type property names In riak_kv_wm_utils:erlify_bucket_prop, rather than relying on list_to_existing_atom, check against a list of known property names, via a convenience fun riak_kv_bucket:is_valid_property/1. --- src/riak_kv_bucket.erl | 40 ++++++++++++++++++++++++++++++++++- src/riak_kv_console.erl | 30 +++++++++++++++----------- src/riak_kv_pb_timeseries.erl | 18 ++++++++++------ src/riak_kv_wm_utils.erl | 14 +++++++++++- 4 files changed, 82 insertions(+), 20 deletions(-) diff --git a/src/riak_kv_bucket.erl b/src/riak_kv_bucket.erl index 887268ad7a..be932f04e3 100644 --- a/src/riak_kv_bucket.erl +++ b/src/riak_kv_bucket.erl @@ -2,7 +2,7 @@ %% %% riak_kv_bucket: bucket validation functions %% -%% Copyright (c) 2007-2011 Basho Technologies, Inc. All Rights Reserved. +%% Copyright (c) 2007-2016 Basho Technologies, Inc. All Rights Reserved. %% %% This file is provided to you under the Apache License, %% Version 2.0 (the "License"); you may not use this file @@ -24,6 +24,7 @@ -module(riak_kv_bucket). -export([validate/4]). +-export([is_valid_property/1]). -include("riak_kv_types.hrl"). @@ -43,6 +44,43 @@ -export_type([props/0]). +-define(VALID_PROPERTIES, + ["allow_mult", + "basic_quorum", + "big_vclock", + "bucket_type", + "chash_keyfun", + "dvv_enabled", + "dw", + "last_write_wins", + "linkfun", + "n_val", + "notfound_ok", + "old_vclock", + "postcommit", + "pr", + "precommit", + "pw", + "r", + "rw", + "small_vclock", + "w", + "write_once", + "young_vclock", + %% TS-specific ones: + "ddl", + "table_def" + ]). + +-spec is_valid_property(string() | binary()) -> boolean(). +%% @doc Checks whether a given binary or string is a valid bucket type +%% property. +is_valid_property(P) when is_list(P) -> + lists:member(P, ?VALID_PROPERTIES); +is_valid_property(P) when is_binary(P) -> + is_valid_property(binary_to_list(P)). + + %% @doc called by riak_core in a few places to ensure bucket %% properties are sane. 
The arguments combinations have the following %% meanings:- diff --git a/src/riak_kv_console.erl b/src/riak_kv_console.erl index bb7ebf2f8c..fdc08612c5 100644 --- a/src/riak_kv_console.erl +++ b/src/riak_kv_console.erl @@ -522,8 +522,13 @@ bucket_type_create(CreateTypeFn, Type, {struct, Fields}) -> [{<<"props", _/binary>>, {struct, Props1}}] -> case catch riak_kv_ts_util:maybe_parse_table_def(Type, Props1) of {ok, Props2} -> - Props3 = [riak_kv_wm_utils:erlify_bucket_prop(P) || P <- Props2], - CreateTypeFn(Props3); + case catch [riak_kv_wm_utils:erlify_bucket_prop(P) || P <- Props2] of + {bad_bucket_property, BadProp} -> + io:format("Invalid bucket type property: ~ts\n", [BadProp]), + error; + Props3 -> + CreateTypeFn(Props3) + end; {error, ErrorMessage} when is_list(ErrorMessage) orelse is_binary(ErrorMessage) -> bucket_type_print_create_result_error_header(Type), io:format("~ts~n", [ErrorMessage]), @@ -566,8 +571,13 @@ bucket_type_update([TypeStr, PropsStr]) -> bucket_type_update(Type, {struct, Fields}) -> case proplists:get_value(<<"props">>, Fields) of {struct, Props} -> - ErlProps = [riak_kv_wm_utils:erlify_bucket_prop(P) || P <- Props], - bucket_type_print_update_result(Type, riak_core_bucket_type:update(Type, ErlProps)); + case catch [riak_kv_wm_utils:erlify_bucket_prop(P) || P <- Props] of + {bad_bucket_property, BadProp} -> + io:format("Invalid bucket type property: ~ts\n", [BadProp]), + error; + ErlProps -> + bucket_type_print_update_result(Type, riak_core_bucket_type:update(Type, ErlProps)) + end; _ -> io:format("Cannot create bucket type ~ts: no props field found in json~n", [Type]), error @@ -902,23 +912,19 @@ bucket_type_create_with_timeseries_table_test() -> "time timestamp not null, ", "PRIMARY KEY ((series, user, quantum(time, 15, m)), " "series, user, time))" - " with (prop1='woo', prop2 = 42)">>, + " with (n_val=42)">>, JSON = json_props([{bucket_type, my_type}, {table_def, TableDef}, - {prop2, 41}]), + {n_val, 41}]), bucket_type_create( fun(Props) -> put(Ref, Props) end, <<"my_type">>, mochijson2:decode(JSON) ), ?assertMatch( - {prop1, <<"woo">>}, - lists:keyfind(prop1, 1, get(Ref)) - ), - ?assertMatch( - {prop2, 42}, %% 42 set in query via 'with' + {n_val, 42}, %% 42 set in query via 'with' %% takes precedence over 41 from sidecar properties - lists:keyfind(prop2, 1, get(Ref)) + lists:keyfind(n_val, 1, get(Ref)) ), ?assertMatch( {ddl, _}, diff --git a/src/riak_kv_pb_timeseries.erl b/src/riak_kv_pb_timeseries.erl index bda0d29827..23d2b97ae8 100644 --- a/src/riak_kv_pb_timeseries.erl +++ b/src/riak_kv_pb_timeseries.erl @@ -230,12 +230,18 @@ process_stream({ReqId, Error}, ReqId, {reply, #tsqueryresp{} | #rpberrorresp{}, #state{}}. 
create_table({DDL = ?DDL{table = Table}, WithProps}, State) -> {ok, Props1} = riak_kv_ts_util:apply_timeseries_bucket_props(DDL, WithProps), - Props2 = [riak_kv_wm_utils:erlify_bucket_prop(P) || P <- Props1], - case riak_core_bucket_type:create(Table, Props2) of - ok -> - wait_until_active(Table, State, ?TABLE_ACTIVATE_WAIT); - {error, Reason} -> - {reply, table_create_fail_response(Table, Reason), State} + case catch [riak_kv_wm_utils:erlify_bucket_prop(P) || P <- Props1] of + {bad_bucket_property, BadProp} -> + {reply, table_create_fail_response( + Table, flat_format("Invalid bucket type property: ~ts", [BadProp])), + State}; + Props2 -> + case riak_core_bucket_type:create(Table, Props2) of + ok -> + wait_until_active(Table, State, ?TABLE_ACTIVATE_WAIT); + {error, Reason} -> + {reply, table_create_fail_response(Table, Reason), State} + end end. wait_until_active(Table, State, 0) -> diff --git a/src/riak_kv_wm_utils.erl b/src/riak_kv_wm_utils.erl index b18cbbc471..ca50e3d1a8 100644 --- a/src/riak_kv_wm_utils.erl +++ b/src/riak_kv_wm_utils.erl @@ -398,7 +398,19 @@ erlify_bucket_prop({?JSON_CHASH, {struct, Props}}) -> erlify_bucket_prop({<<"ddl">>, Value}) -> {ddl, Value}; erlify_bucket_prop({Prop, Value}) -> - {list_to_existing_atom(binary_to_list(Prop)), Value}. + {validate_bucket_property(binary_to_list(Prop)), Value}. + +%% this serves to narrow the property name check to a set of +%% known properties, rather than use list_to_existing_atom which +%% is too broad, and also to report a meaningful exception, for +%% callers to deal with and report further. +validate_bucket_property(P) -> + case riak_kv_bucket:is_valid_property(P) of + true -> + list_to_atom(P); + false -> + throw({bad_bucket_property, P}) + end. %% @doc Populates the resource's context/state with the bucket type %% from the path info, if not already set. From 704efa9a70dc8148468ddf16bc74504aa6494c22 Mon Sep 17 00:00:00 2001 From: Andrei Zavada Date: Wed, 16 Mar 2016 16:13:57 +0200 Subject: [PATCH 059/122] shorten lines, clean up whitespace --- src/riak_kv_qry_compiler.erl | 136 +++++++++++++++++++++++------------ 1 file changed, 92 insertions(+), 44 deletions(-) diff --git a/src/riak_kv_qry_compiler.erl b/src/riak_kv_qry_compiler.erl index 1f0aeff305..17e3d00724 100644 --- a/src/riak_kv_qry_compiler.erl +++ b/src/riak_kv_qry_compiler.erl @@ -332,7 +332,7 @@ compile_select_col_stateless(DDL, {Op, A, B}) -> %% -spec infer_col_type(?DDL{}, riak_ql_ddl:selection(), Errors1::[any()]) -> - {riak_ql_ddl:simple_field_type() | error, Errors2::[any()]}. + {Type::riak_ql_ddl:simple_field_type() | error, Errors2::[any()]}. infer_col_type(_, {Type, _}, Errors) when Type == sint64; Type == varchar; Type == boolean; Type == double -> {Type, Errors}; @@ -379,14 +379,17 @@ pull_from_row(N, Row) -> %% -spec extract_stateful_functions(riak_ql_ddl:selection(), integer()) -> - {riak_ql_ddl:selection() | {return_state, integer()}, [riak_ql_ddl:selection_function()]}. + {riak_ql_ddl:selection() | + {return_state, integer()}, [riak_ql_ddl:selection_function()]}. extract_stateful_functions(Selection1, FinaliserLen) when is_integer(FinaliserLen) -> {Selection2, Fns} = extract_stateful_functions2(Selection1, FinaliserLen, []), {Selection2, lists:reverse(Fns)}. %% extract stateful functions from the selection --spec extract_stateful_functions2(riak_ql_ddl:selection(), integer(), [riak_ql_ddl:selection_function()]) -> - {riak_ql_ddl:selection() | {finalise_aggregation, FnName::atom(), integer()}, [riak_ql_ddl:selection_function()]}. 
+-spec extract_stateful_functions2(riak_ql_ddl:selection(), integer(), + [riak_ql_ddl:selection_function()]) -> + {riak_ql_ddl:selection() | {finalise_aggregation, FnName::atom(), integer()}, + [riak_ql_ddl:selection_function()]}. extract_stateful_functions2({Op, ArgA1, ArgB1}, FinaliserLen, Fns1) -> {ArgA2, Fns2} = extract_stateful_functions2(ArgA1, FinaliserLen, Fns1), {ArgB2, Fns3} = extract_stateful_functions2(ArgB1, FinaliserLen, Fns2), @@ -602,7 +605,7 @@ check_if_timeseries(?DDL{table = T, partition_key = PK, local_key = LK0} = DDL, LK = LK0#key_v1{ast = lists:sublist(LK0#key_v1.ast, length(PartitionKeyAST))}, QuantumFieldName = quantum_field_name(DDL), StrippedW = strip(W, []), - {StartW, EndW, Filter} = + {StartW, EndW, Filter} = break_out_timeseries(StrippedW, PartitionFields, QuantumFieldName), Mod = riak_ql_ddl:make_module_name(T), StartKey = rewrite(LK, StartW, Mod), @@ -675,15 +678,19 @@ find_timestamp_bounds2(QuantumFieldName, [{or_, {_, QuantumFieldName, _}, _} | _ %% if this is an or state ment, lookahead at what is being tested, the quanta %% cannot be tested with an OR operator error({time_bounds_must_use_and_op, ?E_TIME_BOUNDS_MUST_USE_AND}); -find_timestamp_bounds2(QuantumFieldName, [{Op, QuantumFieldName, _} = Filter | Tail], OtherFilters, BoundsAcc1) -> +find_timestamp_bounds2(QuantumFieldName, [{Op, QuantumFieldName, _} = Filter | Tail], + OtherFilters, BoundsAcc1) -> %% if there are already end bounds throw an error if Op == '>' orelse Op == '>=' -> - find_timestamp_bounds2(QuantumFieldName, Tail, OtherFilters, acc_lower_bounds(Filter, BoundsAcc1)); + find_timestamp_bounds2( + QuantumFieldName, Tail, OtherFilters, acc_lower_bounds(Filter, BoundsAcc1)); Op == '<' orelse Op == '<=' -> - find_timestamp_bounds2(QuantumFieldName, Tail, OtherFilters, acc_upper_bounds(Filter, BoundsAcc1)); + find_timestamp_bounds2( + QuantumFieldName, Tail, OtherFilters, acc_upper_bounds(Filter, BoundsAcc1)); Op == '=' orelse Op == '!=' -> - find_timestamp_bounds2(QuantumFieldName, Tail, [Filter | OtherFilters], BoundsAcc1) + find_timestamp_bounds2( + QuantumFieldName, Tail, [Filter | OtherFilters], BoundsAcc1) end; find_timestamp_bounds2(QuantumFieldName, [Filter | Tail], OtherFilters, BoundsAcc1) -> %% this filter is not on the quantum @@ -1061,7 +1068,9 @@ simple_rewrite_fail_3_test() -> simplest_test() -> DDL = get_standard_ddl(), - Query = "select weather from GeoCheckin where time > 3000 and time < 5000 and user = 'user_1' and location = 'San Francisco'", + Query = + "select weather from GeoCheckin where time > 3000" + " and time < 5000 and user = 'user_1' and location = 'San Francisco'", {ok, Q} = get_query(Query), true = is_query_valid(DDL, Q), [ExpectedWhere] = @@ -1175,7 +1184,11 @@ simple_with_2_field_filter_test() -> ?assertEqual(ExpectedWhere, WhereVal). 
complex_with_4_field_filter_test() -> - Query = "select weather from GeoCheckin where time > 3000 and time < 5000 and user = 'user_1' and location = 'Scotland' and extra = 1 and (weather = 'yankee' or (temperature = 'yelp' and geohash = 'erko'))", + Query = + "select weather from GeoCheckin where" + " time > 3000 and time < 5000 and user = 'user_1'" + " and location = 'Scotland' and extra = 1" + " and (weather = 'yankee' or (temperature = 'yelp' and geohash = 'erko'))", {ok, Q} = get_query(Query), DDL = get_long_ddl(), true = is_query_valid(DDL, Q), @@ -1233,7 +1246,9 @@ complex_with_boolean_rewrite_filter_test() -> simple_spanning_boundary_test() -> DDL = get_standard_ddl(), {ok, Q} = get_query( - "select weather from GeoCheckin where time >= 3000 and time < 31000 and user = 'user_1' and location = 'Scotland'"), + "select weather from GeoCheckin" + " where time >= 3000 and time < 31000" + " and user = 'user_1' and location = 'Scotland'"), true = is_query_valid(DDL, Q), %% get basic query %% now make the result - expecting 3 queries @@ -1264,7 +1279,10 @@ simple_spanning_boundary_test() -> %% one. boundary_quanta_test() -> DDL = get_standard_ddl(), - Query = "select weather from GeoCheckin where time >= 14000 and time <= 15000 and user = 'user_1' and location = 'Scotland'", + Query = + "select weather from GeoCheckin" + " where time >= 14000 and time <= 15000" + " and user = 'user_1' and location = 'Scotland'", {ok, Q} = get_query(Query), true = is_query_valid(DDL, Q), %% get basic query @@ -1294,7 +1312,10 @@ test_data_where_clause(Family, Series, StartEndTimes) -> %% is exact multiple of quantum size) simple_spanning_boundary_precision_test() -> DDL = get_standard_ddl(), - Query = "select weather from GeoCheckin where time >= 3000 and time < 30000 and user = 'user_1' and location = 'Scotland'", + Query = + "select weather from GeoCheckin" + " where time >= 3000 and time < 30000" + " and user = 'user_1' and location = 'Scotland'", {ok, Q} = get_query(Query), true = is_query_valid(DDL, Q), %% now make the result - expecting 2 queries @@ -1319,7 +1340,10 @@ simple_spanning_boundary_precision_test() -> simplest_compile_once_only_fail_test() -> DDL = get_standard_ddl(), - Query = "select weather from GeoCheckin where time >= 3000 and time < 5000 and user = 'user_1' and location = 'Scotland'", + Query = + "select weather from GeoCheckin where" + " time >= 3000 and time < 5000" + " and user = 'user_1' and location = 'Scotland'", {ok, Q} = get_query(Query), true = is_query_valid(DDL, Q), %% now try and compile twice @@ -1400,7 +1424,8 @@ lower_bound_is_bigger_than_upper_bound_test() -> "WHERE time > 6000 AND time < 5000" "AND user = 'user_1' AND location = 'derby'"), ?assertEqual( - {error, {lower_bound_must_be_less_than_upper_bound, ?E_TSMSG_LOWER_BOUND_MUST_BE_LESS_THAN_UPPER_BOUND}}, + {error, {lower_bound_must_be_less_than_upper_bound, + ?E_TSMSG_LOWER_BOUND_MUST_BE_LESS_THAN_UPPER_BOUND}}, compile(DDL, Q, 5) ). @@ -1411,7 +1436,8 @@ lower_bound_is_same_as_upper_bound_test() -> "WHERE time > 5000 AND time < 5000" "AND user = 'user_1' AND location = 'derby'"), ?assertEqual( - {error, {lower_and_upper_bounds_are_equal_when_no_equals_operator, ?E_TSMSG_LOWER_AND_UPPER_BOUNDS_ARE_EQUAL_WHEN_NO_EQUALS_OPERATOR}}, + {error, {lower_and_upper_bounds_are_equal_when_no_equals_operator, + ?E_TSMSG_LOWER_AND_UPPER_BOUNDS_ARE_EQUAL_WHEN_NO_EQUALS_OPERATOR}}, compile(DDL, Q, 5) ). 
@@ -1457,7 +1483,8 @@ missing_key_field_in_where_clause_test() -> not_equals_can_only_be_a_filter_test() -> DDL = get_standard_ddl(), - {ok, Q} = get_query("select * from test1 where time > 1 and time < 6 and user = '2' and location != '4'"), + {ok, Q} = get_query("select * from test1 where time > 1" + " and time < 6 and user = '2' and location != '4'"), ?assertEqual( {error, {missing_key_clause, ?E_KEY_PARAM_MUST_USE_EQUALS_OPERATOR("location", '!=')}}, compile(DDL, Q, 5) @@ -1495,9 +1522,10 @@ run_select_all_test() -> run_select_first_test() -> DDL = get_standard_ddl(), - Sel = testing_compile_row_select(DDL, - "SELECT geohash FROM GeoCheckin " - "WHERE time > 1 AND time < 6 AND user = '2' AND location = '4'"), + Sel = testing_compile_row_select( + DDL, + "SELECT geohash FROM GeoCheckin " + "WHERE time > 1 AND time < 6 AND user = '2' AND location = '4'"), #riak_sel_clause_v1{clause = SelectSpec} = Sel, ?assertEqual( [<<"geodude">>], @@ -1506,9 +1534,10 @@ run_select_first_test() -> run_select_last_test() -> DDL = get_standard_ddl(), - Sel = testing_compile_row_select(DDL, - "SELECT temperature FROM GeoCheckin " - "WHERE time > 1 AND time < 6 AND user = '2' AND location = '4'"), + Sel = testing_compile_row_select( + DDL, + "SELECT temperature FROM GeoCheckin " + "WHERE time > 1 AND time < 6 AND user = '2' AND location = '4'"), #riak_sel_clause_v1{clause = SelectSpec} = Sel, ?assertEqual( [12.2], @@ -1517,9 +1546,10 @@ run_select_last_test() -> run_select_all_individually_test() -> DDL = get_standard_ddl(), - Sel = testing_compile_row_select(DDL, - "SELECT geohash, location, user, time, weather, temperature FROM GeoCheckin " - "WHERE time > 1 AND time < 6 AND user = '2' AND location = '4'"), + Sel = testing_compile_row_select( + DDL, + "SELECT geohash, location, user, time, weather, temperature FROM GeoCheckin " + "WHERE time > 1 AND time < 6 AND user = '2' AND location = '4'"), #riak_sel_clause_v1{clause = SelectSpec} = Sel, ?assertEqual( ?ROW, @@ -1528,9 +1558,10 @@ run_select_all_individually_test() -> run_select_some_test() -> DDL = get_standard_ddl(), - Sel = testing_compile_row_select(DDL, - "SELECT location, weather FROM GeoCheckin " - "WHERE time > 1 AND time < 6 AND user = '2' AND location = '4'"), + Sel = testing_compile_row_select( + DDL, + "SELECT location, weather FROM GeoCheckin " + "WHERE time > 1 AND time < 6 AND user = '2' AND location = '4'"), #riak_sel_clause_v1{clause = SelectSpec} = Sel, ?assertEqual( [<<"derby">>, <<"hot">>], @@ -1539,9 +1570,10 @@ run_select_some_test() -> select_count_aggregation_test() -> DDL = get_standard_ddl(), - Sel = testing_compile_row_select(DDL, - "SELECT count(location) FROM GeoCheckin " - "WHERE time > 1 AND time < 6 AND user = '2' AND location = '4'"), + Sel = testing_compile_row_select( + DDL, + "SELECT count(location) FROM GeoCheckin " + "WHERE time > 1 AND time < 6 AND user = '2' AND location = '4'"), #riak_sel_clause_v1{clause = SelectSpec} = Sel, ?assertEqual( [1], @@ -1551,9 +1583,10 @@ select_count_aggregation_test() -> select_count_aggregation_2_test() -> DDL = get_standard_ddl(), - Sel = testing_compile_row_select(DDL, - "SELECT count(location), count(location) FROM GeoCheckin " - "WHERE time > 1 AND time < 6 AND user = '2' AND location = '4'"), + Sel = testing_compile_row_select( + DDL, + "SELECT count(location), count(location) FROM GeoCheckin " + "WHERE time > 1 AND time < 6 AND user = '2' AND location = '4'"), #riak_sel_clause_v1{clause = SelectSpec} = Sel, ?assertEqual( [1, 10], @@ -1627,7 +1660,10 @@ get_sel_ddl() -> 
basic_select_test() -> DDL = get_sel_ddl(), - SQL = "SELECT location from mytab WHERE myfamily = 'familyX' and myseries = 'seriesX' and time > 1 and time < 2", + SQL = + "SELECT location from mytab" + " WHERE myfamily = 'familyX'" + " and myseries = 'seriesX' and time > 1 and time < 2", {ok, Rec} = get_query(SQL), {ok, Sel} = compile_select_clause(DDL, Rec), ?assertMatch(#riak_sel_clause_v1{calc_type = rows, @@ -1699,7 +1735,10 @@ select_column_and_all_test() -> ). basic_select_window_agg_fn_test() -> - SQL = "SELECT count(location), avg(mydouble), avg(mysint) from mytab WHERE myfamily = 'familyX' and myseries = 'seriesX' and time > 1 and time < 2", + SQL = + "SELECT count(location), avg(mydouble), avg(mysint)" + " from mytab WHERE myfamily = 'familyX'" + " and myseries = 'seriesX' and time > 1 and time < 2", {ok, Rec} = get_query(SQL), {ok, Sel} = compile_select_clause(get_sel_ddl(), Rec), ?assertMatch(#riak_sel_clause_v1{calc_type = aggregate, @@ -1717,7 +1756,10 @@ basic_select_window_agg_fn_test() -> Sel). basic_select_arith_1_test() -> - SQL = "SELECT 1 + 2 - 3 /4 * 5 from mytab WHERE myfamily = 'familyX' and myseries = 'seriesX' and time > 1 and time < 2", + SQL = + "SELECT 1 + 2 - 3 /4 * 5 from mytab" + " WHERE myfamily = 'familyX' and myseries = 'seriesX'" + " and time > 1 and time < 2", {ok, Rec} = get_query(SQL), {ok, Sel} = compile_select_clause(get_sel_ddl(), Rec), ?assertMatch( @@ -1762,7 +1804,10 @@ boolean_false_literal_test() -> ). basic_select_arith_2_test() -> - SQL = "SELECT 1 + 2.0 - 3 /4 * 5 from mytab WHERE myfamily = 'familyX' and myseries = 'seriesX' and time > 1 and time < 2", + SQL = + "SELECT 1 + 2.0 - 3 /4 * 5 from mytab" + " WHERE myfamily = 'familyX' and myseries = 'seriesX'" + " and time > 1 and time < 2", {ok, Rec} = get_query(SQL), {ok, Sel} = compile_select_clause(get_sel_ddl(), Rec), ?assertMatch( @@ -1852,7 +1897,10 @@ extract_stateful_function_1_test() -> CountFn1 = {{window_agg_fn, 'COUNT'}, [{identifier, [<<"col1">>]}]}, CountFn2 = {{window_agg_fn, 'COUNT'}, [{identifier, [<<"col2">>]}]}, ?assertEqual( - {{'+', {finalise_aggregation, 'COUNT', 1}, {finalise_aggregation, 'COUNT', 2}}, [CountFn1,CountFn2]}, + {{'+', + {finalise_aggregation, 'COUNT', 1}, + {finalise_aggregation, 'COUNT', 2}}, + [CountFn1,CountFn2]}, extract_stateful_functions(Select, 0) ). @@ -2076,8 +2124,8 @@ no_quantum_in_query_1_test() -> {ok, Q} = get_query( "SELECT * FROM tab1 WHERE a = 1 AND b = 1"), ?assertMatch( - {ok, [#riak_select_v1{ - 'WHERE' = + {ok, [#riak_select_v1{ + 'WHERE' = [{startkey,[{<<"a">>,timestamp,1},{<<"b">>,varchar,1}]}, {endkey, [{<<"a">>,timestamp,1},{<<"b">>,varchar,1}]}, {filter,[]}, @@ -2097,7 +2145,7 @@ no_quantum_in_query_2_test() -> {ok, Q} = get_query( "SELECT * FROM tabab WHERE a = 1000 AND b = 'bval' AND c = 3.5"), {ok, [Select]} = compile(DDL, Q, 100), - Key = + Key = [{<<"c">>,double,3.5}, {<<"a">>,sint64,1000},{<<"b">>,varchar,<<"bval">>}], ?assertEqual( [{startkey, Key}, From 01131de29f8b12432a0da4a12c424553ff1c305f Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Thu, 17 Mar 2016 10:19:49 +0100 Subject: [PATCH 060/122] Fix: timeseries_query with correct SQL for non-existing table now returns 404 instead of crashing. 
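The fix hangs off webmachine's resource_exists/2 callback: returning false
there makes webmachine answer 404 before the query pipeline can touch the
missing DDL helper module. The riak_kv_wm_ts_util:table_module_exists/1
check used below could be as small as a code-loading probe; the following is
a sketch of the idea, not necessarily the helper's actual body:

    table_module_exists(Mod) ->
        %% one helper module is generated per TS table, so a loadable
        %% module is a reasonable proxy for "the table exists"
        case code:ensure_loaded(Mod) of
            {module, Mod} -> true;
            {error, _}    -> false
        end.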
--- src/riak_kv_wm_timeseries_query.erl | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/riak_kv_wm_timeseries_query.erl b/src/riak_kv_wm_timeseries_query.erl index 6f2584966b..8f54a9add6 100644 --- a/src/riak_kv_wm_timeseries_query.erl +++ b/src/riak_kv_wm_timeseries_query.erl @@ -39,6 +39,7 @@ malformed_request/2, forbidden/2, allowed_methods/2, + resource_exists/2, post_is_create/2, process_post/2, content_types_accepted/2, @@ -168,6 +169,18 @@ content_types_accepted(RD, Ctx) -> {[], RD, Ctx}. +-spec resource_exists(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). +resource_exists(RD, #ctx{sql_type=ddl}=Ctx) -> + {true, RD, Ctx}; +resource_exists(RD, #ctx{sql_type=Type, + mod=Mod}=Ctx) when Type == describe; + Type == select -> + Res = riak_kv_wm_ts_util:table_module_exists(Mod), + {Res, RD, Ctx}; +resource_exists(RD, Ctx) -> + lager:log(info, self(), "resource_exists default case Ctx=~p", [Ctx]), + {false, RD, Ctx}. + -spec post_is_create(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). post_is_create(RD, Ctx) -> {false, RD, Ctx}. From c548a2d6daed25d3f546330bfcd027644f897ee1 Mon Sep 17 00:00:00 2001 From: Andrei Zavada Date: Thu, 17 Mar 2016 15:25:53 +0200 Subject: [PATCH 061/122] better diagnostics in riak_kv_wm_utils:erlify_bucket_prop * throw meaningful exceptions instead of badarg (on list_to_existing_atom failing when extracting Mod:Fun for linkfun and chash_fun props) or badmatch (on bad bkey for jsanon); * allow free-form custom properties (log a message when one is created). --- src/riak_kv_bucket.erl | 6 ++--- src/riak_kv_console.erl | 10 ++++++-- src/riak_kv_pb_timeseries.erl | 15 ++++++++++-- src/riak_kv_wm_utils.erl | 43 +++++++++++++++++++++-------------- 4 files changed, 50 insertions(+), 24 deletions(-) diff --git a/src/riak_kv_bucket.erl b/src/riak_kv_bucket.erl index be932f04e3..3f9b1bcde0 100644 --- a/src/riak_kv_bucket.erl +++ b/src/riak_kv_bucket.erl @@ -44,7 +44,7 @@ -export_type([props/0]). --define(VALID_PROPERTIES, +-define(COMMON_BUCKET_PROPERTIES, ["allow_mult", "basic_quorum", "big_vclock", @@ -73,10 +73,10 @@ ]). -spec is_valid_property(string() | binary()) -> boolean(). -%% @doc Checks whether a given binary or string is a valid bucket type +%% @doc Checks whether a given binary or string is a common bucket type %% property. is_valid_property(P) when is_list(P) -> - lists:member(P, ?VALID_PROPERTIES); + lists:member(P, ?COMMON_BUCKET_PROPERTIES); is_valid_property(P) when is_binary(P) -> is_valid_property(binary_to_list(P)). 
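With the rename, property-name validation stays a plain membership test over
the ?COMMON_BUCKET_PROPERTIES strings, normalising binaries to lists first.
Expected behaviour, for illustration (the first two names are taken from the
list above; the third is a made-up unknown property):

    true  = riak_kv_bucket:is_valid_property("n_val"),
    true  = riak_kv_bucket:is_valid_property(<<"table_def">>),
    false = riak_kv_bucket:is_valid_property("some_custom_prop").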
diff --git a/src/riak_kv_console.erl b/src/riak_kv_console.erl index fdc08612c5..647b740c2a 100644 --- a/src/riak_kv_console.erl +++ b/src/riak_kv_console.erl @@ -523,8 +523,14 @@ bucket_type_create(CreateTypeFn, Type, {struct, Fields}) -> case catch riak_kv_ts_util:maybe_parse_table_def(Type, Props1) of {ok, Props2} -> case catch [riak_kv_wm_utils:erlify_bucket_prop(P) || P <- Props2] of - {bad_bucket_property, BadProp} -> - io:format("Invalid bucket type property: ~ts\n", [BadProp]), + {bad_linkfun_modfun, {M, F}} -> + io:format("Invalid link mod or fun in bucket type properties: ~p:~p\n", [M, F]), + error; + {bad_linkfun_bkey, {B, K}} -> + io:format("Malformed bucket/key for anon link fun in bucket type properties: ~p/~p\n", [B, K]), + error; + {bad_chash_keyfun, {M, F}} -> + io:format("Invalid chash mod or fun in bucket type properties: ~p:~p\n", [M, F]), error; Props3 -> CreateTypeFn(Props3) diff --git a/src/riak_kv_pb_timeseries.erl b/src/riak_kv_pb_timeseries.erl index 23d2b97ae8..e6b81ca708 100644 --- a/src/riak_kv_pb_timeseries.erl +++ b/src/riak_kv_pb_timeseries.erl @@ -231,9 +231,20 @@ process_stream({ReqId, Error}, ReqId, create_table({DDL = ?DDL{table = Table}, WithProps}, State) -> {ok, Props1} = riak_kv_ts_util:apply_timeseries_bucket_props(DDL, WithProps), case catch [riak_kv_wm_utils:erlify_bucket_prop(P) || P <- Props1] of - {bad_bucket_property, BadProp} -> + {bad_linkfun_modfun, {M, F}} -> {reply, table_create_fail_response( - Table, flat_format("Invalid bucket type property: ~ts", [BadProp])), + Table, flat_format( + "Invalid link mod or fun in bucket type properties: ~p:~p\n", [M, F])), + State}; + {bad_linkfun_bkey, {B, K}} -> + {reply, table_create_fail_response( + Table, flat_format( + "Malformed bucket/key for anon link fun in bucket type properties: ~p/~p\n", [B, K])), + State}; + {bad_chash_keyfun, {M, F}} -> + {reply, table_create_fail_response( + Table, flat_format( + "Invalid chash mod or fun in bucket type properties: ~p:~p\n", [M, F])), State}; Props2 -> case riak_core_bucket_type:create(Table, Props2) of diff --git a/src/riak_kv_wm_utils.erl b/src/riak_kv_wm_utils.erl index ca50e3d1a8..7316c695f8 100644 --- a/src/riak_kv_wm_utils.erl +++ b/src/riak_kv_wm_utils.erl @@ -368,9 +368,14 @@ erlify_bucket_prop({?JSON_LINKFUN, {struct, Props}}) -> case {proplists:get_value(?JSON_MOD, Props), proplists:get_value(?JSON_FUN, Props)} of {Mod, Fun} when is_binary(Mod), is_binary(Fun) -> - {linkfun, {modfun, - list_to_existing_atom(binary_to_list(Mod)), - list_to_existing_atom(binary_to_list(Fun))}}; + try + {linkfun, {modfun, + list_to_existing_atom(binary_to_list(Mod)), + list_to_existing_atom(binary_to_list(Fun))}} + catch + error:badarg -> + throw({bad_linkfun_modfun, {Mod, Fun}}) + end; {undefined, undefined} -> case proplists:get_value(?JSON_JSFUN, Props) of Name when is_binary(Name) -> @@ -380,36 +385,40 @@ erlify_bucket_prop({?JSON_LINKFUN, {struct, Props}}) -> {struct, Bkey} -> Bucket = proplists:get_value(?JSON_JSBUCKET, Bkey), Key = proplists:get_value(?JSON_JSKEY, Bkey), - %% bomb if malformed - true = is_binary(Bucket) andalso is_binary(Key), - {linkfun, {jsanon, {Bucket, Key}}}; + if is_binary(Bucket) andalso is_binary(Key) -> + {linkfun, {jsanon, {Bucket, Key}}}; + el/=se -> + throw({bad_linkfun_bkey, {Bucket, Key}}) + end; Source when is_binary(Source) -> {linkfun, {jsanon, Source}} end end end; erlify_bucket_prop({?JSON_CHASH, {struct, Props}}) -> - {chash_keyfun, {list_to_existing_atom( - binary_to_list( - proplists:get_value(?JSON_MOD, Props))), - 
list_to_existing_atom( - binary_to_list( - proplists:get_value(?JSON_FUN, Props)))}}; + Mod = proplists:get_value(?JSON_MOD, Props), + Fun = proplists:get_value(?JSON_FUN, Props), + try + {chash_keyfun, {list_to_existing_atom( + binary_to_list(Mod)), + list_to_existing_atom( + binary_to_list(Fun))}} + catch + error:badarg -> + throw({bad_chash_keyfun, {Mod, Fun}}) + end; erlify_bucket_prop({<<"ddl">>, Value}) -> {ddl, Value}; erlify_bucket_prop({Prop, Value}) -> {validate_bucket_property(binary_to_list(Prop)), Value}. -%% this serves to narrow the property name check to a set of -%% known properties, rather than use list_to_existing_atom which -%% is too broad, and also to report a meaningful exception, for -%% callers to deal with and report further. validate_bucket_property(P) -> case riak_kv_bucket:is_valid_property(P) of true -> list_to_atom(P); false -> - throw({bad_bucket_property, P}) + lager:info("setting custom bucket property: ~s", [P]), + list_to_atom(P) end. %% @doc Populates the resource's context/state with the bucket type From c78a5f015889667d2a77feff064cab1b1003b46d Mon Sep 17 00:00:00 2001 From: Andrei Zavada Date: Thu, 17 Mar 2016 16:00:07 +0200 Subject: [PATCH 062/122] catch exceptions from erlify_bucket_prop in riak_kv_wm_bucket_type else, a malformed property in json would fly to webmachine --- src/riak_kv_wm_bucket_type.erl | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/src/riak_kv_wm_bucket_type.erl b/src/riak_kv_wm_bucket_type.erl index 6c09ca2cb5..c15648ad1a 100644 --- a/src/riak_kv_wm_bucket_type.erl +++ b/src/riak_kv_wm_bucket_type.erl @@ -261,12 +261,25 @@ produce_bucket_type_body(RD, Ctx) -> %% @doc Modify the bucket properties according to the body of the %% bucket-level PUT request. accept_bucket_type_body(RD, Ctx=#ctx{bucket_type=T, bucketprops=Props}) -> - ErlProps = lists:map(fun riak_kv_wm_utils:erlify_bucket_prop/1, Props), - case riak_core_bucket_type:update(T, ErlProps) of - ok -> - {true, RD, Ctx}; - {error, Details} -> - JSON = mochijson2:encode(Details), - RD2 = wrq:append_to_resp_body(JSON, RD), - {{halt, 400}, RD2, Ctx} + try lists:map(fun riak_kv_wm_utils:erlify_bucket_prop/1, Props) of + ErlProps -> + case riak_core_bucket_type:update(T, ErlProps) of + ok -> + {true, RD, Ctx}; + {error, Details} -> + JSON = mochijson2:encode(Details), + RD2 = wrq:append_to_resp_body(JSON, RD), + {{halt, 400}, RD2, Ctx} + end + catch + throw:Details -> + error_out({halt, 400}, "Bad bucket type properties: ~p", [Details], RD, Ctx) end. + +error_out(Type, Fmt, Args, RD, Ctx) -> + {Type, + wrq:set_resp_header( + "Content-Type", "text/plain", + wrq:append_to_response_body( + lists:flatten(io_lib:format(Fmt, Args)), RD)), + Ctx}. From 3b66451582875b2b8bbf82effebfa570a5b3a1e9 Mon Sep 17 00:00:00 2001 From: Andrei Zavada Date: Thu, 17 Mar 2016 16:07:23 +0200 Subject: [PATCH 063/122] error message tweak --- src/riak_kv_ts_util.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/riak_kv_ts_util.erl b/src/riak_kv_ts_util.erl index b154f411ba..5e1a974ca5 100644 --- a/src/riak_kv_ts_util.erl +++ b/src/riak_kv_ts_util.erl @@ -219,7 +219,7 @@ assert_type_and_table_name_same(BucketType1, ?DDL{table = BucketType2}) -> throw({error, {table_name, flat_format( - "Time series bucket type and table name mismatch (~s != ~s)", + "Time series bucket type and table name do not match (~s != ~s)", [BucketType1, BucketType2])}}). 
%% Attempt to compile the DDL but don't do anything with the output, this is From fd6d7c51c83852f4bc59255b25fde5f23c5b7bce Mon Sep 17 00:00:00 2001 From: Andrei Zavada Date: Fri, 18 Mar 2016 00:39:26 +0200 Subject: [PATCH 064/122] don't keep a registry of 'common properties' Just log an informational message when attempt is made to add a property which did not exist as an atom. --- src/riak_kv_bucket.erl | 38 -------------------------------------- src/riak_kv_wm_utils.erl | 11 ++++++----- 2 files changed, 6 insertions(+), 43 deletions(-) diff --git a/src/riak_kv_bucket.erl b/src/riak_kv_bucket.erl index 3f9b1bcde0..787a756174 100644 --- a/src/riak_kv_bucket.erl +++ b/src/riak_kv_bucket.erl @@ -24,7 +24,6 @@ -module(riak_kv_bucket). -export([validate/4]). --export([is_valid_property/1]). -include("riak_kv_types.hrl"). @@ -44,43 +43,6 @@ -export_type([props/0]). --define(COMMON_BUCKET_PROPERTIES, - ["allow_mult", - "basic_quorum", - "big_vclock", - "bucket_type", - "chash_keyfun", - "dvv_enabled", - "dw", - "last_write_wins", - "linkfun", - "n_val", - "notfound_ok", - "old_vclock", - "postcommit", - "pr", - "precommit", - "pw", - "r", - "rw", - "small_vclock", - "w", - "write_once", - "young_vclock", - %% TS-specific ones: - "ddl", - "table_def" - ]). - --spec is_valid_property(string() | binary()) -> boolean(). -%% @doc Checks whether a given binary or string is a common bucket type -%% property. -is_valid_property(P) when is_list(P) -> - lists:member(P, ?COMMON_BUCKET_PROPERTIES); -is_valid_property(P) when is_binary(P) -> - is_valid_property(binary_to_list(P)). - - %% @doc called by riak_core in a few places to ensure bucket %% properties are sane. The arguments combinations have the following %% meanings:- diff --git a/src/riak_kv_wm_utils.erl b/src/riak_kv_wm_utils.erl index 7316c695f8..fbfd750cfc 100644 --- a/src/riak_kv_wm_utils.erl +++ b/src/riak_kv_wm_utils.erl @@ -413,11 +413,12 @@ erlify_bucket_prop({Prop, Value}) -> {validate_bucket_property(binary_to_list(Prop)), Value}. validate_bucket_property(P) -> - case riak_kv_bucket:is_valid_property(P) of - true -> - list_to_atom(P); - false -> - lager:info("setting custom bucket property: ~s", [P]), + %% there's no validation; free-form properties are all allowed + try + list_to_existing_atom(P) + catch + error:badarg -> + lager:info("Setting new custom bucket type property '~s'", [P]), list_to_atom(P) end. From 938bb72b25d4bc5121395de0e0c362af001bf512 Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Fri, 18 Mar 2016 08:35:20 +0100 Subject: [PATCH 065/122] timeseries_query: remove prefix, riak from #ctx. Use init:get_status() to judge availability. --- src/riak_kv_wm_timeseries_query.erl | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/src/riak_kv_wm_timeseries_query.erl b/src/riak_kv_wm_timeseries_query.erl index 8f54a9add6..8e03b0e2fa 100644 --- a/src/riak_kv_wm_timeseries_query.erl +++ b/src/riak_kv_wm_timeseries_query.erl @@ -59,10 +59,8 @@ table :: 'undefined' | binary(), mod :: 'undefined' | module(), method :: atom(), - prefix, %% string() - prefix for resource uris timeout, %% integer() - passed-in timeout value in ms security, %% security context - riak, %% local | {node(), atom()} - params for riak client sql_type, compiled_query :: undefined | #ddl_v1{} | #riak_sql_describe_v1{} | #riak_select_v1{}, result :: undefined | ok | {Headers::[binary()], Rows::[ts_rec()]} | @@ -78,24 +76,19 @@ -spec init(proplists:proplist()) -> {ok, #ctx{}}. -%% @doc Initialize this resource. 
This function extracts the -%% 'prefix' and 'riak' properties from the dispatch args. -init(Props) -> - {ok, #ctx{prefix = proplists:get_value(prefix, Props), - riak = proplists:get_value(riak, Props)}}. +init(_Props) -> + {ok, #ctx{}}. -spec service_available(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). %% @doc Determine whether or not a connection to Riak %% can be established. -service_available(RD, Ctx = #ctx{riak = RiakProps}) -> - checkpoint("service_available: RD=~p", [RD]), - case riak_kv_wm_utils:get_riak_client( - RiakProps, riak_kv_wm_utils:get_client_id(RD)) of - {ok, _C} -> +service_available(RD, Ctx) -> + case init:get_status() of + {started, _} -> {true, RD, Ctx}; - {error, Reason} -> + Status -> Resp = riak_kv_wm_ts_util:set_error_message("Unable to connect to Riak: ~p", - [Reason], RD), + [Status], RD), {false, Resp, Ctx} end. From 0a8c75daf1a92a8b24237bbe776c262e62c38684 Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Fri, 18 Mar 2016 08:37:10 +0100 Subject: [PATCH 066/122] timeseries_query: only POST allowed. --- src/riak_kv_wm_timeseries_query.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/riak_kv_wm_timeseries_query.erl b/src/riak_kv_wm_timeseries_query.erl index 8e03b0e2fa..1a413ac59a 100644 --- a/src/riak_kv_wm_timeseries_query.erl +++ b/src/riak_kv_wm_timeseries_query.erl @@ -94,7 +94,7 @@ service_available(RD, Ctx) -> -spec allowed_methods(#wm_reqdata{}, #ctx{}) -> cb_rv_spec([atom()]). allowed_methods(RD, Ctx) -> - {['GET', 'POST'], RD, Ctx}. + {['POST'], RD, Ctx}. -spec malformed_request(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). malformed_request(RD, Ctx) -> From 0e9c10f9b6cac63ea960d8b4e10e2abddce530ba Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Fri, 18 Mar 2016 08:38:00 +0100 Subject: [PATCH 067/122] timeseries_query: removed debugging with checkpoint/1 --- src/riak_kv_wm_timeseries_query.erl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/riak_kv_wm_timeseries_query.erl b/src/riak_kv_wm_timeseries_query.erl index 1a413ac59a..0a8c21df43 100644 --- a/src/riak_kv_wm_timeseries_query.erl +++ b/src/riak_kv_wm_timeseries_query.erl @@ -100,7 +100,6 @@ allowed_methods(RD, Ctx) -> malformed_request(RD, Ctx) -> try {SqlType, SQL} = query_from_request(RD), - checkpoint("malformed_request SqlType=~p, SQL=~p", [SqlType, SQL]), Table = table_from_sql(SQL), Mod = riak_ql_ddl:make_module_name(Table), {false, RD, Ctx#ctx{sql_type=SqlType, @@ -315,6 +314,3 @@ to_json({Columns, Rows}) when is_list(Columns), is_list(Rows) -> {<<"rows">>, Rows}]}); to_json(Other) -> mochijson2:encode(Other). - -checkpoint(Format, Args) -> - lager:log(info, self(), Format, Args). 
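Background for the init:get_status/0 check introduced in PATCH 065:
init:get_status() returns {InternalStatus, ProvidedStatus}, with
InternalStatus one of starting | started | stopping, so matching {started, _}
is a cheap local readiness test that needs no Riak client connection at all.
A sketch (ready_to_serve/0 is a hypothetical name for illustration; on a
fully booted node the call typically yields {started, started}):

    ready_to_serve() ->
        case init:get_status() of
            {started, _ProvidedStatus} -> true;   %% boot has completed
            _StartingOrStopping        -> false   %% still booting, or shutting down
        end.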
From cf63b409ec4c356bae47f0226a0c6950577e0f59 Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Fri, 18 Mar 2016 09:03:06 +0100 Subject: [PATCH 068/122] timeseries_query: unsupported SQL type now leads to a 503 --- src/riak_kv_wm_timeseries_query.erl | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/riak_kv_wm_timeseries_query.erl b/src/riak_kv_wm_timeseries_query.erl index 0a8c21df43..8f243e9534 100644 --- a/src/riak_kv_wm_timeseries_query.erl +++ b/src/riak_kv_wm_timeseries_query.erl @@ -108,15 +108,18 @@ malformed_request(RD, Ctx) -> mod=Mod}} catch throw:{query, Reason} -> - lager:log(info, self(), "try in malformed_request backfired: ~p", [Reason]), Response = riak_kv_wm_ts_util:set_error_message("bad query: ~p", [Reason], RD), - {true, Response, Ctx} + {true, Response, Ctx}; + throw:{unsupported_sql_type, Type} -> + Response = riak_kv_wm_ts_util:set_error_message( + "The ~p query type is not supported over the HTTP API yet", + [Type], RD), + {{halt, 503}, Response, Ctx} end. -spec is_authorized(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()|string()|halt()). is_authorized(RD, #ctx{sql_type=SqlType, table=Table}=Ctx) -> Call = call_from_sql_type(SqlType), - lager:log(info, self(), "is_authorized type:~p", [SqlType]), case riak_kv_wm_ts_util:authorize(Call, Table, RD) of ok -> {true, RD, Ctx}; @@ -170,7 +173,6 @@ resource_exists(RD, #ctx{sql_type=Type, Res = riak_kv_wm_ts_util:table_module_exists(Mod), {Res, RD, Ctx}; resource_exists(RD, Ctx) -> - lager:log(info, self(), "resource_exists default case Ctx=~p", [Ctx]), {false, RD, Ctx}. -spec post_is_create(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). @@ -238,7 +240,6 @@ process_post(RD, #ctx{sql_type=select, %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% query_from_request(RD) -> QueryStr = query_string_from_request(RD), - lager:log(info, self(), "query_from_request: ~p", [QueryStr]), compile_query(QueryStr). query_string_from_request(RD) -> @@ -260,7 +261,9 @@ compile_query(QueryStr) -> {Type, Compiled} when Type==select; Type==describe -> {ok, SQL} = riak_kv_ts_util:build_sql_record( Type, Compiled, undefined), - {Type, SQL} + {Type, SQL}; + {UnsupportedType, _ } -> + throw({unsupported_sql_type, UnsupportedType}) end. %% @todo: should really be in riak_ql somewhere From 3da340d567050f8497443418a3d26b0e71caaacb Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Fri, 18 Mar 2016 10:12:40 +0100 Subject: [PATCH 069/122] timeseries_query: query string now in body as text/plain --- src/riak_kv_wm_timeseries_query.erl | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/riak_kv_wm_timeseries_query.erl b/src/riak_kv_wm_timeseries_query.erl index 8f243e9534..bdd4c6a992 100644 --- a/src/riak_kv_wm_timeseries_query.erl +++ b/src/riak_kv_wm_timeseries_query.erl @@ -141,10 +141,6 @@ forbidden(RD, Ctx) -> {false, RD, Ctx} end. - - - - -spec content_types_provided(#wm_reqdata{}, #ctx{}) -> cb_rv_spec([{ContentType::string(), Producer::atom()}]). content_types_provided(RD, Ctx) -> @@ -160,8 +156,7 @@ encodings_provided(RD, Ctx) -> -spec content_types_accepted(#wm_reqdata{}, #ctx{}) -> cb_rv_spec([{ContentType::string(), Acceptor::atom()}]). content_types_accepted(RD, Ctx) -> -%% @todo: if we end up without a body in the request this function should be deleted. - {[], RD, Ctx}. + {["text/plain"], RD, Ctx}. -spec resource_exists(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). 
@@ -243,11 +238,11 @@ query_from_request(RD) ->
     compile_query(QueryStr).
 
 query_string_from_request(RD) ->
-    case wrq:get_qs_value("query", RD) of
+    case wrq:req_body(RD) of
         undefined ->
-            throw({query, "no query key in query string"});
+            throw({query, "no query in body"});
         Str ->
-            Str
+            binary_to_list(Str)
     end.
 
 compile_query(QueryStr) ->

From cb706e94c1e2f2c774f10629b080f131dd7d245a Mon Sep 17 00:00:00 2001
From: Torben Hoffmann
Date: Fri, 18 Mar 2016 11:10:53 +0100
Subject: [PATCH 070/122] timeseries_query: attempting to create an existing table now returns a 409

---
 src/riak_kv_wm_timeseries_query.erl | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/riak_kv_wm_timeseries_query.erl b/src/riak_kv_wm_timeseries_query.erl
index bdd4c6a992..57be252500 100644
--- a/src/riak_kv_wm_timeseries_query.erl
+++ b/src/riak_kv_wm_timeseries_query.erl
@@ -159,9 +159,18 @@ content_types_accepted(RD, Ctx) ->
     {["text/plain"], RD, Ctx}.
 
 
--spec resource_exists(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()).
-resource_exists(RD, #ctx{sql_type=ddl}=Ctx) ->
-    {true, RD, Ctx};
+-spec resource_exists(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()|halt()).
+resource_exists(RD, #ctx{sql_type=ddl,
+                         mod=Mod,
+                         table=Table}=Ctx) ->
+    case riak_kv_wm_ts_util:table_module_exists(Mod) of
+        false ->
+            {true, RD, Ctx};
+        true ->
+            Resp = riak_kv_wm_ts_util:set_error_message("table ~p already exists",
+                                                        [Table], RD),
+            {{halt, 409}, Resp, Ctx}
+    end;
 resource_exists(RD, #ctx{sql_type=Type,
                          mod=Mod}=Ctx) when Type == describe;
                                             Type == select ->

From f21214f53492f97b10e9d9a7b61c89996a2c8721 Mon Sep 17 00:00:00 2001
From: Torben Hoffmann
Date: Fri, 18 Mar 2016 16:15:37 +0100
Subject: [PATCH 071/122] wm_timeseries: consistent throw structure.

---
 src/riak_kv_wm_timeseries.erl | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl
index f08ea38161..217d13bec4 100644
--- a/src/riak_kv_wm_timeseries.erl
+++ b/src/riak_kv_wm_timeseries.erl
@@ -140,8 +140,8 @@ malformed_request(RD, Ctx) ->
         Ctx2 = extract_params(wrq:req_qs(RD), Ctx),
         malformed_request(wrq:path_tokens(RD), RD, Ctx2)
     catch
-        throw:ParameterError ->
-            Resp = riak_kv_wm_ts_util:set_error_message("parameter error: ~p", [ParameterError], RD),
+        throw:{parameter_error, Error} ->
+            Resp = riak_kv_wm_ts_util:set_error_message("parameter error: ~p", [Error], RD),
             {true, Resp, Ctx}
     end.
 
@@ -303,10 +303,12 @@ extract_params([{"timeout", TimeoutStr}], Ctx) ->
                  options = [{timeout, Timeout}]}
     catch
         _:_ ->
-            throw(riak_kv_wm_ts_util:flat_format("timeout not an integer value: ~s", [TimeoutStr]))
+            Reason = io_lib:format("timeout not an integer value: ~s", [TimeoutStr]),
+            throw({parameter_error, Reason})
     end;
 extract_params(Params, _Ctx) ->
-    throw(riak_kv_wm_ts_util:flat_format("incorrect paramters: ~p", [Params])).
+    Reason = io_lib:format("incorrect parameters: ~p", [Params]),
+    throw({parameter_error, Reason}).
 
 validate_key(Path, Mod) ->
     UnquotedPath = lists:map(fun mochiweb_util:unquote/1, Path),

From 2abaae0cb60ed5c966114ad475e35539ecb18ca6 Mon Sep 17 00:00:00 2001
From: Torben Hoffmann
Date: Fri, 18 Mar 2016 16:16:40 +0100
Subject: [PATCH 072/122] WIP wm_timeseries_query: create and describe return json body.
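For the JSON shape involved, see the to_json/1 helper visible in the hunks
below: a select result is rendered as a single object with "columns" and
"rows" members. A sketch with made-up column names and values
(mochijson2:encode/1 returns an iolist):

    Json = mochijson2:encode({struct, [{<<"columns">>, [<<"a">>, <<"b">>]},
                                       {<<"rows">>,    [[1, <<"x">>]]}]}),
    %% iolist_to_binary(Json) =:=
    %%     <<"{\"columns\":[\"a\",\"b\"],\"rows\":[[1,\"x\"]]}">>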
---
 src/riak_kv_wm_timeseries_query.erl | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/src/riak_kv_wm_timeseries_query.erl b/src/riak_kv_wm_timeseries_query.erl
index 57be252500..c9d43e043c 100644
--- a/src/riak_kv_wm_timeseries_query.erl
+++ b/src/riak_kv_wm_timeseries_query.erl
@@ -108,7 +108,7 @@ malformed_request(RD, Ctx) ->
                            mod=Mod}}
     catch
         throw:{query, Reason} ->
-            Response = riak_kv_wm_ts_util:set_error_message("bad query: ~p", [Reason], RD),
+            Response = riak_kv_wm_ts_util:set_error_message("bad query: ~s", [Reason], RD),
             {true, Response, Ctx};
         throw:{unsupported_sql_type, Type} ->
             Response = riak_kv_wm_ts_util:set_error_message(
@@ -172,12 +172,21 @@ resource_exists(RD, #ctx{sql_type=ddl,
             {{halt, 409}, Resp, Ctx}
     end;
 resource_exists(RD, #ctx{sql_type=Type,
-                         mod=Mod}=Ctx) when Type == describe;
+                         mod=Mod,
+                         table=Table}=Ctx) when Type == describe;
                                             Type == select ->
-    Res = riak_kv_wm_ts_util:table_module_exists(Mod),
-    {Res, RD, Ctx};
+    case riak_kv_wm_ts_util:table_module_exists(Mod) of
+        true ->
+            {true, RD, Ctx};
+        false ->
+            Resp = riak_kv_wm_ts_util:set_error_message("table ~p does not exist",
+                                                        [Table], RD),
+            {false, Resp, Ctx}
+    end;
 resource_exists(RD, Ctx) ->
-    {false, RD, Ctx}.
+    Resp = riak_kv_wm_ts_util:set_error_message("no such resource ~p",
+                                                [wrq:path(RD)], RD),
+    {false, Resp, Ctx}.
 
 -spec post_is_create(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()).
 post_is_create(RD, Ctx) ->
@@ -321,3 +330,6 @@ to_json({Columns, Rows}) when is_list(Columns), is_list(Rows) ->
                           {<<"rows">>, Rows}]});
 to_json(Other) ->
     mochijson2:encode(Other).
+
+%% log(Format, Args) ->
+%%     lager:log(info, self(), Format, Args).

From 9349d61a5e2318f35f7592775fc618317e7253c7 Mon Sep 17 00:00:00 2001
From: Torben Hoffmann
Date: Fri, 18 Mar 2016 16:17:40 +0100
Subject: [PATCH 073/122] wm_timeseries_util: set_error_message now produces
 a JSON body.

---
 src/riak_kv_wm_ts_util.erl | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/riak_kv_wm_ts_util.erl b/src/riak_kv_wm_ts_util.erl
index c11615e026..67b13f6e95 100644
--- a/src/riak_kv_wm_ts_util.erl
+++ b/src/riak_kv_wm_ts_util.erl
@@ -24,7 +24,6 @@
 -export([table_from_request/1]).
 -export([utf8_to_binary/1]).
 
--export([flat_format/2]).
 -export([set_text_resp_header/2]).
 -export([set_error_message/3]).
 -export([set_json_response/2]).
@@ -61,7 +60,9 @@ set_text_resp_header(IoList, RD) ->
       "Content-Type", "text/plain", wrq:append_to_response_body(IoList,RD)).
 
 set_error_message(Format, Args, RD) ->
-    set_text_resp_header(flat_format(Format, Args), RD).
+    Str = flat_format(Format, Args),
+    Json = mochijson2:encode({struct, [{error, list_to_binary(Str)}]}),
+    set_json_response(Json, RD).
 
 set_json_response(Json, RD) ->
     wrq:set_resp_header("Content-Type", "application/json",
@@ -118,4 +119,4 @@ local_key_fields_and_types(Mod) ->
     LK = local_key(Mod),
     Types = [Mod:get_field_type([F]) || F <- LK ],
     LKStr = [ binary_to_list(F) || F <- LK ],
-    lists:zip(LKStr, Types).
\ No newline at end of file
+    lists:zip(LKStr, Types).

From 1fade7b95a0e24439c211b0b12bcebd2ceb335df Mon Sep 17 00:00:00 2001
From: "John R.
Daily" Date: Fri, 18 Mar 2016 17:02:35 -0400 Subject: [PATCH 074/122] Stop wasting time with AAE and YZ indexing --- src/riak_kv_vnode.erl | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/riak_kv_vnode.erl b/src/riak_kv_vnode.erl index 88f8144478..dab83ad597 100644 --- a/src/riak_kv_vnode.erl +++ b/src/riak_kv_vnode.erl @@ -842,19 +842,20 @@ handle_command({get_index_entries, Opts}, {reply, ignore, State} end; -%% For now, ignore async_put +%% For now, ignore async_put. This is currently TS-only, and TS +%% supports neither AAE nor YZ. handle_command(?KV_W1C_BATCH_PUT_REQ{objs=Objs, type=Type}, From, State=#state{mod=Mod, idx=Idx, modstate=ModState}) -> StartTS = os:timestamp(), Context = {w1c_batch_put, From, Type, Objs, StartTS}, case Mod:batch_put(Context, Objs, [], ModState) of {ok, UpModState} -> - lists:foreach( - fun({{Bucket, Key}, EncodedVal}) -> - update_hashtree(Bucket, Key, EncodedVal, State), - ?INDEX_BIN(Bucket, Key, EncodedVal, put, Idx) - end, - Objs), + %% When we support AAE, be sure to call a batch version of + %% `update_hashtree' instead of iterating over each + %% element of Objs. + %% + %% riak_kv_index_hashtree:insert/async_insert can + %% take a list {reply, ?KV_W1C_BATCH_PUT_REPLY{reply=ok, type=Type}, State#state{modstate=UpModState}}; {error, Reason, UpModState} -> {reply, ?KV_W1C_BATCH_PUT_REPLY{reply={error, Reason}, type=Type}, State#state{modstate=UpModState}} From c3f5c3e575894f6d90d50f61175a087010d154ac Mon Sep 17 00:00:00 2001 From: "John R. Daily" Date: Fri, 18 Mar 2016 17:28:24 -0400 Subject: [PATCH 075/122] Drop no-longer-needed pattern binding --- src/riak_kv_vnode.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/riak_kv_vnode.erl b/src/riak_kv_vnode.erl index dab83ad597..31b5b53a87 100644 --- a/src/riak_kv_vnode.erl +++ b/src/riak_kv_vnode.erl @@ -845,7 +845,7 @@ handle_command({get_index_entries, Opts}, %% For now, ignore async_put. This is currently TS-only, and TS %% supports neither AAE nor YZ. handle_command(?KV_W1C_BATCH_PUT_REQ{objs=Objs, type=Type}, - From, State=#state{mod=Mod, idx=Idx, modstate=ModState}) -> + From, State=#state{mod=Mod, modstate=ModState}) -> StartTS = os:timestamp(), Context = {w1c_batch_put, From, Type, Objs, StartTS}, case Mod:batch_put(Context, Objs, [], ModState) of From 42bfad3d15f34eaeff6a01f6dacc13380662d998 Mon Sep 17 00:00:00 2001 From: "John R. 
Daily" Date: Fri, 18 Mar 2016 17:35:01 -0400 Subject: [PATCH 076/122] QL types are now handled differently on develop --- include/riak_kv_ts.hrl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/riak_kv_ts.hrl b/include/riak_kv_ts.hrl index 3b3c9816ee..38e87ba670 100644 --- a/include/riak_kv_ts.hrl +++ b/include/riak_kv_ts.hrl @@ -34,7 +34,7 @@ { calc_type = rows :: select_result_type(), initial_state = [] :: [any()], - col_return_types = [] :: [field_type()], + col_return_types = [] :: [riak_ql_ddl:field_type()], col_names = [] :: [binary()], clause = [] :: [riak_kv_qry_compiler:compiled_select()], finalisers = [] :: [skip | function()] @@ -44,9 +44,9 @@ { 'SELECT' :: #riak_sel_clause_v1{}, 'FROM' = <<>> :: binary() | {list, [binary()]} | {regex, list()}, - 'WHERE' = [] :: [filter()], - 'ORDER BY' = [] :: [sorter()], - 'LIMIT' = [] :: [limit()], + 'WHERE' = [] :: [riak_ql_ddl:filter()], + 'ORDER BY' = [] :: [riak_ql_ddl:sorter()], + 'LIMIT' = [] :: [riak_ql_ddl:limit()], helper_mod :: atom(), %% will include groups when we get that far partition_key = none :: none | #key_v1{}, From 47a8bd8dbda8ad224403bd9f2da9969d5bfcbcbd Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Fri, 18 Mar 2016 22:51:45 +0100 Subject: [PATCH 077/122] wm_timeseries: conversion to returning JSON bodies. --- src/riak_kv_wm_timeseries.erl | 23 ++++++++++------- src/riak_kv_wm_timeseries_listkeys.erl | 34 +++++++++++++++----------- 2 files changed, 34 insertions(+), 23 deletions(-) diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl index 217d13bec4..5ae1d02d0d 100644 --- a/src/riak_kv_wm_timeseries.erl +++ b/src/riak_kv_wm_timeseries.erl @@ -202,10 +202,11 @@ resource_exists(Path, 'GET', RD, {{halt, 500}, InternalResp, Ctx} end catch - _:Reason -> - Resp = riak_kv_wm_ts_util:set_error_message("lookup on ~p failed due to ~p", - [Path, Reason], - RD), + throw:{path_error, Reason} -> + Resp = riak_kv_wm_ts_util:set_error_message( + "lookup on ~p failed due to ~p", + [Path, Reason], + RD), {false, Resp, Ctx} end; resource_exists(Path, 'DELETE', RD, #ctx{mod=Mod}=Ctx) -> @@ -216,8 +217,8 @@ resource_exists(Path, 'DELETE', RD, #ctx{mod=Mod}=Ctx) -> Key = validate_key(Path, Mod), {true, RD, Ctx#ctx{key=Key}} catch - _:Reason -> - Resp = riak_kv_wm_ts_util:set_error_message("lookup on ~p failed due to ~p", + throw:{path_error, Reason} -> + Resp = riak_kv_wm_ts_util:set_error_message("deletion of ~p failed due to ~p", [Path, Reason], RD), {false, Resp, Ctx} @@ -273,7 +274,11 @@ delete_resource(RD, #ctx{table=Table, Resp = riak_kv_wm_ts_util:set_json_response(Json, RD), {true, Resp, Ctx}; {error, notfound} -> - {{halt, 404}, RD, Ctx} + Resp = riak_kv_wm_ts_util:set_error_message( + "resource ~p does not exist - impossible to delete", + [wrq:path(RD)], + RD), + {{halt, 404}, Resp, Ctx} catch _:Reason -> lager:log(info, self(), "delete_resource failed: ~p", [Reason]), @@ -330,7 +335,7 @@ match_path([], []) -> match_path([F,V|Path], [{F, Type}|KeyTypes]) -> [convert_field_value(Type, V)|match_path(Path, KeyTypes)]; match_path(Path, _KeyTypes) -> - throw(io_lib:format("incorrect path ~p", [Path])). + throw({path_error, io_lib:format("incorrect path ~p", [Path])}). %% @private convert_field_value(varchar, V) -> @@ -349,7 +354,7 @@ convert_field_value(timestamp, V) -> GoodValue when GoodValue > 0 -> GoodValue; _ -> - throw(url_key_bad_value) + throw({path_error, "incorrect field value"}) end. 
extract_data(RD, Mod) -> diff --git a/src/riak_kv_wm_timeseries_listkeys.erl b/src/riak_kv_wm_timeseries_listkeys.erl index f172d1ce05..38484c90aa 100644 --- a/src/riak_kv_wm_timeseries_listkeys.erl +++ b/src/riak_kv_wm_timeseries_listkeys.erl @@ -106,8 +106,16 @@ allowed_methods(RD, Ctx) -> {['GET'], RD, Ctx}. -spec resource_exists(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). -resource_exists(RD, #ctx{mod=Mod} = Ctx) -> - {riak_kv_wm_ts_util:table_module_exists(Mod), RD, Ctx}. +resource_exists(RD, #ctx{mod=Mod, + table=Table} = Ctx) -> + case riak_kv_wm_ts_util:table_module_exists(Mod) of + true -> + {true, RD, Ctx}; + false -> + Resp = riak_kv_wm_ts_util:set_error_message( + "table ~p does not exist", [Table], RD), + {false, Resp, Ctx} + end. -spec encodings_provided(#wm_reqdata{}, #ctx{}) -> cb_rv_spec([{Encoding::string(), Producer::function()}]). @@ -120,13 +128,12 @@ encodings_provided(RD, Ctx) -> cb_rv_spec([{ContentType::string(), Producer::atom()}]). %% @doc List the content types available for representing this resource. content_types_provided(RD, Ctx) -> - {[{"text/html", produce_doc_body}], RD, Ctx}. + {[{"application/json", produce_doc_body}], RD, Ctx}. produce_doc_body(RD, Ctx = #ctx{table = Table, mod=Mod, client = Client}) -> {ok, ReqId} = riak_client:stream_list_keys( {Table, Table}, undefined, Client), - lager:log(info, self(), "in produce_doc_body ~p", [Table]), {{halt, 200}, wrq:set_resp_body({stream, prepare_stream(ReqId, Table, Mod)}, RD), Ctx}. prepare_stream(ReqId, Table, Mod) -> @@ -142,19 +149,19 @@ stream_keys(ReqId, Table, Mod) -> stream_keys(ReqId, Table, Mod); {ReqId, From, {keys, Keys}} -> _ = riak_kv_keys_fsm:ack_keys(From), - {ts_keys_to_html(Keys, Table, Mod), fun() -> stream_keys(ReqId, Table, Mod) end}; + {ts_keys_to_json(Keys, Table, Mod), fun() -> stream_keys(ReqId, Table, Mod) end}; {ReqId, {keys, Keys}} -> - {ts_keys_to_html(Keys, Table, Mod), fun() -> stream_keys(ReqId, Table, Mod) end}; + {ts_keys_to_json(Keys, Table, Mod), fun() -> stream_keys(ReqId, Table, Mod) end}; {ReqId, done} -> {<<"">>, done}; {ReqId, {error, timeout}} -> {mochijson2:encode({struct, [{error, timeout}]}), done}; - Weird -> - lager:log(info, self(), "stream_keys got totally Weird=~p", [Weird]), + _Weird -> + %% @todo: should we log this? stream_keys(ReqId, Table, Mod) end. -ts_keys_to_html(EncodedKeys, Table, Mod) -> +ts_keys_to_json(EncodedKeys, Table, Mod) -> BaseUrl = base_url(Table), Keys = decode_keys(EncodedKeys), KeyTypes = riak_kv_wm_ts_util:local_key_fields_and_types(Mod), @@ -166,14 +173,13 @@ ts_keys_to_html(EncodedKeys, Table, Mod) -> %% format_url(BaseUrl, KeyTypes, Key) %% end, %% Keys), - Hrefs = [format_href(URL) || URL <- URLs], - list_to_binary(lists:flatten(Hrefs)). + JsonList = [ mochijson2:encode([{url, URL}]) || URL <- URLs], + list_to_binary(lists:flatten(JsonList)). -format_href(URL) -> - io_lib:format("~s", [URL, URL]). format_url(BaseUrl, KeyTypes, Key) -> - io_lib:format("~s~s", [BaseUrl, key_to_string(Key, KeyTypes)]). + list_to_binary( + io_lib:format("~s~s", [BaseUrl, key_to_string(Key, KeyTypes)])). 
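+%% Sketch of the streamed body ts_keys_to_json/3 aims to produce (the
+%% URL shape is assumed from base_url/1 and the keys dispatch rule; the
+%% table and key values are invented):
+%%
+%%   {"url":"/ts/v1/tables/GeoCheckin/keys/family/f1/series/s1/time/1"}
+%%   {"url":"/ts/v1/tables/GeoCheckin/keys/family/f1/series/s1/time/2"}
+%%
+%% i.e. one JSON object per key, emitted chunk by chunk as the listkeys
+%% FSM delivers batches, rather than as a single JSON array.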
decode_keys(Keys) -> [tuple_to_list(sext:decode(A)) From df1862eea682c4b1431a4760e36b705e74f393b8 Mon Sep 17 00:00:00 2001 From: Andrei Zavada Date: Tue, 22 Mar 2016 03:47:34 +0200 Subject: [PATCH 078/122] even better exceptions from erlify_bucket_prop, with tests --- src/riak_kv_wm_utils.erl | 85 ++++++++++++++++++++++++++++++++-------- 1 file changed, 68 insertions(+), 17 deletions(-) diff --git a/src/riak_kv_wm_utils.erl b/src/riak_kv_wm_utils.erl index fbfd750cfc..8fa870ba00 100644 --- a/src/riak_kv_wm_utils.erl +++ b/src/riak_kv_wm_utils.erl @@ -362,20 +362,16 @@ jsonify_bucket_prop({Prop, Value}) -> {Property::atom(), erlpropvalue()}. %% @doc The reverse of jsonify_bucket_prop/1. Converts JSON representation %% of bucket properties to their Erlang form. -erlify_bucket_prop({?JSON_DATATYPE, Type}) when is_binary(Type) -> - {datatype, binary_to_existing_atom(Type, utf8)}; +erlify_bucket_prop({?JSON_DATATYPE, Type}) -> + try + {datatype, binary_to_existing_atom(Type, utf8)} + catch + error:badarg -> + throw({bad_datatype, Type}) + end; erlify_bucket_prop({?JSON_LINKFUN, {struct, Props}}) -> case {proplists:get_value(?JSON_MOD, Props), proplists:get_value(?JSON_FUN, Props)} of - {Mod, Fun} when is_binary(Mod), is_binary(Fun) -> - try - {linkfun, {modfun, - list_to_existing_atom(binary_to_list(Mod)), - list_to_existing_atom(binary_to_list(Fun))}} - catch - error:badarg -> - throw({bad_linkfun_modfun, {Mod, Fun}}) - end; {undefined, undefined} -> case proplists:get_value(?JSON_JSFUN, Props) of Name when is_binary(Name) -> @@ -391,18 +387,29 @@ erlify_bucket_prop({?JSON_LINKFUN, {struct, Props}}) -> throw({bad_linkfun_bkey, {Bucket, Key}}) end; Source when is_binary(Source) -> - {linkfun, {jsanon, Source}} - end + {linkfun, {jsanon, Source}}; + NotBinary -> + throw({bad_linkfun_modfun, {jsanon, NotBinary}}) + end; + NotBinary -> + throw({bad_linkfun_modfun, NotBinary}) + end; + {Mod, Fun} -> + try + {linkfun, {modfun, + binary_to_existing_atom(Mod, utf8), + binary_to_existing_atom(Fun, utf8)}} + catch + error:badarg -> + throw({bad_linkfun_modfun, {Mod, Fun}}) end end; erlify_bucket_prop({?JSON_CHASH, {struct, Props}}) -> Mod = proplists:get_value(?JSON_MOD, Props), Fun = proplists:get_value(?JSON_FUN, Props), try - {chash_keyfun, {list_to_existing_atom( - binary_to_list(Mod)), - list_to_existing_atom( - binary_to_list(Fun))}} + {chash_keyfun, {binary_to_existing_atom(Mod, utf8), + binary_to_existing_atom(Fun, utf8)}} catch error:badarg -> throw({bad_chash_keyfun, {Mod, Fun}}) @@ -461,3 +468,47 @@ method_to_perm('GET') -> "riak_kv.get"; method_to_perm('DELETE') -> "riak_kv.delete". + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + +erlify_property_check_valid_test() -> + ?assertEqual({datatype, integer}, + erlify_bucket_prop({?JSON_DATATYPE, <<"integer">>})), + + ?assertEqual({linkfun, {modfun, erlang, halt}}, + erlify_bucket_prop({?JSON_LINKFUN, {struct, [{?JSON_MOD, <<"erlang">>}, + {?JSON_FUN, <<"halt">>}]}})), + ?assertEqual({linkfun, {jsfun, <<"js_never_dies">>}}, + erlify_bucket_prop({?JSON_LINKFUN, {struct, [{?JSON_JSFUN, <<"js_never_dies">>}]}})), + + ?assertEqual({linkfun, {jsanon, {<<"face">>, <<"book">>}}}, + erlify_bucket_prop({?JSON_LINKFUN, {struct, [{?JSON_JSANON, {struct, [{?JSON_JSBUCKET, <<"face">>}, + {?JSON_JSKEY, <<"book">>}]}}]}})), + ?assertEqual({chash_keyfun, {re, run}}, + erlify_bucket_prop({?JSON_CHASH, {struct, [{?JSON_MOD, <<"re">>}, + {?JSON_FUN, <<"run">>}]}})). 
+ +erlify_property_check_exceptions_test() -> + ?assertThrow({bad_datatype, 42}, + erlify_bucket_prop({?JSON_DATATYPE, 42})), + ?assertThrow({bad_datatype, <<"tatadype">>}, + erlify_bucket_prop({?JSON_DATATYPE, <<"tatadype">>})), + + ?assertThrow({bad_linkfun_modfun, {<<"nomod">>, <<"nofun">>}}, + erlify_bucket_prop({?JSON_LINKFUN, {struct, [{?JSON_MOD, <<"nomod">>}, + {?JSON_FUN, <<"nofun">>}]}})), + ?assertThrow({bad_linkfun_modfun, {<<"nomod">>, 368}}, + erlify_bucket_prop({?JSON_LINKFUN, {struct, [{?JSON_MOD, <<"nomod">>}, + {?JSON_FUN, 368}]}})), + ?assertThrow({bad_linkfun_modfun, 635}, + erlify_bucket_prop({?JSON_LINKFUN, {struct, [{?JSON_JSFUN, 635}]}})), + + ?assertThrow({bad_linkfun_bkey, {nobinarybucket, 89}}, + erlify_bucket_prop({?JSON_LINKFUN, {struct, [{?JSON_JSANON, {struct, [{?JSON_JSBUCKET, nobinarybucket}, + {?JSON_JSKEY, 89}]}}]}})), + ?assertThrow({bad_chash_keyfun, {<<"nomod">>, <<"nofun">>}}, + erlify_bucket_prop({?JSON_CHASH, {struct, [{?JSON_MOD, <<"nomod">>}, + {?JSON_FUN, <<"nofun">>}]}})). + +-endif. From 9b6fce81716044c5dd2b24f62390cf5752a19554 Mon Sep 17 00:00:00 2001 From: Brett Hazen Date: Wed, 10 Feb 2016 17:04:55 -0700 Subject: [PATCH 079/122] Create riak_ts-develop-1.3 --- rebar.config | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rebar.config b/rebar.config index 399e2ea6fe..5691757ced 100644 --- a/rebar.config +++ b/rebar.config @@ -28,11 +28,11 @@ {deps, [ {sidejob, ".*", {git, "git://github.com/basho/sidejob.git", {branch, "develop"}}}, {erlang_js, ".*", {git, "git://github.com/basho/erlang_js.git", {tag, "1.3.0"}}}, - {bitcask, ".*", {git, "git://github.com/basho/bitcask.git", {branch, "riak_ts-develop"}}}, + {bitcask, ".*", {git, "git://github.com/basho/bitcask.git", {branch, "riak_ts-develop-1.3"}}}, {eper, ".*", {git, "git://github.com/basho/eper.git", {tag, "0.78"}}}, {sext, ".*", {git, "git://github.com/basho/sext.git", {tag, "1.1p3"}}}, - {riak_pipe, ".*", {git, "git://github.com/basho/riak_pipe.git", {branch, "riak_ts-develop"}}}, - {riak_api, ".*", {git, "git://github.com/basho/riak_api.git", {branch, "riak_ts-develop"}}}, + {riak_pipe, ".*", {git, "git://github.com/basho/riak_pipe.git", {branch, "riak_ts-develop-1.3"}}}, + {riak_api, ".*", {git, "git://github.com/basho/riak_api.git", {branch, "riak_ts-develop-1.3"}}}, {riak_dt, ".*", {git, "git://github.com/basho/riak_dt.git", {branch, "develop"}}}, {msgpack, ".*", {git, "git://github.com/msgpack/msgpack-erlang.git", {tag, "0.3.5"}}}, {riak_ql, ".*", {git, "git@github.com:basho/riak_ql.git", {branch, "develop"}}}, From cfb97492b4d190c4e5712ddda2d55d6b1dd9e81e Mon Sep 17 00:00:00 2001 From: Andrei Zavada Date: Wed, 3 Feb 2016 15:14:03 +0200 Subject: [PATCH 080/122] move code to be shared between riak_kv_{wm,pb}_timeseries, to _ts_util --- src/riak_kv_pb_timeseries.erl | 431 +++++++--------------------------- src/riak_kv_ts_util.erl | 355 +++++++++++++++++++++++++++- 2 files changed, 431 insertions(+), 355 deletions(-) diff --git a/src/riak_kv_pb_timeseries.erl b/src/riak_kv_pb_timeseries.erl index 72b63087b9..6f8f752ed7 100644 --- a/src/riak_kv_pb_timeseries.erl +++ b/src/riak_kv_pb_timeseries.erl @@ -25,9 +25,7 @@ -include_lib("riak_pb/include/riak_kv_pb.hrl"). -include_lib("riak_pb/include/riak_ts_pb.hrl"). - -include("riak_kv_ts.hrl"). --include("riak_kv_wm_raw.hrl"). -behaviour(riak_api_pb_service). @@ -122,7 +120,7 @@ decode_query(SQL) -> -spec decode_query(Query::#tsinterpolation{}, term()) -> {error, _} | {ok, ts_query_types()}. 
-decode_query(#tsinterpolation{ base = BaseQuery }, Cover) -> +decode_query(#tsinterpolation{base = BaseQuery}, Cover) -> Lexed = riak_ql_lexer:get_tokens(binary_to_list(BaseQuery)), case riak_ql_parser:ql_parse(Lexed) of {select, SQL} -> @@ -293,19 +291,23 @@ make_insert_response(Mod, #riak_sql_insert_v1{'INSERT' = Table, fields = Fields, make_rpberrresp(?E_BAD_QUERY, ValueReason); {ok, Data} -> insert_putreqs(Mod, Table, Data) - end; + end; {error, FieldReason} -> make_rpberrresp(?E_BAD_QUERY, FieldReason) end. insert_putreqs(Mod, Table, Data) -> - case catch validate_rows(Mod, Data) of + case catch riak_kv_ts_util:validate_rows(Mod, Data) of [] -> - case put_data(Data, Table, Mod) of - 0 -> + case riak_kv_ts_api:put_data(Data, Table, Mod) of + ok -> #tsqueryresp{}; - ErrorCount -> - failed_put_response(ErrorCount) + {error, {some_failed, ErrorCount}} -> + failed_put_response(ErrorCount); + {error, no_type} -> + table_not_activated_response(Table); + {error, OtherReason} -> + make_rpberrresp(?E_PUT, to_string(OtherReason)) end; BadRowIdxs when is_list(BadRowIdxs) -> validate_rows_error_response(BadRowIdxs) @@ -328,14 +330,14 @@ make_empty_row(Mod) -> {ok, [pos_integer()]} | {error, string()}. lookup_field_positions(Mod, FieldIdentifiers) -> case lists:foldl( - fun({identifier, FieldName}, {Good, Bad}) -> - case Mod:is_field_valid(FieldName) of - false -> - {Good, [FieldName | Bad]}; - true -> - {[Mod:get_field_position(FieldName) | Good], Bad} - end - end, {[], []}, FieldIdentifiers) + fun({identifier, FieldName}, {Good, Bad}) -> + case Mod:is_field_valid(FieldName) of + false -> + {Good, [FieldName | Bad]}; + true -> + {[Mod:get_field_position(FieldName) | Good], Bad} + end + end, {[], []}, FieldIdentifiers) of {Positions, []} -> {ok, lists:reverse(Positions)}; @@ -398,9 +400,9 @@ sub_tsttbputreq(Mod, _DDL, #tsttbputreq{table = Table, rows = Data}, sub_putreq_common(Mod, Table, Data, State). sub_putreq_common(Mod, Table, Data, State) -> - case catch validate_rows(Mod, Data) of + case catch riak_kv_ts_util:validate_rows(Mod, Data) of [] -> - case put_data(Data, Table, Mod) of + case riak_kv_ts_util:put_data(Data, Table, Mod) of 0 -> {reply, #tsputresp{}, State}; ErrorCount -> @@ -410,178 +412,28 @@ sub_putreq_common(Mod, Table, Data, State) -> {reply, validate_rows_error_response(BadRowIdxs), State} end. -%% Give validate_rows/2 a DDL Module and a list of decoded rows, -%% and it will return a list of strings that represent the invalid rows indexes. --spec validate_rows(module(), list(tuple())) -> list(string()). -validate_rows(Mod, Rows) -> - ValidateFn = fun(X, {Acc, BadRowIdxs}) -> - case Mod:validate_obj(X) of - true -> {Acc+1, BadRowIdxs}; - _ -> {Acc+1, [integer_to_list(Acc) | BadRowIdxs]} - end - end, - {_, BadRowIdxs} = lists:foldl(ValidateFn, {1, []}, Rows), - lists:reverse(BadRowIdxs). - - --spec put_data([riak_pb_ts_codec:tsrow()], binary(), module()) -> integer(). 
-%% return count of records we failed to put -put_data(Data, Table, Mod) when is_binary(Table) -> - DDL = Mod:get_ddl(), - Bucket = riak_kv_ts_util:table_to_bucket(Table), - BucketProps = riak_core_bucket:get_bucket(Bucket), - NVal = proplists:get_value(n_val, BucketProps), - - PartitionedData = partition_data(Data, Bucket, BucketProps, DDL, Mod), - PreflistData = add_preflists(PartitionedData, NVal, - riak_core_node_watcher:nodes(riak_kv)), - - SendFullBatches = riak_core_capability:get({riak_kv, w1c_batch_vnode}, false), - %% Default to 1MB for a max batch size to not overwhelm disterl - CappedBatchSize = app_helper:get_env(riak_kv, timeseries_max_batch_size, - 1024 * 1024), - - EncodeFn = - fun(O) -> riak_object:to_binary(v1, O, msgpack) end, - - {ReqIds, FailReqs} = - lists:foldl( - fun({DocIdx, Preflist, Records}, {GlobalReqIds, GlobalErrorsCnt}) -> - case riak_kv_w1c_worker:validate_options( - NVal, Preflist, [], BucketProps) of - {ok, W, PW} -> - DataForVnode = pick_batch_option(SendFullBatches, - CappedBatchSize, - Records, - termsize(hd(Records)), - length(Records)), - Ids = - invoke_async_put(fun(Record) -> - build_object(Bucket, Mod, DDL, - Record, DocIdx) - end, - fun(RObj, LK) -> - riak_kv_w1c_worker:async_put( - RObj, W, PW, Bucket, NVal, LK, - EncodeFn, Preflist) - end, - fun(RObjs) -> - riak_kv_w1c_worker:ts_batch_put( - RObjs, W, PW, Bucket, NVal, - EncodeFn, Preflist) - end, - DataForVnode), - {GlobalReqIds ++ Ids, GlobalErrorsCnt}; - _Error -> - {GlobalReqIds, GlobalErrorsCnt + length(Records)} - end - end, - {[], 0}, PreflistData), - Responses = riak_kv_w1c_worker:async_put_replies(ReqIds, []), - length(lists:filter(fun({error, _}) -> true; - (_) -> false - end, Responses)) + FailReqs. - --spec partition_data(Data :: list(term()), - Bucket :: {binary(), binary()}, - BucketProps :: proplists:proplist(), - DDL :: ?DDL{}, - Mod :: module()) -> - list(tuple(chash:index(), list(term()))). -partition_data(Data, Bucket, BucketProps, DDL, Mod) -> - PartitionTuples = - [ { riak_core_util:chash_key({Bucket, row_to_key(R, DDL, Mod)}, - BucketProps), R } || R <- Data ], - dict:to_list( - lists:foldl(fun({Idx, R}, Dict) -> - dict:append(Idx, R, Dict) - end, - dict:new(), - PartitionTuples)). - -row_to_key(Row, DDL, Mod) -> - riak_kv_ts_util:encode_typeval_key( - riak_ql_ddl:get_partition_key(DDL, Row, Mod)). - -%%%%%%%% -%% Utility functions for batch delivery of records -termsize(Term) -> - size(term_to_binary(Term)). - -pick_batch_option(_, _, Records, _, 1) -> - {individual, Records}; -pick_batch_option(true, MaxBatch, Records, SampleSize, _NumRecs) -> - {batches, create_batches(Records, - estimated_row_count(SampleSize, MaxBatch))}; -pick_batch_option(false, _, Records, _, _) -> - {individual, Records}. - -estimated_row_count(SampleRowSize, MaxBatchSize) -> - %% Assume some rows will be larger, so introduce a fudge factor of - %% roughly 10 percent. - RowSizeFudged = (SampleRowSize * 10) div 9, - MaxBatchSize div RowSizeFudged. - -create_batches(Rows, MaxSize) -> - create_batches(Rows, MaxSize, []). - -create_batches([], _MaxSize, Accum) -> - Accum; -create_batches(Rows, MaxSize, Accum) when length(Rows) < MaxSize -> - [Rows|Accum]; -create_batches(Rows, MaxSize, Accum) -> - {First, Rest} = lists:split(MaxSize, Rows), - create_batches(Rest, MaxSize, [First|Accum]). -%%%%%%%% - -add_preflists(PartitionedData, NVal, UpNodes) -> - lists:map(fun({Idx, Rows}) -> {Idx, - riak_core_apl:get_apl_ann(Idx, NVal, UpNodes), - Rows} end, - PartitionedData). 
- -build_object(Bucket, Mod, DDL, Row, PK) -> - Obj = Mod:add_column_info(Row), - LK = riak_kv_ts_util:encode_typeval_key( - riak_ql_ddl:get_local_key(DDL, Row, Mod)), - - RObj = riak_object:newts( - Bucket, PK, Obj, - dict:from_list([{?MD_DDL_VERSION, ?DDL_VERSION}])), - {LK, RObj}. - %% ----------- %% get and delete %% ----------- -sub_tsgetreq(Mod, DDL, #tsgetreq{table = Table, - key = PbCompoundKey, - timeout = Timeout}, +sub_tsgetreq(Mod, _DDL, #tsgetreq{table = Table, + key = PbCompoundKey, + timeout = Timeout}, State) -> Options = if Timeout == undefined -> []; true -> [{timeout, Timeout}] end, - CompoundKey = riak_pb_ts_codec:decode_cells(PbCompoundKey), - - Result = - case riak_kv_ts_util:make_ts_keys(CompoundKey, DDL, Mod) of - {ok, PKLK} -> - riak_client:get( - riak_kv_ts_util:table_to_bucket(Table), PKLK, Options, - {riak_client, [node(), undefined]}); - ErrorReason -> - ErrorReason - end, - case Result of - {ok, RObj} -> - Record = riak_object:get_value(RObj), + Mod = riak_ql_ddl:make_module_name(Table), + case riak_kv_ts_util:get_data( + CompoundKey, Table, Mod, Options) of + {ok, Record} -> {ColumnNames, Row} = lists:unzip(Record), %% the columns stored in riak_object are just %% names; we need names with types, so: - ColumnTypes = get_column_types(ColumnNames, Mod), + ColumnTypes = riak_kv_ts_util:get_column_types(ColumnNames, Mod), Rows = riak_pb_ts_codec:encode_rows(ColumnTypes, [Row]), {reply, #tsgetresp{columns = make_tscolumndescription_list( ColumnNames, ColumnTypes), @@ -595,47 +447,19 @@ sub_tsgetreq(Mod, DDL, #tsgetreq{table = Table, end. -sub_tsdelreq(Mod, DDL, #tsdelreq{table = Table, - key = PbCompoundKey, - vclock = PbVClock, - timeout = Timeout}, +sub_tsdelreq(Mod, _DDL, #tsdelreq{table = Table, + key = PbCompoundKey, + vclock = VClock, + timeout = Timeout}, State) -> - %% Pass the {dw,all} option in to the delete FSM - %% to make sure all tombstones are written by the - %% async put before the reaping get runs otherwise - %% if the default {dw,quorum} is used there is the - %% possibility that the last tombstone put overlaps - %% inside the KV vnode with the reaping get and - %% prevents the tombstone removal. Options = - if Timeout == undefined -> [{dw, all}]; - true -> [{timeout, Timeout}, {dw, all}] - end, - VClock = - case PbVClock of - undefined -> - %% this will trigger a get in riak_kv_delete:delete to - %% retrieve the actual vclock - undefined; - PbVClock -> - %% else, clients may have it already (e.g., from an - %% earlier riak_object:get), which will short-circuit - %% to avoid a separate get - riak_object:decode_vclock(PbVClock) + if Timeout == undefined -> []; + true -> [{timeout, Timeout}] end, - CompoundKey = riak_pb_ts_codec:decode_cells(PbCompoundKey), - - Result = - case riak_kv_ts_util:make_ts_keys(CompoundKey, DDL, Mod) of - {ok, PKLK} -> - riak_client:delete_vclock( - riak_kv_ts_util:table_to_bucket(Table), PKLK, VClock, Options, - {riak_client, [node(), undefined]}); - ErrorReason -> - ErrorReason - end, - case Result of + Mod = riak_ql_ddl:make_module_name(Table), + case riak_kv_ts_util:delete_data( + CompoundKey, Table, Mod, Options, VClock) of ok -> {reply, tsdelresp, State}; {error, {bad_key_length, Got, Need}} -> @@ -647,6 +471,13 @@ sub_tsdelreq(Mod, DDL, #tsdelreq{table = Table, end. +-spec make_tscolumndescription_list([binary()], [riak_pb_ts_codec:tscolumntype()]) -> + [#tscolumndescription{}]. 
+make_tscolumndescription_list(ColumnNames, ColumnTypes) -> + [#tscolumndescription{name = Name, type = riak_pb_ts_codec:encode_field_type(Type)} + || {Name, Type} <- lists:zip(ColumnNames, ColumnTypes)]. + + %% ----------- %% listkeys %% ----------- @@ -677,18 +508,28 @@ sub_tslistkeysreq(Mod, DDL, #tslistkeysreq{table = Table, sub_tscoveragereq(Mod, _DDL, #tscoveragereq{table = Table, query = Q}, State) -> - case compile(Mod, catch decode_query(Q)) of - {error, #rpberrorresp{} = Error} -> - {reply, Error, State}; - {error, _} -> + Client = {riak_client, [node(), undefined]}, + case decode_query(Q) of + {ok, SQL} -> + case riak_kv_ts_util:compile_to_per_quantum_queries(Mod, SQL) of + {ok, Compiled} -> + Bucket = riak_kv_ts_util:table_to_bucket(Table), + convert_cover_list( + riak_kv_ts_util:sql_to_cover(Client, Compiled, Bucket, []), State); + + %% parser messages have a tuple for Reason: + {error, {E, Reason}} when is_atom(E), is_binary(Reason) -> + ErrorMessage = flat_format("~p: ~s", [E, Reason]), + make_rpberrresp(?E_SUBMIT, ErrorMessage); + + {error, Reason} -> + make_rpberrresp( + ?E_BAD_QUERY, flat_format("Failed to compile query: ~p", [Reason])) + end; + {error, Reason} -> {reply, make_rpberrresp( - ?E_BAD_QUERY, "Failed to compile query"), - State}; - SQL -> - %% SQL is a list of queries (1 per quantum) - Bucket = riak_kv_ts_util:table_to_bucket(Table), - Client = {riak_client, [node(), undefined]}, - convert_cover_list(sql_to_cover(Client, SQL, Bucket, []), State) + ?E_BAD_QUERY, flat_format("Failed to parse query: ~p", [Reason])), + State} end. %% Copied and modified from riak_kv_pb_coverage:convert_list. Would @@ -724,103 +565,6 @@ assemble_ts_range({FieldName, {{StartVal, StartIncl}, {EndVal, EndIncl}}}, Text) desc = Text }. - -%% Result from riak_client:get_cover is a nested list of coverage plan -%% because KV coverage requests are designed that way, but in our case -%% all we want is the singleton head - -%% If any of the results from get_cover are errors, we want that tuple -%% to be the sole return value -sql_to_cover(_Client, [], _Bucket, Accum) -> - lists:reverse(Accum); -sql_to_cover(Client, [SQL|Tail], Bucket, Accum) -> - case Client:get_cover(riak_kv_qry_coverage_plan, Bucket, undefined, - {SQL, Bucket}) of - {error, Error} -> - {error, Error}; - [Cover] -> - {Description, RangeReplacement} = reverse_sql(SQL), - sql_to_cover(Client, Tail, Bucket, [{Cover, RangeReplacement, - Description}|Accum]) - end. - -%% Generate a human-readable description of the target -%% <<" / time > X and time < Y">> -%% Generate a start/end timestamp for future replacement in a query -reverse_sql(?SQL_SELECT{'FROM' = Table, - 'WHERE' = KeyProplist, - partition_key = PartitionKey}) -> - QuantumField = identify_quantum_field(PartitionKey), - RangeTuple = extract_time_boundaries(QuantumField, KeyProplist), - Desc = derive_description(Table, QuantumField, RangeTuple), - ReplacementValues = {QuantumField, RangeTuple}, - {Desc, ReplacementValues}. - - -derive_description(Table, Field, {{Start, StartInclusive}, {End, EndInclusive}}) -> - StartOp = pick_operator(">", StartInclusive), - EndOp = pick_operator("<", EndInclusive), - unicode:characters_to_binary( - flat_format("~ts / ~ts ~s ~B and ~ts ~s ~B", - [Table, Field, StartOp, Start, - Field, EndOp, End]), utf8). - -pick_operator(LGT, true) -> - LGT ++ "="; -pick_operator(LGT, false) -> - LGT. 
- -extract_time_boundaries(FieldName, WhereList) -> - {FieldName, timestamp, Start} = - lists:keyfind(FieldName, 1, proplists:get_value(startkey, WhereList, [])), - {FieldName, timestamp, End} = - lists:keyfind(FieldName, 1, proplists:get_value(endkey, WhereList, [])), - StartInclusive = proplists:get_value(start_inclusive, WhereList, true), - EndInclusive = proplists:get_value(end_inclusive, WhereList, false), - {{Start, StartInclusive}, {End, EndInclusive}}. - - -%%%%%%%%%%%% -%% FRAGILE HORRIBLE BAD BAD BAD AST MANGLING -identify_quantum_field(#key_v1{ast = KeyList}) -> - HashFn = find_hash_fn(KeyList), - P_V1 = hd(HashFn#hash_fn_v1.args), - hd(P_V1#param_v1.name). - -find_hash_fn([]) -> - throw(wtf); -find_hash_fn([#hash_fn_v1{}=Hash|_T]) -> - Hash; -find_hash_fn([_H|T]) -> - find_hash_fn(T). - -%%%%%%%%%%%% - - -compile(_Mod, {error, Err}) -> - {error, make_decoder_error_response(Err)}; -compile(_Mod, {'EXIT', {Err, _}}) -> - {error, make_decoder_error_response(Err)}; -compile(Mod, {ok, ?SQL_SELECT{}=SQL}) -> - case (catch Mod:get_ddl()) of - {_, {undef, _}} -> - {error, no_helper_module}; - DDL -> - case riak_ql_ddl:is_query_valid(Mod, DDL, - riak_kv_ts_util:sql_record_to_tuple(SQL)) of - true -> - case riak_kv_qry_compiler:compile(DDL, SQL, undefined) of - {error,_} = Error -> - Error; - {ok, Queries} -> - Queries - end; - {false, _Errors} -> - {error, invalid_query} - end - end. - - %% query %% @@ -833,10 +577,12 @@ sub_tsqueryreq(Mod, DDL, SQL, State) -> {ok, Data} -> {reply, make_tsquery_resp(Mod, SQL, Data), State}; - %% parser messages have a tuple for Reason: - {error, {E, Reason}} when is_atom(E), is_binary(Reason) -> - ErrorMessage = flat_format("~p: ~s", [E, Reason]), - {reply, make_rpberrresp(?E_SUBMIT, ErrorMessage), State}; + %% %% parser messages have a tuple for Reason: + %% {error, {E, Reason}} when is_atom(E), is_binary(Reason) -> + %% ErrorMessage = flat_format("~p: ~s", [E, Reason]), + %% {reply, make_rpberrresp(?E_SUBMIT, ErrorMessage), State}; + %% parser errors are now handled uniformly (will be caught + %% here in the last case branch) %% the following timeouts are known and distinguished: {error, qry_worker_timeout} -> @@ -848,6 +594,11 @@ sub_tsqueryreq(Mod, DDL, SQL, State) -> %% response {reply, make_rpberrresp(?E_TIMEOUT, "backend timeout"), State}; + %% parser messages have a tuple for Reason: + {error, {E, Reason}} when is_atom(E), is_binary(Reason) -> + ErrorMessage = flat_format("~p: ~s", [E, Reason]), + {reply, make_rpberrresp(?E_SUBMIT, ErrorMessage), State}; + {error, Reason} -> {reply, make_rpberrresp(?E_SUBMIT, to_string(Reason)), State} end. @@ -979,22 +730,6 @@ table_created_missing_response(Table) -> to_string(X) -> flat_format("~p", [X]). -%% Returns a tuple with a list of request IDs and an error tally -invoke_async_put(BuildRObjFun, AsyncPutFun, _BatchPutFun, {individual, Records}) -> - lists:map(fun(Record) -> - {LK, RObj} = BuildRObjFun(Record), - {ok, ReqId} = AsyncPutFun(RObj, LK), - ReqId - end, - Records); -invoke_async_put(BuildRObjFun, _AsyncPutFun, BatchPutFun, {batches, Batches}) -> - lists:map(fun(Batch) -> - RObjs = lists:map(BuildRObjFun, Batch), - {ok, ReqId} = BatchPutFun(RObjs), - ReqId - end, - Batches). - %% helpers to make various error responses -spec make_tsqueryresp([] | {[riak_pb_ts_codec:tscolumnname()], @@ -1013,10 +748,6 @@ make_describe_response(DescribeTableRows) -> #tsqueryresp{columns = make_tscolumndescription_list(ColumnNames, ColumnTypes), rows = riak_pb_ts_codec:encode_rows(ColumnTypes, DescribeTableRows)}. 
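+%% For illustration (the error tuple shown is hypothetical): a parser
+%% failure arriving as {error, {parse_error, <<"syntax error before: 'selct'">>}}
+%% matches the {error, {E, Reason}} clause above and reaches the client
+%% as an ?E_SUBMIT error carrying the message
+%% "parse_error: syntax error before: 'selct'".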
--spec get_column_types(list(binary()), module()) -> [riak_pb_ts_codec:tscolumntype()]. -get_column_types(ColumnNames, Mod) -> - [Mod:get_field_type([N]) || N <- ColumnNames]. - -spec make_tscolumndescription_list([binary()], [riak_pb_ts_codec:tscolumntype()]) -> [#tscolumndescription{}]. make_tscolumndescription_list(ColumnNames, ColumnTypes) -> @@ -1075,28 +806,28 @@ validate_rows_empty_test() -> {module, Mod} = test_helper_validate_rows_mod(), ?assertEqual( [], - validate_rows(Mod, []) + riak_kv_ts_util:validate_rows(Mod, []) ). validate_rows_1_test() -> {module, Mod} = test_helper_validate_rows_mod(), ?assertEqual( [], - validate_rows(Mod, [{<<"f">>, <<"s">>, 11}]) + riak_kv_ts_util:validate_rows(Mod, [{<<"f">>, <<"s">>, 11}]) ). validate_rows_bad_1_test() -> {module, Mod} = test_helper_validate_rows_mod(), ?assertEqual( ["1"], - validate_rows(Mod, [{}]) + riak_kv_ts_util:validate_rows(Mod, [{}]) ). validate_rows_bad_2_test() -> {module, Mod} = test_helper_validate_rows_mod(), ?assertEqual( ["1", "3", "4"], - validate_rows(Mod, [{}, {<<"f">>, <<"s">>, 11}, {a, <<"s">>, 12}, "hithere"]) + riak_kv_ts_util:validate_rows(Mod, [{}, {<<"f">>, <<"s">>, 11}, {a, <<"s">>, 12}, "hithere"]) ). validate_rows_error_response_1_test() -> diff --git a/src/riak_kv_ts_util.erl b/src/riak_kv_ts_util.erl index 5e1a974ca5..c0693fe4cd 100644 --- a/src/riak_kv_ts_util.erl +++ b/src/riak_kv_ts_util.erl @@ -26,16 +26,23 @@ -export([ apply_timeseries_bucket_props/2, + build_sql_record/3, + compile_to_per_quantum_queries/2, + delete_data/2, delete_data/3, delete_data/4, delete_data/5, encode_typeval_key/1, + get_column_types/2, + get_data/2, get_data/3, get_data/4, get_table_ddl/1, lk/1, make_ts_keys/3, maybe_parse_table_def/2, pk/1, + put_data/2, put_data/3, queried_table/1, + sql_record_to_tuple/1, + sql_to_cover/4, table_to_bucket/1, - build_sql_record/3, - sql_record_to_tuple/1 + validate_rows/2 ]). -export([explain_query/1, explain_query/2]). -export([explain_query_print/1]). @@ -49,13 +56,14 @@ %% bucket tuple. This function is a convenient mechanism for doing so %% and making that transition more obvious. +-include("riak_kv_wm_raw.hrl"). -include_lib("riak_ql/include/riak_ql_ddl.hrl"). -include("riak_kv_ts.hrl"). %% riak_ql_ddl:is_query_valid expects a tuple, not a SQL record -sql_record_to_tuple(?SQL_SELECT{'FROM'=From, - 'SELECT'=#riak_sel_clause_v1{clause=Select}, - 'WHERE'=Where}) -> +sql_record_to_tuple(?SQL_SELECT{'FROM' = From, + 'SELECT' = #riak_sel_clause_v1{clause=Select}, + 'WHERE' = Where}) -> {From, Select, Where}. %% Convert the proplist obtained from the QL parser @@ -360,6 +368,343 @@ varchar_quotes(V) -> <<"'", V/binary, "'">>. +%% Give validate_rows/2 a DDL Module and a list of decoded rows, +%% and it will return a list of strings that represent the invalid rows indexes. +-spec validate_rows(module(), list(tuple())) -> list(string()). +validate_rows(Mod, Rows) -> + ValidateFn = fun(X, {Acc, BadRowIdxs}) -> + case Mod:validate_obj(X) of + true -> {Acc+1, BadRowIdxs}; + _ -> {Acc+1, [integer_to_list(Acc) | BadRowIdxs]} + end + end, + {_, BadRowIdxs} = lists:foldl(ValidateFn, {1, []}, Rows), + lists:reverse(BadRowIdxs). + + +-spec put_data([riak_pb_ts_codec:tsrow()], binary(), module()) -> integer(). 
+%% return count of records we failed to put +put_data(Data, Table, Mod) when is_binary(Table) -> + DDL = Mod:get_ddl(), + Bucket = riak_kv_ts_util:table_to_bucket(Table), + BucketProps = riak_core_bucket:get_bucket(Bucket), + NVal = proplists:get_value(n_val, BucketProps), + + PartitionedData = partition_data(Data, Bucket, BucketProps, DDL, Mod), + PreflistData = add_preflists(PartitionedData, NVal, + riak_core_node_watcher:nodes(riak_kv)), + + SendFullBatches = riak_core_capability:get({riak_kv, w1c_batch_vnode}, false), + %% Default to 1MB for a max batch size to not overwhelm disterl + CappedBatchSize = app_helper:get_env(riak_kv, timeseries_max_batch_size, + 1024 * 1024), + + EncodeFn = + fun(O) -> riak_object:to_binary(v1, O, msgpack) end, + + {ReqIds, FailReqs} = + lists:foldl( + fun({DocIdx, Preflist, Records}, {GlobalReqIds, GlobalErrorsCnt}) -> + case riak_kv_w1c_worker:validate_options( + NVal, Preflist, [], BucketProps) of + {ok, W, PW} -> + DataForVnode = pick_batch_option(SendFullBatches, + CappedBatchSize, + Records, + termsize(hd(Records)), + length(Records)), + Ids = + invoke_async_put(fun(Record) -> + build_object(Bucket, Mod, DDL, + Record, DocIdx) + end, + fun(RObj, LK) -> + riak_kv_w1c_worker:async_put( + RObj, W, PW, Bucket, NVal, LK, + EncodeFn, Preflist) + end, + fun(RObjs) -> + riak_kv_w1c_worker:ts_batch_put( + RObjs, W, PW, Bucket, NVal, + EncodeFn, Preflist) + end, + DataForVnode), + {GlobalReqIds ++ Ids, GlobalErrorsCnt}; + _Error -> + {GlobalReqIds, GlobalErrorsCnt + length(Records)} + end + end, + {[], 0}, PreflistData), + Responses = riak_kv_w1c_worker:async_put_replies(ReqIds, []), + length(lists:filter(fun({error, _}) -> true; + (_) -> false + end, Responses)) + FailReqs. + +-spec partition_data(Data :: list(term()), + Bucket :: {binary(), binary()}, + BucketProps :: proplists:proplist(), + DDL :: ?DDL{}, + Mod :: module()) -> + list(tuple(chash:index(), list(term()))). +partition_data(Data, Bucket, BucketProps, DDL, Mod) -> + PartitionTuples = + [ { riak_core_util:chash_key({Bucket, row_to_key(R, DDL, Mod)}, + BucketProps), R } || R <- Data ], + dict:to_list( + lists:foldl(fun({Idx, R}, Dict) -> + dict:append(Idx, R, Dict) + end, + dict:new(), + PartitionTuples)). + +row_to_key(Row, DDL, Mod) -> + riak_kv_ts_util:encode_typeval_key( + riak_ql_ddl:get_partition_key(DDL, Row, Mod)). + +%%%%%%%% +%% Utility functions for batch delivery of records +termsize(Term) -> + size(term_to_binary(Term)). + +pick_batch_option(_, _, Records, _, 1) -> + {individual, Records}; +pick_batch_option(true, MaxBatch, Records, SampleSize, _NumRecs) -> + {batches, create_batches(Records, + estimated_row_count(SampleSize, MaxBatch))}; +pick_batch_option(false, _, Records, _, _) -> + {individual, Records}. + +estimated_row_count(SampleRowSize, MaxBatchSize) -> + %% Assume some rows will be larger, so introduce a fudge factor of + %% roughly 10 percent. + RowSizeFudged = (SampleRowSize * 10) div 9, + MaxBatchSize div RowSizeFudged. + +create_batches(Rows, MaxSize) -> + create_batches(Rows, MaxSize, []). + +create_batches([], _MaxSize, Accum) -> + Accum; +create_batches(Rows, MaxSize, Accum) when length(Rows) < MaxSize -> + [Rows|Accum]; +create_batches(Rows, MaxSize, Accum) -> + {First, Rest} = lists:split(MaxSize, Rows), + create_batches(Rest, MaxSize, [First|Accum]). +%%%%%%%% + +add_preflists(PartitionedData, NVal, UpNodes) -> + lists:map(fun({Idx, Rows}) -> {Idx, + riak_core_apl:get_apl_ann(Idx, NVal, UpNodes), + Rows} end, + PartitionedData). 
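+%% A worked example of the batching helpers above (numbers are
+%% illustrative): with the default 1 MB cap and a first record of
+%% ~100 bytes, estimated_row_count/2 fudges the row size up to ~111
+%% bytes and allows ~9400 records per batch; and
+%%   create_batches([R1, R2, R3, R4, R5], 2) -> [[R5], [R3, R4], [R1, R2]]
+%% i.e. batches come back in reverse order with the odd remainder first,
+%% which is harmless because each batch is written independently.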
+ +build_object(Bucket, Mod, DDL, Row, PK) -> + Obj = Mod:add_column_info(Row), + LK = riak_kv_ts_util:encode_typeval_key( + riak_ql_ddl:get_local_key(DDL, Row, Mod)), + + RObj = riak_object:newts( + Bucket, PK, Obj, + dict:from_list([{?MD_DDL_VERSION, ?DDL_VERSION}])), + {LK, RObj}. + + + +-spec get_data([riak_pb_ts_codec:ldbvalue()], binary()) -> + {ok, {[binary()], [[riak_pb_ts_codec:ldbvalue()]]}} | {error, term()}. +get_data(Key, Table) -> + get_data(Key, Table, undefined, []). + +-spec get_data([riak_pb_ts_codec:ldbvalue()], binary(), module()) -> + {ok, {[binary()], [[riak_pb_ts_codec:ldbvalue()]]}} | {error, term()}. +get_data(Key, Table, Mod) -> + get_data(Key, Table, Mod, []). + +-spec get_data([riak_pb_ts_codec:ldbvalue()], binary(), module(), proplists:proplist()) -> + {ok, [{binary(), riak_pb_ts_codec:ldbvalue()}]} | {error, term()}. +get_data(Key, Table, Mod0, Options) -> + Mod = + case Mod0 of + undefined -> + riak_ql_ddl:make_module_name(Table); + Mod0 -> + Mod0 + end, + DDL = Mod:get_ddl(), + Result = + case make_ts_keys(Key, DDL, Mod) of + {ok, PKLK} -> + riak_client:get( + table_to_bucket(Table), PKLK, Options, + {riak_client, [node(), undefined]}); + ErrorReason -> + ErrorReason + end, + case Result of + {ok, RObj} -> + Record = riak_object:get_value(RObj), + {ok, Record}; + ErrorReason2 -> + ErrorReason2 + end. + +-spec get_column_types(list(binary()), module()) -> [riak_pb_ts_codec:tscolumntype()]. +get_column_types(ColumnNames, Mod) -> + [Mod:get_field_type([N]) || N <- ColumnNames]. + + +-spec delete_data([any()], riak_object:bucket()) -> + ok | {error, term()}. +delete_data(Key, Table) -> + delete_data(Key, Table, undefined, [], undefined). + +-spec delete_data([any()], riak_object:bucket(), module()) -> + ok | {error, term()}. +delete_data(Key, Table, Mod) -> + delete_data(Key, Table, Mod, [], undefined). + +-spec delete_data([any()], riak_object:bucket(), module(), proplists:proplist()) -> + ok | {error, term()}. +delete_data(Key, Table, Mod, Options) -> + delete_data(Key, Table, Mod, Options, undefined). + +-spec delete_data([any()], riak_object:bucket(), module(), proplists:proplist(), + undefined | vclock:vclock()) -> + ok | {error, term()}. +delete_data(Key, Table, Mod0, Options0, VClock0) -> + Mod = + case Mod0 of + undefined -> + riak_ql_ddl:make_module_name(Table); + Mod0 -> + Mod0 + end, + %% Pass the {dw,all} option in to the delete FSM + %% to make sure all tombstones are written by the + %% async put before the reaping get runs otherwise + %% if the default {dw,quorum} is used there is the + %% possibility that the last tombstone put overlaps + %% inside the KV vnode with the reaping get and + %% prevents the tombstone removal. + Options = lists:keystore(dw, 1, Options0, {dw, all}), + DDL = Mod:get_ddl(), + VClock = + case VClock0 of + undefined -> + %% this will trigger a get in riak_kv_delete:delete to + %% retrieve the actual vclock + undefined; + VClock0 -> + %% else, clients may have it already (e.g., from an + %% earlier riak_object:get), which will short-circuit + %% to avoid a separate get + riak_object:decode_vclock(VClock0) + end, + Result = + case make_ts_keys(Key, DDL, Mod) of + {ok, PKLK} -> + riak_client:delete_vclock( + table_to_bucket(Table), PKLK, VClock, Options, + {riak_client, [node(), undefined]}); + ErrorReason -> + ErrorReason + end, + Result. 
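+%% Example use of the accessors above (table, key and timeout invented):
+%%
+%%   riak_kv_ts_util:get_data([<<"f1">>, <<"s1">>, 1420113600000],
+%%                            <<"GeoCheckin">>, undefined, [{timeout, 5000}])
+%%
+%% resolves the helper module from the table name, derives the {PK, LK}
+%% pair via make_ts_keys/3 and returns {ok, Record} | {error, Reason};
+%% delete_data/5 follows the same path, additionally forcing {dw, all}
+%% for the reasons given in the comment above.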
+ + +%% Result from riak_client:get_cover is a nested list of coverage plan +%% because KV coverage requests are designed that way, but in our case +%% all we want is the singleton head + +%% If any of the results from get_cover are errors, we want that tuple +%% to be the sole return value +sql_to_cover(_Client, [], _Bucket, Accum) -> + lists:reverse(Accum); +sql_to_cover(Client, [SQL|Tail], Bucket, Accum) -> + case Client:get_cover(riak_kv_qry_coverage_plan, Bucket, undefined, + {SQL, Bucket}) of + {error, Error} -> + {error, Error}; + [Cover] -> + {Description, RangeReplacement} = reverse_sql(SQL), + sql_to_cover(Client, Tail, Bucket, [{Cover, RangeReplacement, + Description}|Accum]) + end. + +-spec compile_to_per_quantum_queries(module(), ?SQL_SELECT{}) -> + {ok, [?SQL_SELECT{}]} | {error, any()}. +%% @doc Break up a query into a list of per-quantum queries +compile_to_per_quantum_queries(Mod, SQL) -> + case catch Mod:get_ddl() of + {_, {undef, _}} -> + {error, no_helper_module}; + DDL -> + case riak_ql_ddl:is_query_valid( + Mod, DDL, sql_record_to_tuple(SQL)) of + true -> + riak_kv_qry_compiler:compile(DDL, SQL, undefined); + {false, _Errors} -> + {error, invalid_query} + end + end. + + +%% Generate a human-readable description of the target +%% <<"
/ time > X and time < Y">>
+%% Generate a start/end timestamp for future replacement in a query
+reverse_sql(?SQL_SELECT{'FROM' = Table,
+                        'WHERE' = KeyProplist,
+                        partition_key = PartitionKey}) ->
+    QuantumField = identify_quantum_field(PartitionKey),
+    RangeTuple = extract_time_boundaries(QuantumField, KeyProplist),
+    Desc = derive_description(Table, QuantumField, RangeTuple),
+    ReplacementValues = {QuantumField, RangeTuple},
+    {Desc, ReplacementValues}.
+
+
+derive_description(Table, Field, {{Start, StartInclusive}, {End, EndInclusive}}) ->
+    StartOp = pick_operator(">", StartInclusive),
+    EndOp = pick_operator("<", EndInclusive),
+    unicode:characters_to_binary(
+      flat_format("~ts / ~ts ~s ~B and ~ts ~s ~B",
+                  [Table, Field, StartOp, Start,
+                   Field, EndOp, End]), utf8).
+
+pick_operator(LGT, true) ->
+    LGT ++ "=";
+pick_operator(LGT, false) ->
+    LGT.
+
+extract_time_boundaries(FieldName, WhereList) ->
+    {FieldName, timestamp, Start} =
+        lists:keyfind(FieldName, 1, proplists:get_value(startkey, WhereList, [])),
+    {FieldName, timestamp, End} =
+        lists:keyfind(FieldName, 1, proplists:get_value(endkey, WhereList, [])),
+    StartInclusive = proplists:get_value(start_inclusive, WhereList, true),
+    EndInclusive = proplists:get_value(end_inclusive, WhereList, false),
+    {{Start, StartInclusive}, {End, EndInclusive}}.
+
+
+%%%%%%%%%%%%
+%% FRAGILE HORRIBLE BAD BAD BAD AST MANGLING
+identify_quantum_field(#key_v1{ast = KeyList}) ->
+    HashFn = find_hash_fn(KeyList),
+    P_V1 = hd(HashFn#hash_fn_v1.args),
+    hd(P_V1#param_v1.name).
+
+find_hash_fn([]) ->
+    throw(wtf);
+find_hash_fn([#hash_fn_v1{}=Hash|_T]) ->
+    Hash;
+find_hash_fn([_H|T]) ->
+    find_hash_fn(T).
+
+%%%%%%%%%%%%
+
+
+flat_format(Format, Args) ->
+    lists:flatten(io_lib:format(Format, Args)).
+
 %%%
 %%% TESTS
 %%%

From f22db47d0776e967abd41ce80f7d9048a06d96df Mon Sep 17 00:00:00 2001
From: Andrei Zavada
Date: Thu, 4 Feb 2016 23:10:07 +0200
Subject: [PATCH 081/122] riak_kv_qry: filter out empty records

---
 src/riak_kv_qry.erl | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/riak_kv_qry.erl b/src/riak_kv_qry.erl
index f043b6b137..4aa9751bf1 100644
--- a/src/riak_kv_qry.erl
+++ b/src/riak_kv_qry.erl
@@ -103,8 +103,17 @@ maybe_submit_to_queue(SQL, ?DDL{table = BucketType} = DDL) ->
                 {error,_} = Error ->
                     Error;
                 {ok, Queries} ->
-                    maybe_await_query_results(
-                      riak_kv_qry_queue:put_on_queue(self(), Queries, DDL))
+                    case maybe_await_query_results(
+                           riak_kv_qry_queue:put_on_queue(self(), Queries, DDL)) of
+                        {ok, {ColNames, ColTypes, PossiblyWithEmptyRecords}} ->
+                            %% filter out empty records
+                            {ok,
+                             {ColNames, ColTypes,
+                              [R || R <- PossiblyWithEmptyRecords,
+                                    R /= [[]]]}};
+                        {error, Reason} ->
+                            {error, Reason}
+                    end
             end;
         {false, Errors} ->
             {error, {invalid_query, format_query_syntax_errors(Errors)}}

From 177cfcca8958da40196b248101dddb430bd104f5 Mon Sep 17 00:00:00 2001
From: Andrei Zavada
Date: Wed, 3 Feb 2016 15:15:14 +0200
Subject: [PATCH 082/122] http client for TS

Enable the following requests:

  GET    /ts/v1/tables/Table         single-key get
  DELETE /ts/v1/tables/Table         single-key delete
  PUT    /ts/v1/tables/Table         batch put
  GET    /ts/v1/tables/Table/keys    list_keys
  GET    /ts/v1/coverage             coverage for a query
  POST   /ts/v1/query                execute SQL query

Additional parameters (key, data, query) are to be supplied in request
body, as a JSON object.
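
A rough usage sketch (table name, key values and SQL are illustrative
only; paths follow the dispatch rules added to riak_kv_web):

  PUT    /ts/v1/tables/GeoCheckin    {"data": [["f1", "s1", 1420113600000, 17.3]]}
  GET    /ts/v1/tables/GeoCheckin    {"key": ["f1", "s1", 1420113600000]}
  POST   /ts/v1/query                {"query": "select * from GeoCheckin where ..."}

Responses carry JSON rows with column headers; errors come back as
{"error": ...} objects.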
--- src/riak_kv_pb_timeseries.erl | 9 +- src/riak_kv_ts_util.erl | 8 +- src/riak_kv_web.erl | 12 +- src/riak_kv_wm_timeseries.erl | 805 +++++++++++++++++++++++++ src/riak_kv_wm_timeseries_listkeys.erl | 186 ++++++ 5 files changed, 1009 insertions(+), 11 deletions(-) create mode 100644 src/riak_kv_wm_timeseries.erl create mode 100644 src/riak_kv_wm_timeseries_listkeys.erl diff --git a/src/riak_kv_pb_timeseries.erl b/src/riak_kv_pb_timeseries.erl index 6f8f752ed7..1f4b45494d 100644 --- a/src/riak_kv_pb_timeseries.erl +++ b/src/riak_kv_pb_timeseries.erl @@ -471,13 +471,6 @@ sub_tsdelreq(Mod, _DDL, #tsdelreq{table = Table, end. --spec make_tscolumndescription_list([binary()], [riak_pb_ts_codec:tscolumntype()]) -> - [#tscolumndescription{}]. -make_tscolumndescription_list(ColumnNames, ColumnTypes) -> - [#tscolumndescription{name = Name, type = riak_pb_ts_codec:encode_field_type(Type)} - || {Name, Type} <- lists:zip(ColumnNames, ColumnTypes)]. - - %% ----------- %% listkeys %% ----------- @@ -630,6 +623,8 @@ check_table_and_call(Table, Fun, TsMessage, State) -> case riak_kv_ts_util:get_table_ddl(Table) of {ok, Mod, DDL} -> Fun(Mod, DDL, TsMessage, State); + {error, no_type} -> + {reply, table_not_activated_response(Table), State}; {error, missing_helper_module} -> BucketProps = riak_core_bucket:get_bucket( riak_kv_ts_util:table_to_bucket(Table)), diff --git a/src/riak_kv_ts_util.erl b/src/riak_kv_ts_util.erl index c0693fe4cd..0ee393a5e4 100644 --- a/src/riak_kv_ts_util.erl +++ b/src/riak_kv_ts_util.erl @@ -542,8 +542,12 @@ get_data(Key, Table, Mod0, Options) -> end, case Result of {ok, RObj} -> - Record = riak_object:get_value(RObj), - {ok, Record}; + case riak_object:get_value(RObj) of + [] -> + {error, notfound}; + Record -> + {ok, Record} + end; ErrorReason2 -> ErrorReason2 end. diff --git a/src/riak_kv_web.erl b/src/riak_kv_web.erl index f556918f39..abf024c784 100644 --- a/src/riak_kv_web.erl +++ b/src/riak_kv_web.erl @@ -2,7 +2,7 @@ %% %% riak_kv_web: setup Riak's KV HTTP interface %% -%% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved. +%% Copyright (c) 2007-2016 Basho Technologies, Inc. All Rights Reserved. %% %% This file is provided to you under the Apache License, %% Version 2.0 (the "License"); you may not use this file @@ -121,7 +121,15 @@ raw_dispatch(Name) -> {Prefix ++ ["buckets", bucket, "index", field, '*'], riak_kv_wm_index, Props} - ] || {Prefix, Props} <- Props2 ]). + ] || {Prefix, Props} <- Props2 ]) ++ + + lists:flatten( + [ + [{["ts", api_version, "tables", table, "keys"], riak_kv_wm_timeseries_listkeys, Props}, + {["ts", api_version, "tables", table], riak_kv_wm_timeseries, Props}, + {["ts", api_version, "query"], riak_kv_wm_timeseries, Props} + %% {["ts", api_version, "coverage"], riak_kv_wm_timeseries, Props} + ] || {_Prefix, Props} <- Props2]). is_post(Req) -> wrq:method(Req) == 'POST'. diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl new file mode 100644 index 0000000000..1b2e28162d --- /dev/null +++ b/src/riak_kv_wm_timeseries.erl @@ -0,0 +1,805 @@ +%% ------------------------------------------------------------------- +%% +%% riak_kv_wm_timeseries: Webmachine resource for riak TS operations. +%% +%% Copyright (c) 2016 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. 
You may obtain
+%% a copy of the License at
+%%
+%%   http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+
+%% @doc Resource for Riak TS operations over HTTP.
+%%
+%% ```
+%% GET      /ts/v1/tables/Table       single-key get
+%% DELETE   /ts/v1/tables/Table       single-key delete
+%% PUT      /ts/v1/tables/Table       batch put
+%% GET      /ts/v1/coverage           coverage for a query
+%% GET/POST /ts/v1/query              execute SQL query
+%% '''
+%%
+%% Request body is expected to be a JSON containing key and/or value(s).
+%% Response is a JSON containing data rows with column headers.
+%%
+
+-module(riak_kv_wm_timeseries).
+
+%% webmachine resource exports
+-export([
+         init/1,
+         service_available/2,
+         is_authorized/2,
+         forbidden/2,
+         allowed_methods/2,
+         process_post/2,
+         malformed_request/2,
+         content_types_accepted/2,
+         resource_exists/2,
+         delete_resource/2,
+         content_types_provided/2,
+         encodings_provided/2,
+         produce_doc_body/2,
+         accept_doc_body/2
+        ]).
+
+-include_lib("webmachine/include/webmachine.hrl").
+-include_lib("riak_ql/include/riak_ql_ddl.hrl").
+-include("riak_kv_wm_raw.hrl").
+-include("riak_kv_ts.hrl").
+
+-record(ctx, {api_version,
+              method :: atom(),
+              prefix,       %% string() - prefix for resource uris
+              timeout,      %% integer() - passed-in timeout value in ms
+              security,     %% security context
+              client,       %% riak_client() - the store client
+              riak,         %% local | {node(), atom()} - params for riak client
+              api_call :: undefined|get|put|delete|query|coverage,
+              table :: undefined | binary(),
+              cover_context :: undefined | binary(),
+              %% data in/out: the following fields are either
+              %% extracted from the JSON that came in the request body
+              %% in case of a PUT, or filled out by retrieved values
+              %% for shipping (as JSON) in response body
+              key :: undefined | ts_rec(),     %% parsed out of JSON that came in the body
+              data :: undefined | [ts_rec()],  %% ditto
+              query :: undefined | string(),
+              result :: undefined | ok | {Headers::[binary()], Rows::[ts_rec()]}
+                      | [{entry, proplists:proplist()}]
+             }).
+
+-define(DEFAULT_TIMEOUT, 60000).
+-define(TABLE_ACTIVATE_WAIT, 30). %% wait until table's bucket type is activated
+
+-define(CB_RV_SPEC, {boolean(), #wm_reqdata{}, #ctx{}}).
+-type ts_rec() :: [riak_pb_ts_codec:ldbvalue()].
+
+
+-spec init(proplists:proplist()) -> {ok, #ctx{}}.
+%% @doc Initialize this resource. This function extracts the
+%%      'prefix' and 'riak' properties from the dispatch args.
+init(Props) ->
+    {ok, #ctx{prefix = proplists:get_value(prefix, Props),
+              riak = proplists:get_value(riak, Props)}}.
+
+-spec service_available(#wm_reqdata{}, #ctx{}) ->
+                               {boolean(), #wm_reqdata{}, #ctx{}}.
+%% @doc Determine whether or not a connection to Riak
+%%      can be established. This function also takes this
+%%      opportunity to extract the 'table' path binding,
+%%      the api_version and the request method from the
+%%      dispatch data.
+service_available(RD, Ctx = #ctx{riak = RiakProps}) -> + case riak_kv_wm_utils:get_riak_client( + RiakProps, riak_kv_wm_utils:get_client_id(RD)) of + {ok, C} -> + {true, RD, + Ctx#ctx{api_version = wrq:path_info(api_version, RD), + method = wrq:method(RD), + client = C, + table = + case wrq:path_info(table, RD) of + undefined -> undefined; + B -> list_to_binary(riak_kv_wm_utils:maybe_decode_uri(RD, B)) + end + }}; + Error -> + {false, wrq:set_resp_body( + flat_format("Unable to connect to Riak: ~p", [Error]), + wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), + Ctx} + end. + + +is_authorized(ReqData, Ctx) -> + case riak_api_web_security:is_authorized(ReqData) of + false -> + {"Basic realm=\"Riak\"", ReqData, Ctx}; + {true, SecContext} -> + {true, ReqData, Ctx#ctx{security = SecContext}}; + insecure -> + %% XXX 301 may be more appropriate here, but since the http and + %% https port are different and configurable, it is hard to figure + %% out the redirect URL to serve. + {{halt, 426}, + wrq:append_to_resp_body( + <<"Security is enabled and " + "Riak does not accept credentials over HTTP. Try HTTPS instead.">>, ReqData), + Ctx} + end. + + +-spec forbidden(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +forbidden(RD, Ctx) -> + case riak_kv_wm_utils:is_forbidden(RD) of + true -> + {true, RD, Ctx}; + false -> + %%preexec(RD, Ctx) + %%validate_request(RD, Ctx) + %% plug in early, and just do what it takes to do the job + {false, RD, Ctx} + end. +%% Because webmachine chooses to (not) call certain callbacks +%% depending on request method used, sometimes accept_doc_body is not +%% called at all, and we arrive at produce_doc_body empty-handed. +%% This is the case when curl is executed with -X GET and --data. + + +-spec allowed_methods(#wm_reqdata{}, #ctx{}) -> + {[atom()], #wm_reqdata{}, #ctx{}}. +%% @doc Get the list of methods this resource supports. +allowed_methods(RD, Ctx) -> + {['GET', 'POST', 'PUT', 'DELETE'], RD, Ctx}. + + +-spec malformed_request(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +%% @doc Determine whether query parameters, request headers, +%% and request body are badly-formed. +malformed_request(RD, Ctx) -> + %% this is plugged because requests are validated against + %% effective parameters contained in the body (and hence, we need + %% accept_doc_body to parse and extract things out of JSON in the + %% body) + {false, RD, Ctx}. + + +-spec preexec(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +%% * collect any parameters from request body or, failing that, from +%% POST k=v items; +%% * check API version; +%% * validate those parameters against URL and method; +%% * determine which api call to do, and check permissions on that; +preexec(RD, Ctx = #ctx{api_call = Call}) + when Call /= undefined -> + %% been here, figured and executed api call, stored results for + %% shipping to client + {true, RD, Ctx}; +preexec(RD, Ctx) -> + case validate_request(RD, Ctx) of + {true, RD1, Ctx1} -> + case check_permissions(RD1, Ctx1) of + {false, RD2, Ctx2} -> + call_api_function(RD2, Ctx2); + FalseWithDetails -> + FalseWithDetails + end; + FalseWithDetails -> + FalseWithDetails + end. + +-spec validate_request(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +validate_request(RD, Ctx) -> + case wrq:path_info(api_version, RD) of + "v1" -> + validate_request_v1(RD, Ctx); + BadVersion -> + handle_error({unsupported_version, BadVersion}, RD, Ctx) + end. + +-spec validate_request_v1(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. 
+validate_request_v1(RD, Ctx = #ctx{method = Method}) -> + Json = extract_json(RD), + case {Method, string:tokens(wrq:path(RD), "/"), + extract_key(Json), extract_data(Json), + extract_query(Json), extract_cover_context(Json)} of + %% single-key get + {'GET', + ["ts", "v1", "tables", Table], + Key, undefined, undefined, undefined} + when is_list(Table), Key /= undefined -> + valid_params( + RD, Ctx#ctx{api_version = "v1", api_call = get, + table = list_to_binary(Table), key = Key}); + %% single-key delete + {'DELETE', + ["ts", "v1", "tables", Table], + Key, undefined, undefined, undefined} + when is_list(Table), Key /= undefined -> + valid_params( + RD, Ctx#ctx{api_version = "v1", api_call = delete, + table = list_to_binary(Table), key = Key}); + %% batch put + {'PUT', + ["ts", "v1", "tables", Table], + undefined, Data, undefined, undefined} + when is_list(Table), Data /= undefined -> + valid_params( + RD, Ctx#ctx{api_version = "v1", api_call = put, + table = list_to_binary(Table), data = Data}); + %% coverage + {'GET', + ["ts", "v1", "coverage"], + undefined, undefined, Query, undefined} + when is_list(Query) -> + valid_params( + RD, Ctx#ctx{api_version = "v1", api_call = coverage, + query = Query}); + %% query + {Method, + ["ts", "v1", "query"], + undefined, undefined, Query, CoverContext} + when (Method == 'GET' orelse Method == 'POST' orelse Method == 'PUT') + andalso is_list(Query) -> + valid_params( + RD, Ctx#ctx{api_version = "v1", api_call = query, + query = Query, cover_context = CoverContext}); + _Invalid -> + handle_error({malformed_request, Method}, RD, Ctx) + end. + + +-spec valid_params(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +valid_params(RD, Ctx) -> + case wrq:get_qs_value("timeout", none, RD) of + none -> + {true, RD, Ctx}; + TimeoutStr -> + try + Timeout = list_to_integer(TimeoutStr), + {true, RD, Ctx#ctx{timeout = Timeout}} + catch + _:_ -> + handle_error({bad_parameter, "timeout"}, RD, Ctx) + end + end. + +%% This is a special case for curl -G. `curl -G host --data $data` +%% will send the $data in URL instead of in the body, so we try to +%% look for it in req_qs. +extract_json(RD) -> + case proplists:get_value("json", RD#wm_reqdata.req_qs) of + undefined -> + %% if it was a PUT or POST, data is in body + binary_to_list(wrq:req_body(RD)); + BodyInPost -> + BodyInPost + end. + +-spec extract_key(binary()) -> term(). +extract_key(Json) -> + case catch mochijson2:decode(Json) of + {struct, [{<<"key">>, Key}]} -> + %% key alone (it's a get or delete) + validate_ts_record(Key); + Decoded when is_list(Decoded) -> + %% key and data (it's a put) + validate_ts_record( + proplists:get_value(<<"key">>, Decoded)); + _ -> + undefined + end. + +%% because, techically, key and data are 'arguments', we check they +%% are well-formed, too. +-spec extract_data(binary()) -> term(). +extract_data(Json) -> + case catch mochijson2:decode(Json) of + {struct, Decoded} when is_list(Decoded) -> + %% key and data (it's a put) + validate_ts_records( + proplists:get_value(<<"data">>, Decoded)); + _ -> + undefined + end. + +-spec extract_query(binary()) -> term(). +extract_query(Json) -> + case catch mochijson2:decode(Json) of + {struct, Decoded} when is_list(Decoded) -> + validate_ts_query( + proplists:get_value(<<"query">>, Decoded)); + _ -> + undefined + end. + +-spec extract_cover_context(binary()) -> term(). 
+extract_cover_context(Json) -> + case catch mochijson2:decode(Json) of + Decoded when is_list(Decoded) -> + validate_ts_cover_context( + proplists:get_value(<<"coverage_context">>, Decoded)); + _ -> + undefined + end. + + +validate_ts_record(undefined) -> + undefined; +validate_ts_record(R) when is_list(R) -> + case lists:all( + %% check that all list elements are TS types + fun(X) -> is_integer(X) orelse is_float(X) orelse is_binary(X) end, + R) of + true -> + R; + false -> + undefined + end; +validate_ts_record(_) -> + undefined. + +validate_ts_records(undefined) -> + undefined; +validate_ts_records(RR) when is_list(RR) -> + case lists:all(fun(R) -> validate_ts_record(R) /= undefined end, RR) of + true -> + RR; + false -> + undefined + end; +validate_ts_records(_) -> + undefined. + +validate_ts_query(Q) when is_binary(Q) -> + binary_to_list(Q); +validate_ts_query(_) -> + undefined. + +validate_ts_cover_context(C) when is_binary(C) -> + C; +validate_ts_cover_context(_) -> + undefined. + + +-spec check_permissions(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +%% We have to defer checking permission until we have figured which +%% api call it is, which is done in validate_request, which also needs +%% body, which happens to not be available in Ctx when webmachine +%% would normally call a forbidden callback. I *may* be missing +%% something, but given the extent we have bent the REST rules here, +%% checking permissions at a stage later than webmachine would have +%% done is not a big deal. +check_permissions(RD, Ctx = #ctx{security = undefined}) -> + validate_resource(RD, Ctx); +check_permissions(RD, Ctx = #ctx{table = undefined}) -> + {false, RD, Ctx}; +check_permissions(RD, Ctx = #ctx{security = Security, + api_call = Call, + table = Table}) -> + case riak_core_security:check_permission( + {api_call_to_ts_perm(Call), {Table, Table}}, Security) of + {false, Error, _} -> + handle_error( + {not_permitted, unicode:characters_to_binary(Error, utf8, utf8)}, RD, Ctx); + _ -> + validate_resource(RD, Ctx) + end. + +api_call_to_ts_perm(get) -> + "riak_ts.get"; +api_call_to_ts_perm(put) -> + "riak_ts.put"; +api_call_to_ts_perm(delete) -> + "riak_ts.delete"; +api_call_to_ts_perm(query) -> + "riak_ts.query". + +-spec validate_resource(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +validate_resource(RD, Ctx = #ctx{api_call = Call}) + when Call == query; + Call == coverage -> + %% there is always a resource for queries + {false, RD, Ctx}; +validate_resource(RD, Ctx = #ctx{table = Table}) -> + %% Ensure the bucket type exists, otherwise 404 early. + case riak_kv_wm_utils:bucket_type_exists(Table) of + true -> + {true, RD, Ctx}; + false -> + handle_error({no_such_table, Table}, RD, Ctx) + end. + + +-spec content_types_provided(#wm_reqdata{}, #ctx{}) -> + {[{ContentType::string(), Producer::atom()}], + #wm_reqdata{}, #ctx{}}. +%% @doc List the content types available for representing this resource. +content_types_provided(RD, Ctx) -> + {[{"application/json", produce_doc_body}], RD, Ctx}. + + +-spec encodings_provided(#wm_reqdata{}, #ctx{}) -> + {[{Encoding::string(), Producer::function()}], + #wm_reqdata{}, #ctx{}}. +%% @doc List the encodings available for representing this resource. +%% "identity" and "gzip" are available. +encodings_provided(RD, Ctx) -> + %% identity and gzip + {riak_kv_wm_utils:default_encodings(), RD, Ctx}. + + +-spec content_types_accepted(#wm_reqdata{}, #ctx{}) -> + {[{ContentType::string(), Acceptor::atom()}], + #wm_reqdata{}, #ctx{}}. 
+content_types_accepted(RD, Ctx) -> + {[{"application/json", accept_doc_body}], RD, Ctx}. + + +-spec resource_exists(#wm_reqdata{}, #ctx{}) -> + {boolean(), #wm_reqdata{}, #ctx{}}. +resource_exists(RD0, Ctx0) -> + case preexec(RD0, Ctx0) of + {true, RD, Ctx} -> + call_api_function(RD, Ctx); + FalseWithDetails -> + FalseWithDetails + end. + +-spec process_post(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +%% @doc Pass through requests to allow POST to function +%% as PUT for clients that do not support PUT. +process_post(RD, Ctx) -> + accept_doc_body(RD, Ctx). + +-spec delete_resource(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +%% same for DELETE +delete_resource(RD, Ctx) -> + accept_doc_body(RD, Ctx). + +-spec accept_doc_body(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +accept_doc_body(RD0, Ctx0) -> + case preexec(RD0, Ctx0) of + {true, RD, Ctx} -> + call_api_function(RD, Ctx); + FalseWithDetails -> + FalseWithDetails + end. + +-spec call_api_function(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +call_api_function(RD, Ctx = #ctx{result = Result}) + when Result /= undefined -> + lager:debug("Function already executed", []), + {true, RD, Ctx}; +call_api_function(RD, Ctx = #ctx{api_call = put, + table = Table, data = Data}) -> + Mod = riak_ql_ddl:make_module_name(Table), + %% convert records to tuples, just for put + Records = [list_to_tuple(R) || R <- Data], + case catch riak_kv_ts_util:validate_rows(Mod, Records) of + {_, {undef, _}} -> + handle_error({no_such_table, Table}, RD, Ctx); + [] -> + case riak_kv_ts_util:put_data(Records, Table, Mod) of + 0 -> + prepare_data_in_body(RD, Ctx#ctx{result = ok}); + ErrorCount -> + handle_error({failed_some_puts, ErrorCount, Table}, RD, Ctx) + end; + BadRowIdxs when is_list(BadRowIdxs) -> + handle_error({invalid_data, BadRowIdxs}, RD, Ctx) + end; + +call_api_function(RD, Ctx0 = #ctx{api_call = get, + table = Table, key = Key, + timeout = Timeout}) -> + Options = + if Timeout == undefined -> []; + true -> [{timeout, Timeout}] + end, + Mod = riak_ql_ddl:make_module_name(Table), + case catch riak_kv_ts_util:get_data(Key, Table, Mod, Options) of + {_, {undef, _}} -> + handle_error({no_such_table, Table}, RD, Ctx0); + {ok, Record} -> + {ColumnNames, Row} = lists:unzip(Record), + %% ColumnTypes = riak_kv_ts_util:get_column_types(ColumnNames, Mod), + %% We don't need column types here as well (for the PB interface, we + %% needed them in order to properly construct tscells) + DataOut = {ColumnNames, [Row]}, + %% all results (from get as well as query) are returned in + %% a uniform 'tabular' form, hence the [] around Row + Ctx = Ctx0#ctx{result = DataOut}, + prepare_data_in_body(RD, Ctx); + {error, notfound} -> + handle_error(notfound, RD, Ctx0); + {error, {bad_key_length, Got, Need}} -> + handle_error({key_element_count_mismatch, Got, Need}, RD, Ctx0); + {error, Reason} -> + handle_error({riak_error, Reason}, RD, Ctx0) + end; + +call_api_function(RD, Ctx = #ctx{api_call = delete, + table = Table, key = Key, + timeout = Timeout}) -> + Options = + if Timeout == undefined -> []; + true -> [{timeout, Timeout}] + end, + Mod = riak_ql_ddl:make_module_name(Table), + case catch riak_kv_ts_util:delete_data(Key, Table, Mod, Options) of + {_, {undef, _}} -> + handle_error({no_such_table, Table}, RD, Ctx); + ok -> + prepare_data_in_body(RD, Ctx#ctx{result = ok}); + {error, {bad_key_length, Got, Need}} -> + handle_error({key_element_count_mismatch, Got, Need}, RD, Ctx); + {error, notfound} -> + handle_error(notfound, RD, Ctx); + {error, Reason} -> + handle_error({riak_error, Reason}, RD, 
Ctx) + end; + +call_api_function(RD, Ctx = #ctx{api_call = query, + method = Method, + query = Query, cover_context = CoverCtx}) -> + Lexed = riak_ql_lexer:get_tokens(Query), + case riak_ql_parser:parse(Lexed) of + {ok, SQL = ?SQL_SELECT{}} when Method == 'GET' -> + %% inject coverage context + process_query(SQL?SQL_SELECT{cover_context = CoverCtx}, RD, Ctx); + {ok, Other} + when (is_record(Other, ddl_v1) andalso Method == 'POST') orelse + (is_record(Other, riak_sql_describe_v1) andalso Method == 'GET') -> + process_query(Other, RD, Ctx); + {ok, _MethodMismatch} -> + handle_error({inappropriate_sql_for_method, Method}, RD, Ctx); + {error, Reason} -> + handle_error({query_parse_error, Reason}, RD, Ctx) + end; + +call_api_function(RD, Ctx = #ctx{api_call = coverage, + query = Query, + client = Client}) -> + Lexed = riak_ql_lexer:get_tokens(Query), + case riak_ql_parser:parse(Lexed) of + {ok, SQL = ?SQL_SELECT{'FROM' = Table}} -> + Mod = riak_ql_ddl:make_module_name(Table), + case riak_kv_ts_util:compile_to_per_quantum_queries(Mod, SQL) of + {ok, Compiled} -> + Bucket = riak_kv_ts_util:table_to_bucket(Table), + Results = + [begin + Node = proplists:get_value(node, Cover), + {IP, Port} = riak_kv_pb_coverage:node_to_pb_details(Node), + {entry, + [ + {cover_context, + riak_kv_pb_coverage:term_to_checksum_binary({Cover, Range})}, + {ip, IP}, + {port, Port}, + {range, + [ + {field_name, FieldName}, + {lower_bound, StartVal}, + {lower_bound_inclusive, StartIncl}, + {upper_bound, EndVal}, + {upper_bound_inclusive, EndIncl}, + {desc, SQLtext} + ]} + ]} + end || {Cover, + Range = {FieldName, {{StartVal, StartIncl}, {EndVal, EndIncl}}}, + SQLtext} + <- riak_kv_ts_util:sql_to_cover(Client, Compiled, Bucket, [])], + prepare_data_in_body(RD, Ctx#ctx{result = Results}); + {error, _Reason} -> + handle_error(query_compile_fail, RD, Ctx) + end; + {ok, _NonSelectQuery} -> + handle_error(inappropriate_sql_for_coverage, RD, Ctx); + {error, Reason} -> + handle_error({query_parse_error, Reason}, RD, Ctx) + end. + + +process_query(DDL = #ddl_v1{table = Table}, RD, Ctx) -> + {ok, Props1} = riak_kv_ts_util:apply_timeseries_bucket_props(DDL, []), + Props2 = [riak_kv_wm_utils:erlify_bucket_prop(P) || P <- Props1], + %% TODO: let's not bother collecting user properties from (say) + %% sidecar object in body JSON: when #ddl_v2 work is merged, we + %% will have a way to collect those bespoke table properties from + %% WITH clause. 
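+    %% The flow from here: create the bucket type with the
+    %% DDL-derived props, then poll riak_core_bucket_type:activate/1
+    %% once a second for up to ?TABLE_ACTIVATE_WAIT seconds (see
+    %% wait_until_active/4 below), so a successful CREATE TABLE only
+    %% returns once the type is usable.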
+ case riak_core_bucket_type:create(Table, Props2) of + ok -> + wait_until_active(Table, RD, Ctx, ?TABLE_ACTIVATE_WAIT); + {error, Reason} -> + handle_error({table_create_fail, Table, Reason}, RD, Ctx) + end; + +process_query(SQL = ?SQL_SELECT{'FROM' = Table}, RD, Ctx0 = #ctx{}) -> + Mod = riak_ql_ddl:make_module_name(Table), + case catch Mod:get_ddl() of + {_, {undef, _}} -> + handle_error({no_such_table, Table}, RD, Ctx0); + DDL -> + case riak_kv_qry:submit(SQL, DDL) of + {ok, Data} -> + {ColumnNames, _ColumnTypes, Rows} = Data, + Ctx = Ctx0#ctx{result = {ColumnNames, Rows}}, + prepare_data_in_body(RD, Ctx); + %% the following timeouts are known and distinguished: + {error, qry_worker_timeout} -> + %% the eleveldb process didn't send us any response after + %% 10 sec (hardcoded in riak_kv_qry), and probably died + handle_error(query_worker_timeout, RD, Ctx0); + {error, backend_timeout} -> + %% the eleveldb process did manage to send us a timeout + %% response + handle_error(backend_timeout, RD, Ctx0); + + {error, Reason} -> + handle_error({query_exec_error, Reason}, RD, Ctx0) + end + end; + +process_query(SQL = #riak_sql_describe_v1{'DESCRIBE' = Table}, RD, Ctx0 = #ctx{}) -> + Mod = riak_ql_ddl:make_module_name(Table), + case catch Mod:get_ddl() of + {_, {undef, _}} -> + handle_error({no_such_table, Table}, RD, Ctx0); + DDL -> + case riak_kv_qry:submit(SQL, DDL) of + {ok, Data} -> + ColumnNames = [<<"Column">>, <<"Type">>, <<"Is Null">>, + <<"Primary Key">>, <<"Local Key">>], + Ctx = Ctx0#ctx{result = {ColumnNames, Data}}, + prepare_data_in_body(RD, Ctx); + {error, Reason} -> + handle_error({query_exec_error, Reason}, RD, Ctx0) + end + end. + + +wait_until_active(Table, RD, Ctx, 0) -> + handle_error({table_activate_fail, Table}, RD, Ctx); +wait_until_active(Table, RD, Ctx, Seconds) -> + case riak_core_bucket_type:activate(Table) of + ok -> + prepare_data_in_body(RD, Ctx#ctx{result = {[], []}}); + %% a way for CREATE TABLE queries to return 'ok' on success + {error, not_ready} -> + timer:sleep(1000), + wait_until_active(Table, RD, Ctx, Seconds - 1); + {error, undefined} -> + %% this is inconceivable because create(Table) has + %% just succeeded, so it's here mostly to pacify + %% the dialyzer (and of course, for the odd chance + %% of Erlang imps crashing nodes between create + %% and activate calls) + handle_error({table_created_missing, Table}, RD, Ctx) + end. + +prepare_data_in_body(RD0, Ctx0) -> + {Json, RD1, Ctx1} = produce_doc_body(RD0, Ctx0), + {true, wrq:append_to_response_body(Json, RD1), Ctx1}. + + +-spec produce_doc_body(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +%% @doc Extract the value of the document, and place it in the +%% response body of the request. +produce_doc_body(RD, Ctx = #ctx{result = ok}) -> + {<<"ok">>, RD, Ctx}; +produce_doc_body(RD, Ctx = #ctx{api_call = Call, + result = {Columns, Rows}}) + when Call == get; + Call == query -> + {mochijson2:encode( + {struct, [{<<"columns">>, Columns}, + {<<"rows">>, Rows}]}), + RD, Ctx}; +produce_doc_body(RD, Ctx = #ctx{api_call = Call, + result = CoverageDetails}) + when Call == coverage -> + SafeCoverageDetails = + [{entry, armor_entry(E)} || {entry, E} <- CoverageDetails], + {mochijson2:encode( + {struct, [{<<"coverage">>, SafeCoverageDetails}]}), + RD, Ctx}. + +armor_entry(EE) -> + lists:map( + fun({cover_context, Bin}) -> + %% prevent list to be read and converted by mochijson2 + %% as utf8 binary + {cover_context, binary_to_list(Bin)}; + (X) -> X + end, EE). 
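+%% For instance (the bytes below are invented for illustration), a
+%% cover context such as <<131,104,2,97,1,97,2>> is not valid UTF-8,
+%% so mochijson2 would choke on it as a string; armor_entry ships it
+%% as the byte list [131,104,2,97,1,97,2] instead, which encodes
+%% harmlessly as a JSON array of integers.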
+
+
+error_out(Type, Fmt, Args, RD, Ctx) ->
+    {Type,
+     wrq:set_resp_header(
+       "Content-Type", "text/plain", wrq:append_to_response_body(
+                                       flat_format(Fmt, Args), RD)),
+     Ctx}.
+
+-spec handle_error(atom()|tuple(), #wm_reqdata{}, #ctx{}) -> {tuple(), #wm_reqdata{}, #ctx{}}.
+handle_error(Error, RD, Ctx) ->
+    case Error of
+        {unsupported_version, BadVersion} ->
+            error_out({halt, 412},
+                      "Unsupported API version ~s", [BadVersion], RD, Ctx);
+        {malformed_request, Method} ->
+            error_out({halt, 400},
+                      "Malformed ~s request", [Method], RD, Ctx);
+        {bad_parameter, Param} ->
+            error_out({halt, 400},
+                      "Bad value for parameter \"~s\"", [Param], RD, Ctx);
+        {no_such_table, Table} ->
+            error_out({halt, 404},
+                      "Table \"~ts\" does not exist", [Table], RD, Ctx);
+        {failed_some_puts, NoOfFailures, Table} ->
+            error_out({halt, 400},
+                      "Failed to put ~b records to table \"~ts\"", [NoOfFailures, Table], RD, Ctx);
+        {invalid_data, BadRowIdxs} ->
+            error_out({halt, 400},
+                      "Invalid record #~b", [hd(BadRowIdxs)], RD, Ctx);
+        {key_element_count_mismatch, Got, Need} ->
+            error_out({halt, 400},
+                      "Incorrect number of elements (~b) for key of length ~b", [Got, Need], RD, Ctx);
+        notfound ->
+            error_out({halt, 404},
+                      "Key not found", [], RD, Ctx);
+        {riak_error, Detailed} ->
+            error_out({halt, 500},
+                      "Internal riak error: ~p", [Detailed], RD, Ctx);
+        {query_parse_error, Detailed} ->
+            error_out({halt, 400},
+                      "Malformed query: ~ts", [Detailed], RD, Ctx);
+        inappropriate_sql_for_coverage ->
+            error_out({halt, 400},
+                      "Inappropriate query for coverage request", [], RD, Ctx);
+        query_compile_fail ->
+            error_out({halt, 400},
+                      "Failed to compile query for coverage request", [], RD, Ctx);
+        {table_create_fail, Table, Reason} ->
+            error_out({halt, 500},
+                      "Failed to create table \"~ts\": ~p", [Table, Reason], RD, Ctx);
+        query_worker_timeout ->
+            error_out({halt, 503},
+                      "Query worker timeout", [], RD, Ctx);
+        backend_timeout ->
+            error_out({halt, 503},
+                      "Storage backend timeout", [], RD, Ctx);
+        {query_exec_error, Detailed} ->
+            error_out({halt, 400},
+                      "Query execution failed: ~ts", [Detailed], RD, Ctx);
+        {table_activate_fail, Table} ->
+            error_out({halt, 500},
+                      "Failed to activate bucket type for table \"~ts\"", [Table], RD, Ctx);
+        {table_created_missing, Table} ->
+            error_out({halt, 500},
+                      "Bucket type for table \"~ts\" disappeared suddenly before activation", [Table], RD, Ctx);
+        {inappropriate_sql_for_method, Method} ->
+            error_out({halt, 400},
+                      "Inappropriate method ~s for SQL query type", [Method], RD, Ctx);
+        OutOfTheBlue ->
+            error_out({halt, 418},
+                      "Phantom error: ~p", [OutOfTheBlue], RD, Ctx)
+    end.
+
+flat_format(Format, Args) ->
+    lists:flatten(io_lib:format(Format, Args)).
diff --git a/src/riak_kv_wm_timeseries_listkeys.erl b/src/riak_kv_wm_timeseries_listkeys.erl
new file mode 100644
index 0000000000..479a7dbf60
--- /dev/null
+++ b/src/riak_kv_wm_timeseries_listkeys.erl
@@ -0,0 +1,186 @@
+%% -------------------------------------------------------------------
+%%
+%% riak_kv_wm_timeseries_listkeys: Webmachine resource for riak TS
+%%                                 streaming operations.
+%%
+%% Copyright (c) 2016 Basho Technologies, Inc. All Rights Reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License.
You may obtain
+%% a copy of the License at
+%%
+%%   http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied.  See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+
+%% @doc Resource for Riak TS operations over HTTP.
+%%
+%% ```
+%% GET /ts/v1/tables/Table/keys      list_keys
+%% '''
+%%
+%% There is no request body.  The response is a JSON document
+%% carrying the table's keys, streamed in chunks as they arrive.
+%%
+
+-module(riak_kv_wm_timeseries_listkeys).
+
+%% webmachine resource exports
+-export([
+         init/1,
+         service_available/2,
+         allowed_methods/2,
+         is_authorized/2,
+         forbidden/2,
+         resource_exists/2,
+         content_types_provided/2,
+         encodings_provided/2,
+         produce_doc_body/2
+        ]).
+
+-include("riak_kv_wm_raw.hrl").
+-include_lib("webmachine/include/webmachine.hrl").
+
+-record(ctx, {api_version,
+              riak,
+              security,
+              client,
+              table :: undefined | binary()
+             }).
+
+-define(CB_RV_SPEC, {boolean(), #wm_reqdata{}, #ctx{}}).
+
+-define(DEFAULT_TIMEOUT, 60000).
+
+-spec init(proplists:proplist()) -> {ok, #ctx{}}.
+%% @doc Initialize this resource.  This function extracts the
+%%      'api_version', 'riak' and 'table' properties from the
+%%      dispatch args.
+init(Props) ->
+    {ok, #ctx{api_version = proplists:get_value(api_version, Props),
+              riak = proplists:get_value(riak, Props),
+              table = proplists:get_value(table, Props)}}.
+
+-spec service_available(#wm_reqdata{}, #ctx{}) ->
+    {boolean(), #wm_reqdata{}, #ctx{}}.
+%% @doc Determine whether or not a connection to Riak
+%%      can be established.  This function also takes this
+%%      opportunity to extract the 'table' path binding from
+%%      the dispatch.
+service_available(RD, Ctx = #ctx{riak = RiakProps}) ->
+    case riak_kv_wm_utils:get_riak_client(
+           RiakProps, riak_kv_wm_utils:get_client_id(RD)) of
+        {ok, C} ->
+            {true, RD,
+             Ctx#ctx{api_version = wrq:path_info(api_version, RD),
+                     client = C,
+                     table =
+                         case wrq:path_info(table, RD) of
+                             undefined -> undefined;
+                             B -> list_to_binary(riak_kv_wm_utils:maybe_decode_uri(RD, B))
+                         end
+                    }};
+        Error ->
+            {false, wrq:set_resp_body(
+                      flat_format("Unable to connect to Riak: ~p", [Error]),
+                      wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)),
+             Ctx}
+    end.
+
+
+is_authorized(ReqData, Ctx) ->
+    case riak_api_web_security:is_authorized(ReqData) of
+        false ->
+            {"Basic realm=\"Riak\"", ReqData, Ctx};
+        {true, SecContext} ->
+            {true, ReqData, Ctx#ctx{security = SecContext}};
+        insecure ->
+            {{halt, 426},
+             wrq:append_to_resp_body(
+               <<"Security is enabled and "
+                 "Riak does not accept credentials over HTTP. Try HTTPS instead.">>, ReqData),
+             Ctx}
+    end.
+
+
+-spec forbidden(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC.
+forbidden(RD, Ctx) ->
+    case riak_kv_wm_utils:is_forbidden(RD) of
+        true ->
+            {true, RD, Ctx};
+        false ->
+            {false, RD, Ctx}
+    end.
+
+-spec allowed_methods(#wm_reqdata{}, #ctx{}) ->
+    {[atom()], #wm_reqdata{}, #ctx{}}.
+%% @doc Get the list of methods this resource supports.
+allowed_methods(RD, Ctx) ->
+    {['GET'], RD, Ctx}.
+
+
+-spec resource_exists(#wm_reqdata{}, #ctx{}) ->
+    {boolean(), #wm_reqdata{}, #ctx{}}.
+resource_exists(RD, #ctx{table = Table} = Ctx) ->
+    {riak_kv_wm_utils:bucket_type_exists(Table), RD, Ctx}.
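+
+%% A hypothetical session against this resource (host, port and table
+%% name are made up):
+%%
+%%   curl http://127.0.0.1:8098/ts/v1/tables/GeoCheckin/keys
+%%
+%% Each chunk of the streamed response is a JSON document of the form
+%% {"keys":[...]}, one inner list per decoded local key (see
+%% stream_keys/1 and ts_keys_to_json/1 below).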
+
+
+-spec encodings_provided(#wm_reqdata{}, #ctx{}) ->
+    {[{Encoding::string(), Producer::function()}],
+     #wm_reqdata{}, #ctx{}}.
+%% @doc List the encodings available for representing this resource.
+%%      "identity" and "gzip" are available.
+encodings_provided(RD, Ctx) ->
+    {riak_kv_wm_utils:default_encodings(), RD, Ctx}.
+
+-spec content_types_provided(#wm_reqdata{}, #ctx{}) ->
+    {[{ContentType::string(), Producer::atom()}],
+     #wm_reqdata{}, #ctx{}}.
+%% @doc List the content types available for representing this resource.
+content_types_provided(RD, Ctx) ->
+    {[{"application/json", produce_doc_body}], RD, Ctx}.
+
+
+produce_doc_body(RD, Ctx = #ctx{table = Table,
+                                client = Client}) ->
+    F = fun() ->
+                {ok, ReqId} = riak_client:stream_list_keys(
+                                {Table, Table}, undefined, Client),
+                stream_keys(ReqId)
+        end,
+    {{stream, {<<>>, F}}, RD, Ctx}.
+
+stream_keys(ReqId) ->
+    receive
+        %% skip empty shipments
+        {ReqId, {keys, []}} ->
+            stream_keys(ReqId);
+        {ReqId, From, {keys, []}} ->
+            _ = riak_kv_keys_fsm:ack_keys(From),
+            stream_keys(ReqId);
+        {ReqId, From, {keys, Keys}} ->
+            _ = riak_kv_keys_fsm:ack_keys(From),
+            {ts_keys_to_json(Keys), fun() -> stream_keys(ReqId) end};
+        {ReqId, {keys, Keys}} ->
+            {ts_keys_to_json(Keys), fun() -> stream_keys(ReqId) end};
+        {ReqId, done} ->
+            {<<>>, done};
+        {ReqId, {error, timeout}} ->
+            {mochijson2:encode({struct, [{error, timeout}]}), done}
+    end.
+
+ts_keys_to_json(Keys) ->
+    KeysTerm = [tuple_to_list(sext:decode(A))
+                || A <- Keys, A /= []],
+    mochijson2:encode({struct, [{<<"keys">>, KeysTerm}]}).
+
+flat_format(Format, Args) ->
+    lists:flatten(io_lib:format(Format, Args)).

From 97d0b58b2a8230639d2308f8a26f2264eb68bd71 Mon Sep 17 00:00:00 2001
From: Andrei Zavada
Date: Wed, 17 Feb 2016 01:38:11 +0200
Subject: [PATCH 083/122] detect and deal with {error,no_type} (table not
 activated case)

Complementary to 9b62335 by atill.

The call to the claimant to determine a table's bucket type status is
an expensive operation. It was introduced in a1c1e2e6 in order to
detect and report the error condition where a TS operation is
attempted on a non-activated bucket type, but was found to cause
serious performance degradation. Andy's commit removed the expensive
call; this commit captures and reports the error condition in various
places in riak_kv_{wm,pb}_timeseries and riak_kv_ts_util.
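To illustrate the shape of the change, a condensed sketch of the new
dispatch in sub_putreq_common (the table name is made up):

    case riak_kv_ts_util:put_data(Data, <<"GeoCheckin">>, Mod) of
        ok ->
            {reply, #tsputresp{}, State};
        {error, no_type} ->
            %% the bucket type exists but has not been activated
            {reply, table_not_activated_response(<<"GeoCheckin">>), State};
        {error, {some_failed, ErrorCount}} ->
            {reply, failed_put_response(ErrorCount), State}
    end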
--- src/riak_kv_pb_timeseries.erl | 28 ++++++++++----------- src/riak_kv_ts_util.erl | 40 +++++++++++++++++++++++------- src/riak_kv_w1c_worker.erl | 46 +++++++++++++++++++---------------- src/riak_kv_wm_timeseries.erl | 8 +++--- 4 files changed, 75 insertions(+), 47 deletions(-) diff --git a/src/riak_kv_pb_timeseries.erl b/src/riak_kv_pb_timeseries.erl index 1f4b45494d..67f4ffd6dc 100644 --- a/src/riak_kv_pb_timeseries.erl +++ b/src/riak_kv_pb_timeseries.erl @@ -403,10 +403,14 @@ sub_putreq_common(Mod, Table, Data, State) -> case catch riak_kv_ts_util:validate_rows(Mod, Data) of [] -> case riak_kv_ts_util:put_data(Data, Table, Mod) of - 0 -> + ok -> {reply, #tsputresp{}, State}; - ErrorCount -> - {reply, failed_put_response(ErrorCount), State} + {error, {some_failed, ErrorCount}} -> + {reply, failed_put_response(ErrorCount), State}; + {error, no_type} -> + {reply, table_not_activated_response(Table), State}; + {error, OtherReason} -> + {reply, make_rpberrresp(?E_PUT, to_string(OtherReason)), State} end; BadRowIdxs when is_list(BadRowIdxs) -> {reply, validate_rows_error_response(BadRowIdxs), State} @@ -438,6 +442,8 @@ sub_tsgetreq(Mod, _DDL, #tsgetreq{table = Table, {reply, #tsgetresp{columns = make_tscolumndescription_list( ColumnNames, ColumnTypes), rows = Rows}, State}; + {error, no_type} -> + {reply, table_not_activated_response(Table), State}; {error, {bad_key_length, Got, Need}} -> {reply, key_element_count_mismatch(Got, Need), State}; {error, notfound} -> @@ -462,6 +468,8 @@ sub_tsdelreq(Mod, _DDL, #tsdelreq{table = Table, CompoundKey, Table, Mod, Options, VClock) of ok -> {reply, tsdelresp, State}; + {error, no_type} -> + {reply, table_not_activated_response(Table), State}; {error, {bad_key_length, Got, Need}} -> {reply, key_element_count_mismatch(Got, Need), State}; {error, notfound} -> @@ -570,14 +578,9 @@ sub_tsqueryreq(Mod, DDL, SQL, State) -> {ok, Data} -> {reply, make_tsquery_resp(Mod, SQL, Data), State}; - %% %% parser messages have a tuple for Reason: - %% {error, {E, Reason}} when is_atom(E), is_binary(Reason) -> - %% ErrorMessage = flat_format("~p: ~s", [E, Reason]), - %% {reply, make_rpberrresp(?E_SUBMIT, ErrorMessage), State}; - %% parser errors are now handled uniformly (will be caught - %% here in the last case branch) - %% the following timeouts are known and distinguished: + {error, no_type} -> + {reply, table_not_activated_response(DDL#ddl_v1.table), State}; {error, qry_worker_timeout} -> %% the eleveldb process didn't send us any response after %% 10 sec (hardcoded in riak_kv_qry), and probably died @@ -628,10 +631,7 @@ check_table_and_call(Table, Fun, TsMessage, State) -> {error, missing_helper_module} -> BucketProps = riak_core_bucket:get_bucket( riak_kv_ts_util:table_to_bucket(Table)), - {reply, missing_helper_module(Table, BucketProps), State}; - {error, _} -> - {reply, table_not_activated_response(Table), - State} + {reply, missing_helper_module(Table, BucketProps), State} end. diff --git a/src/riak_kv_ts_util.erl b/src/riak_kv_ts_util.erl index 0ee393a5e4..319010d514 100644 --- a/src/riak_kv_ts_util.erl +++ b/src/riak_kv_ts_util.erl @@ -382,14 +382,32 @@ validate_rows(Mod, Rows) -> lists:reverse(BadRowIdxs). --spec put_data([riak_pb_ts_codec:tsrow()], binary(), module()) -> integer(). -%% return count of records we failed to put -put_data(Data, Table, Mod) when is_binary(Table) -> +-spec put_data([[riak_pb_ts_codec:ldbvalue()]], binary()) -> + ok | {error, {some_failed, integer()}} | {error, term()}. 
+put_data(Data, Table) -> + put_data(Data, Table, riak_ql_ddl:make_module_name(Table)). + +-spec put_data([[riak_pb_ts_codec:ldbvalue()]], binary(), module()) -> + ok | {error, {some_failed, integer()}} | {error, term()}. +put_data(Data, Table, Mod) -> DDL = Mod:get_ddl(), - Bucket = riak_kv_ts_util:table_to_bucket(Table), - BucketProps = riak_core_bucket:get_bucket(Bucket), - NVal = proplists:get_value(n_val, BucketProps), + Bucket = table_to_bucket(Table), + case riak_core_bucket:get_bucket(Bucket) of + {error, Reason} -> + %% happens when, for example, the table has not been + %% activated (Reason == no_type) + {error, Reason}; + BucketProps -> + case put_data_to_partitions(Data, Bucket, BucketProps, DDL, Mod) of + 0 -> + ok; + NErrors -> + {error, {some_failed, NErrors}} + end + end. +put_data_to_partitions(Data, Bucket, BucketProps, DDL, Mod) -> + NVal = proplists:get_value(n_val, BucketProps), PartitionedData = partition_data(Data, Bucket, BucketProps, DDL, Mod), PreflistData = add_preflists(PartitionedData, NVal, riak_core_node_watcher:nodes(riak_kv)), @@ -436,9 +454,13 @@ put_data(Data, Table, Mod) when is_binary(Table) -> end, {[], 0}, PreflistData), Responses = riak_kv_w1c_worker:async_put_replies(ReqIds, []), - length(lists:filter(fun({error, _}) -> true; - (_) -> false - end, Responses)) + FailReqs. + _NErrors = + length( + lists:filter( + fun({error, _}) -> true; + (_) -> false + end, Responses)) + FailReqs. + -spec partition_data(Data :: list(term()), Bucket :: {binary(), binary()}, diff --git a/src/riak_kv_w1c_worker.erl b/src/riak_kv_w1c_worker.erl index 2b78f8b1f8..4637b22000 100644 --- a/src/riak_kv_w1c_worker.erl +++ b/src/riak_kv_w1c_worker.erl @@ -1,5 +1,5 @@ %% ------------------------------------------------------------------- -%% Copyright (c) 2015 Basho Technologies, Inc. All Rights Reserved. +%% Copyright (c) 2015, 2016 Basho Technologies, Inc. All Rights Reserved. 
%% %% This file is provided to you under the Apache License, %% Version 2.0 (the "License"); you may not use this file @@ -80,27 +80,31 @@ start_link(Name) -> %% {error, term()} put(RObj0, Options) -> Bucket = riak_object:bucket(RObj0), - BucketProps = riak_core_bucket:get_bucket(Bucket), - NVal = proplists:get_value(n_val, BucketProps), - {RObj, Key, EncodeFn} = kv_or_ts_details(RObj0, - riak_object:get_ts_local_key(RObj0)), - DocIdx = chash_key(Bucket, Key, BucketProps), - Preflist = - case proplists:get_value(sloppy_quorum, Options, true) of - true -> - UpNodes = riak_core_node_watcher:nodes(riak_kv), - riak_core_apl:get_apl_ann(DocIdx, NVal, UpNodes); - false -> - riak_core_apl:get_primary_apl(DocIdx, NVal, riak_kv) - end, + case riak_core_bucket:get_bucket(Bucket) of + {error, Reason} -> + {error, Reason}; + BucketProps -> + NVal = proplists:get_value(n_val, BucketProps), + {RObj, Key, EncodeFn} = + kv_or_ts_details(RObj0, riak_object:get_ts_local_key(RObj0)), + DocIdx = chash_key(Bucket, Key, BucketProps), + Preflist = + case proplists:get_value(sloppy_quorum, Options, true) of + true -> + UpNodes = riak_core_node_watcher:nodes(riak_kv), + riak_core_apl:get_apl_ann(DocIdx, NVal, UpNodes); + false -> + riak_core_apl:get_primary_apl(DocIdx, NVal, riak_kv) + end, - case validate_options(NVal, Preflist, Options, BucketProps) of - {ok, W, PW} -> - synchronize_put( - async_put( - RObj, W, PW, Bucket, NVal, Key, EncodeFn, Preflist), Options); - Error -> - Error + case validate_options(NVal, Preflist, Options, BucketProps) of + {ok, W, PW} -> + synchronize_put( + async_put( + RObj, W, PW, Bucket, NVal, Key, EncodeFn, Preflist), Options); + Error -> + Error + end end. -spec async_put(RObj :: riak_object:riak_object(), diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl index 1b2e28162d..5ada15507a 100644 --- a/src/riak_kv_wm_timeseries.erl +++ b/src/riak_kv_wm_timeseries.erl @@ -493,10 +493,12 @@ call_api_function(RD, Ctx = #ctx{api_call = put, handle_error({no_such_table, Table}, RD, Ctx); [] -> case riak_kv_ts_util:put_data(Records, Table, Mod) of - 0 -> + ok -> prepare_data_in_body(RD, Ctx#ctx{result = ok}); - ErrorCount -> - handle_error({failed_some_puts, ErrorCount, Table}, RD, Ctx) + {error, {some_failed, ErrorCount}} -> + handle_error({failed_some_puts, ErrorCount, Table}, RD, Ctx); + {error, no_ctype} -> + handle_error({table_activate_fail, Table}, RD, Ctx) end; BadRowIdxs when is_list(BadRowIdxs) -> handle_error({invalid_data, BadRowIdxs}, RD, Ctx) From 4fd5b2737a1193aa742fa36e305e1fa62c9295d1 Mon Sep 17 00:00:00 2001 From: Andrei Zavada Date: Wed, 17 Feb 2016 04:22:54 +0200 Subject: [PATCH 084/122] make TS query served from own WM resource andalso, check detailed per-table permissions when security is on. 
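For illustration (host, port and table name are hypothetical), a
DESCRIBE travels over GET:

    curl -XGET http://127.0.0.1:8098/ts/v1/query \
         --data '{"query":"DESCRIBE GeoCheckin"}'

whereas a CREATE TABLE statement must be POSTed to the same endpoint;
any other method/statement combination is rejected with
inappropriate_sql_for_method.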
--- src/riak_kv_web.erl | 2 +- src/riak_kv_wm_timeseries.erl | 224 ++----------- src/riak_kv_wm_timeseries_query.erl | 500 ++++++++++++++++++++++++++++ 3 files changed, 536 insertions(+), 190 deletions(-) create mode 100644 src/riak_kv_wm_timeseries_query.erl diff --git a/src/riak_kv_web.erl b/src/riak_kv_web.erl index abf024c784..9d7043923e 100644 --- a/src/riak_kv_web.erl +++ b/src/riak_kv_web.erl @@ -127,7 +127,7 @@ raw_dispatch(Name) -> [ [{["ts", api_version, "tables", table, "keys"], riak_kv_wm_timeseries_listkeys, Props}, {["ts", api_version, "tables", table], riak_kv_wm_timeseries, Props}, - {["ts", api_version, "query"], riak_kv_wm_timeseries, Props} + {["ts", api_version, "query"], riak_kv_wm_timeseries_query, Props} %% {["ts", api_version, "coverage"], riak_kv_wm_timeseries, Props} ] || {_Prefix, Props} <- Props2]). diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl index 5ada15507a..45511a95e4 100644 --- a/src/riak_kv_wm_timeseries.erl +++ b/src/riak_kv_wm_timeseries.erl @@ -27,7 +27,6 @@ %% DELETE /ts/v1/table/Table single-key delete %% PUT /ts/v1/table/Table batch put %% GET /ts/v1/coverage coverage for a query -%% GET/POST /ts/v1/query execute SQL query %% ''' %% %% Request body is expected to be a JSON containing key and/or value(s). @@ -66,16 +65,15 @@ security, %% security context client, %% riak_client() - the store client riak, %% local | {node(), atom()} - params for riak client - api_call :: undefined|get|put|delete|query|coverage, + api_call :: undefined|get|put|delete|coverage, table :: undefined | binary(), - cover_context :: undefined | binary(), %% data in/out: the following fields are either %% extracted from the JSON that came in the request body %% in case of a PUT, or filled out by retrieved values %% for shipping (as JSON) in response body key :: undefined | ts_rec(), %% parsed out of JSON that came in the body data :: undefined | [ts_rec()], %% ditto - query :: undefined | string(), + query :: string(), result :: undefined | ok | {Headers::[binary()], Rows::[ts_rec()]} | [{entry, proplists:proplist()}] }). @@ -162,7 +160,7 @@ forbidden(RD, Ctx) -> {[atom()], #wm_reqdata{}, #ctx{}}. %% @doc Get the list of methods this resource supports. allowed_methods(RD, Ctx) -> - {['GET', 'POST', 'PUT', 'DELETE'], RD, Ctx}. + {['GET', 'PUT', 'DELETE'], RD, Ctx}. -spec malformed_request(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. 
@@ -213,12 +211,11 @@ validate_request(RD, Ctx) -> validate_request_v1(RD, Ctx = #ctx{method = Method}) -> Json = extract_json(RD), case {Method, string:tokens(wrq:path(RD), "/"), - extract_key(Json), extract_data(Json), - extract_query(Json), extract_cover_context(Json)} of + extract_key(Json), extract_data(Json), extract_query(Json)} of %% single-key get {'GET', ["ts", "v1", "tables", Table], - Key, undefined, undefined, undefined} + Key, undefined, undefined} when is_list(Table), Key /= undefined -> valid_params( RD, Ctx#ctx{api_version = "v1", api_call = get, @@ -226,7 +223,7 @@ validate_request_v1(RD, Ctx = #ctx{method = Method}) -> %% single-key delete {'DELETE', ["ts", "v1", "tables", Table], - Key, undefined, undefined, undefined} + Key, undefined, undefined} when is_list(Table), Key /= undefined -> valid_params( RD, Ctx#ctx{api_version = "v1", api_call = delete, @@ -234,7 +231,7 @@ validate_request_v1(RD, Ctx = #ctx{method = Method}) -> %% batch put {'PUT', ["ts", "v1", "tables", Table], - undefined, Data, undefined, undefined} + undefined, Data, undefined} when is_list(Table), Data /= undefined -> valid_params( RD, Ctx#ctx{api_version = "v1", api_call = put, @@ -242,20 +239,11 @@ validate_request_v1(RD, Ctx = #ctx{method = Method}) -> %% coverage {'GET', ["ts", "v1", "coverage"], - undefined, undefined, Query, undefined} + undefined, undefined, Query} when is_list(Query) -> valid_params( RD, Ctx#ctx{api_version = "v1", api_call = coverage, query = Query}); - %% query - {Method, - ["ts", "v1", "query"], - undefined, undefined, Query, CoverContext} - when (Method == 'GET' orelse Method == 'POST' orelse Method == 'PUT') - andalso is_list(Query) -> - valid_params( - RD, Ctx#ctx{api_version = "v1", api_call = query, - query = Query, cover_context = CoverContext}); _Invalid -> handle_error({malformed_request, Method}, RD, Ctx) end. @@ -290,15 +278,13 @@ extract_json(RD) -> -spec extract_key(binary()) -> term(). extract_key(Json) -> - case catch mochijson2:decode(Json) of - {struct, [{<<"key">>, Key}]} -> - %% key alone (it's a get or delete) - validate_ts_record(Key); - Decoded when is_list(Decoded) -> + try mochijson2:decode(Json) of + {struct, Decoded} when is_list(Decoded) -> %% key and data (it's a put) validate_ts_record( - proplists:get_value(<<"key">>, Decoded)); - _ -> + proplists:get_value(<<"key">>, Decoded)) + catch + _:_ -> undefined end. @@ -306,32 +292,24 @@ extract_key(Json) -> %% are well-formed, too. -spec extract_data(binary()) -> term(). extract_data(Json) -> - case catch mochijson2:decode(Json) of + try mochijson2:decode(Json) of {struct, Decoded} when is_list(Decoded) -> %% key and data (it's a put) validate_ts_records( - proplists:get_value(<<"data">>, Decoded)); - _ -> + proplists:get_value(<<"data">>, Decoded)) + catch + _:_ -> undefined end. -spec extract_query(binary()) -> term(). extract_query(Json) -> - case catch mochijson2:decode(Json) of + try mochijson2:decode(Json) of {struct, Decoded} when is_list(Decoded) -> validate_ts_query( - proplists:get_value(<<"query">>, Decoded)); - _ -> - undefined - end. - --spec extract_cover_context(binary()) -> term(). -extract_cover_context(Json) -> - case catch mochijson2:decode(Json) of - Decoded when is_list(Decoded) -> - validate_ts_cover_context( - proplists:get_value(<<"coverage_context">>, Decoded)); - _ -> + proplists:get_value(<<"query">>, Decoded)) + catch + _:_ -> undefined end. @@ -368,11 +346,6 @@ validate_ts_query(Q) when is_binary(Q) -> validate_ts_query(_) -> undefined. 
-validate_ts_cover_context(C) when is_binary(C) -> - C; -validate_ts_cover_context(_) -> - undefined. - -spec check_permissions(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. %% We have to defer checking permission until we have figured which @@ -390,7 +363,7 @@ check_permissions(RD, Ctx = #ctx{security = Security, api_call = Call, table = Table}) -> case riak_core_security:check_permission( - {api_call_to_ts_perm(Call), {Table, Table}}, Security) of + {api_call_to_ts_perm(Call), Table}, Security) of {false, Error, _} -> handle_error( {not_permitted, unicode:characters_to_binary(Error, utf8, utf8)}, RD, Ctx); @@ -404,14 +377,12 @@ api_call_to_ts_perm(put) -> "riak_ts.put"; api_call_to_ts_perm(delete) -> "riak_ts.delete"; -api_call_to_ts_perm(query) -> - "riak_ts.query". +api_call_to_ts_perm(coverage) -> + "riak_ts.coverage". -spec validate_resource(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -validate_resource(RD, Ctx = #ctx{api_call = Call}) - when Call == query; - Call == coverage -> - %% there is always a resource for queries +validate_resource(RD, Ctx = #ctx{api_call = coverage}) -> + %% there is always a resource for coverage {false, RD, Ctx}; validate_resource(RD, Ctx = #ctx{table = Table}) -> %% Ensure the bucket type exists, otherwise 404 early. @@ -426,7 +397,6 @@ validate_resource(RD, Ctx = #ctx{table = Table}) -> -spec content_types_provided(#wm_reqdata{}, #ctx{}) -> {[{ContentType::string(), Producer::atom()}], #wm_reqdata{}, #ctx{}}. -%% @doc List the content types available for representing this resource. content_types_provided(RD, Ctx) -> {[{"application/json", produce_doc_body}], RD, Ctx}. @@ -434,10 +404,7 @@ content_types_provided(RD, Ctx) -> -spec encodings_provided(#wm_reqdata{}, #ctx{}) -> {[{Encoding::string(), Producer::function()}], #wm_reqdata{}, #ctx{}}. -%% @doc List the encodings available for representing this resource. -%% "identity" and "gzip" are available. encodings_provided(RD, Ctx) -> - %% identity and gzip {riak_kv_wm_utils:default_encodings(), RD, Ctx}. @@ -498,7 +465,7 @@ call_api_function(RD, Ctx = #ctx{api_call = put, {error, {some_failed, ErrorCount}} -> handle_error({failed_some_puts, ErrorCount, Table}, RD, Ctx); {error, no_ctype} -> - handle_error({table_activate_fail, Table}, RD, Ctx) + handle_error({no_such_table, Table}, RD, Ctx) end; BadRowIdxs when is_list(BadRowIdxs) -> handle_error({invalid_data, BadRowIdxs}, RD, Ctx) @@ -554,24 +521,6 @@ call_api_function(RD, Ctx = #ctx{api_call = delete, handle_error({riak_error, Reason}, RD, Ctx) end; -call_api_function(RD, Ctx = #ctx{api_call = query, - method = Method, - query = Query, cover_context = CoverCtx}) -> - Lexed = riak_ql_lexer:get_tokens(Query), - case riak_ql_parser:parse(Lexed) of - {ok, SQL = ?SQL_SELECT{}} when Method == 'GET' -> - %% inject coverage context - process_query(SQL?SQL_SELECT{cover_context = CoverCtx}, RD, Ctx); - {ok, Other} - when (is_record(Other, ddl_v1) andalso Method == 'POST') orelse - (is_record(Other, riak_sql_describe_v1) andalso Method == 'GET') -> - process_query(Other, RD, Ctx); - {ok, _MethodMismatch} -> - handle_error({inappropriate_sql_for_method, Method}, RD, Ctx); - {error, Reason} -> - handle_error({query_parse_error, Reason}, RD, Ctx) - end; - call_api_function(RD, Ctx = #ctx{api_call = coverage, query = Query, client = Client}) -> @@ -617,104 +566,22 @@ call_api_function(RD, Ctx = #ctx{api_call = coverage, end. 
-process_query(DDL = #ddl_v1{table = Table}, RD, Ctx) -> - {ok, Props1} = riak_kv_ts_util:apply_timeseries_bucket_props(DDL, []), - Props2 = [riak_kv_wm_utils:erlify_bucket_prop(P) || P <- Props1], - %% TODO: let's not bother collecting user properties from (say) - %% sidecar object in body JSON: when #ddl_v2 work is merged, we - %% will have a way to collect those bespoke table properties from - %% WITH clause. - case riak_core_bucket_type:create(Table, Props2) of - ok -> - wait_until_active(Table, RD, Ctx, ?TABLE_ACTIVATE_WAIT); - {error, Reason} -> - handle_error({table_create_fail, Table, Reason}, RD, Ctx) - end; - -process_query(SQL = ?SQL_SELECT{'FROM' = Table}, RD, Ctx0 = #ctx{}) -> - Mod = riak_ql_ddl:make_module_name(Table), - case catch Mod:get_ddl() of - {_, {undef, _}} -> - handle_error({no_such_table, Table}, RD, Ctx0); - DDL -> - case riak_kv_qry:submit(SQL, DDL) of - {ok, Data} -> - {ColumnNames, _ColumnTypes, Rows} = Data, - Ctx = Ctx0#ctx{result = {ColumnNames, Rows}}, - prepare_data_in_body(RD, Ctx); - %% the following timeouts are known and distinguished: - {error, qry_worker_timeout} -> - %% the eleveldb process didn't send us any response after - %% 10 sec (hardcoded in riak_kv_qry), and probably died - handle_error(query_worker_timeout, RD, Ctx0); - {error, backend_timeout} -> - %% the eleveldb process did manage to send us a timeout - %% response - handle_error(backend_timeout, RD, Ctx0); - - {error, Reason} -> - handle_error({query_exec_error, Reason}, RD, Ctx0) - end - end; - -process_query(SQL = #riak_sql_describe_v1{'DESCRIBE' = Table}, RD, Ctx0 = #ctx{}) -> - Mod = riak_ql_ddl:make_module_name(Table), - case catch Mod:get_ddl() of - {_, {undef, _}} -> - handle_error({no_such_table, Table}, RD, Ctx0); - DDL -> - case riak_kv_qry:submit(SQL, DDL) of - {ok, Data} -> - ColumnNames = [<<"Column">>, <<"Type">>, <<"Is Null">>, - <<"Primary Key">>, <<"Local Key">>], - Ctx = Ctx0#ctx{result = {ColumnNames, Data}}, - prepare_data_in_body(RD, Ctx); - {error, Reason} -> - handle_error({query_exec_error, Reason}, RD, Ctx0) - end - end. - - -wait_until_active(Table, RD, Ctx, 0) -> - handle_error({table_activate_fail, Table}, RD, Ctx); -wait_until_active(Table, RD, Ctx, Seconds) -> - case riak_core_bucket_type:activate(Table) of - ok -> - prepare_data_in_body(RD, Ctx#ctx{result = {[], []}}); - %% a way for CREATE TABLE queries to return 'ok' on success - {error, not_ready} -> - timer:sleep(1000), - wait_until_active(Table, RD, Ctx, Seconds - 1); - {error, undefined} -> - %% this is inconceivable because create(Table) has - %% just succeeded, so it's here mostly to pacify - %% the dialyzer (and of course, for the odd chance - %% of Erlang imps crashing nodes between create - %% and activate calls) - handle_error({table_created_missing, Table}, RD, Ctx) - end. - prepare_data_in_body(RD0, Ctx0) -> {Json, RD1, Ctx1} = produce_doc_body(RD0, Ctx0), {true, wrq:append_to_response_body(Json, RD1), Ctx1}. -spec produce_doc_body(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -%% @doc Extract the value of the document, and place it in the -%% response body of the request. 
produce_doc_body(RD, Ctx = #ctx{result = ok}) -> {<<"ok">>, RD, Ctx}; -produce_doc_body(RD, Ctx = #ctx{api_call = Call, - result = {Columns, Rows}}) - when Call == get; - Call == query -> +produce_doc_body(RD, Ctx = #ctx{api_call = get, + result = {Columns, Rows}}) -> {mochijson2:encode( {struct, [{<<"columns">>, Columns}, {<<"rows">>, Rows}]}), RD, Ctx}; -produce_doc_body(RD, Ctx = #ctx{api_call = Call, - result = CoverageDetails}) - when Call == coverage -> +produce_doc_body(RD, Ctx = #ctx{api_call = coverage, + result = CoverageDetails}) -> SafeCoverageDetails = [{entry, armor_entry(E)} || {entry, E} <- CoverageDetails], {mochijson2:encode( @@ -744,6 +611,9 @@ handle_error(Error, RD, Ctx) -> {unsupported_version, BadVersion} -> error_out({halt, 412}, "Unsupported API version ~s", [BadVersion], RD, Ctx); + {not_permitted, Table} -> + error_out({halt, 401}, + "Access to table ~s not allowed", [Table], RD, Ctx); {malformed_request, Method} -> error_out({halt, 400}, "Malformed ~s request", [Method], RD, Ctx); @@ -776,31 +646,7 @@ handle_error(Error, RD, Ctx) -> "Inappropriate query for coverage request", [], RD, Ctx); query_compile_fail -> error_out({halt, 400}, - "Failed to compile query for coverage request", [], RD, Ctx); - {table_create_fail, Table, Reason} -> - error_out({halt, 500}, - "Failed to create table \"~ts\": ~p", [Table, Reason], RD, Ctx); - query_worker_timeout -> - error_out({halt, 503}, - "Query worker timeout", [], RD, Ctx); - backend_timeout -> - error_out({halt, 503}, - "Storage backend timeout", [], RD, Ctx); - {query_exec_error, Detailed} -> - error_out({halt, 400}, - "Query execution failed: ~ts", [Detailed], RD, Ctx); - {table_activate_fail, Table} -> - error_out({halt, 500}, - "Failed to activate bucket type for table \"~ts\"", [Table], RD, Ctx); - {table_created_missing, Table} -> - error_out({halt, 500}, - "Bucket type for table \"~ts\" disappeared suddenly before activation", [Table], RD, Ctx); - {inappropriate_sql_for_method, Method} -> - error_out({halt, 400}, - "Inappropriate method ~s for SQL query type", [Method], RD, Ctx); - OutOfTheBlue -> - error_out({halt, 418}, - "Phantom error: ~p", [OutOfTheBlue], RD, Ctx) + "Failed to compile query for coverage request", [], RD, Ctx) end. flat_format(Format, Args) -> diff --git a/src/riak_kv_wm_timeseries_query.erl b/src/riak_kv_wm_timeseries_query.erl new file mode 100644 index 0000000000..1c8ca0045e --- /dev/null +++ b/src/riak_kv_wm_timeseries_query.erl @@ -0,0 +1,500 @@ +%% ------------------------------------------------------------------- +%% +%% riak_kv_wm_timeseries_query: Webmachine resource for riak TS query call. +%% +%% Copyright (c) 2016 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +%% @doc Resource for Riak TS operations over HTTP. 
+%% +%% ``` +%% GET/POST /ts/v1/query execute SQL query +%% ''' +%% +%% Request body is expected to be a JSON containing key and/or value(s). +%% Response is a JSON containing data rows with column headers. +%% + +-module(riak_kv_wm_timeseries_query). + +%% webmachine resource exports +-export([ + init/1, + service_available/2, + is_authorized/2, + forbidden/2, + allowed_methods/2, + process_post/2, + malformed_request/2, + content_types_accepted/2, + resource_exists/2, + content_types_provided/2, + encodings_provided/2, + produce_doc_body/2, + accept_doc_body/2 + ]). + +-include_lib("webmachine/include/webmachine.hrl"). +-include_lib("riak_ql/include/riak_ql_ddl.hrl"). +-include("riak_kv_wm_raw.hrl"). +-include("riak_kv_ts.hrl"). + +-record(ctx, {api_version, + method :: atom(), + prefix, %% string() - prefix for resource uris + timeout, %% integer() - passed-in timeout value in ms + security, %% security context + client, %% riak_client() - the store client + riak, %% local | {node(), atom()} - params for riak client + table :: undefined | binary(), + query :: undefined | string(), + compiled_query :: undefined | #ddl_v1{} | #riak_sql_describe_v1{} | ?SQL_SELECT{}, + result :: undefined | ok | {Headers::[binary()], Rows::[ts_rec()]} | + [{entry, proplists:proplist()}] + }). + +-define(DEFAULT_TIMEOUT, 60000). +-define(TABLE_ACTIVATE_WAIT, 30). %% wait until table's bucket type is activated + +-define(CB_RV_SPEC, {boolean(), #wm_reqdata{}, #ctx{}}). +-type ts_rec() :: [riak_pb_ts_codec:ldbvalue()]. + + +-spec init(proplists:proplist()) -> {ok, #ctx{}}. +%% @doc Initialize this resource. This function extracts the +%% 'prefix' and 'riak' properties from the dispatch args. +init(Props) -> + {ok, #ctx{prefix = proplists:get_value(prefix, Props), + riak = proplists:get_value(riak, Props)}}. + +-spec service_available(#wm_reqdata{}, #ctx{}) -> + {boolean(), #wm_reqdata{}, #ctx{}}. +%% @doc Determine whether or not a connection to Riak +%% can be established. This function also takes this +%% opportunity to extract the 'bucket' and 'key' path +%% bindings from the dispatch, as well as any vtag +%% query parameter. +service_available(RD, Ctx = #ctx{riak = RiakProps}) -> + case riak_kv_wm_utils:get_riak_client( + RiakProps, riak_kv_wm_utils:get_client_id(RD)) of + {ok, C} -> + {true, RD, + Ctx#ctx{api_version = wrq:path_info(api_version, RD), + method = wrq:method(RD), + client = C, + table = + case wrq:path_info(table, RD) of + undefined -> undefined; + B -> list_to_binary(riak_kv_wm_utils:maybe_decode_uri(RD, B)) + end + }}; + Error -> + {false, wrq:set_resp_body( + flat_format("Unable to connect to Riak: ~p", [Error]), + wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), + Ctx} + end. + + +is_authorized(ReqData, Ctx) -> + case riak_api_web_security:is_authorized(ReqData) of + false -> + {"Basic realm=\"Riak\"", ReqData, Ctx}; + {true, SecContext} -> + {true, ReqData, Ctx#ctx{security = SecContext}}; + insecure -> + %% XXX 301 may be more appropriate here, but since the http and + %% https port are different and configurable, it is hard to figure + %% out the redirect URL to serve. + {{halt, 426}, + wrq:append_to_resp_body( + <<"Security is enabled and " + "Riak does not accept credentials over HTTP. Try HTTPS instead.">>, ReqData), + Ctx} + end. + + +-spec forbidden(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. 
+forbidden(RD, Ctx) -> + case riak_kv_wm_utils:is_forbidden(RD) of + true -> + {true, RD, Ctx}; + false -> + %% plug in early, and just do what it takes to do the job + {false, RD, Ctx} + end. +%% Because webmachine chooses to (not) call certain callbacks +%% depending on request method used, sometimes accept_doc_body is not +%% called at all, and we arrive at produce_doc_body empty-handed. +%% This is the case when curl is executed with -G and --data. + + +-spec allowed_methods(#wm_reqdata{}, #ctx{}) -> + {[atom()], #wm_reqdata{}, #ctx{}}. +allowed_methods(RD, Ctx) -> + {['GET', 'POST'], RD, Ctx}. + + +-spec malformed_request(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +malformed_request(RD, Ctx) -> + %% this is plugged because requests are validated against + %% effective query contained in the body (and hence, we need + %% accept_doc_body to parse and extract things out of JSON first) + {false, RD, Ctx}. + + +-spec preexec(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +%% * extract query from request body or, failing that, from +%% POST k=v items, try to compile it; +%% * check API version; +%% * validate query type against HTTP method; +%% * check permissions on the query type. +preexec(RD, Ctx) -> + case validate_request(RD, Ctx) of + {true, RD1, Ctx1} -> + case check_permissions(RD1, Ctx1) of + {false, RD2, Ctx2} -> + call_api_function(RD2, Ctx2); + FalseWithDetails -> + FalseWithDetails + end; + FalseWithDetails -> + FalseWithDetails + end. + +-spec validate_request(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +validate_request(RD, Ctx) -> + case wrq:path_info(api_version, RD) of + "v1" -> + validate_request_v1(RD, Ctx); + BadVersion -> + handle_error({unsupported_version, BadVersion}, RD, Ctx) + end. + +-spec validate_request_v1(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +validate_request_v1(RD, Ctx = #ctx{method = Method}) -> + Json = extract_json(RD), + case {Method, string:tokens(wrq:path(RD), "/"), + extract_query(Json), extract_cover_context(Json)} of + {Method, ["ts", "v1", "query"], + Query, CoverContext} + when (Method == 'GET' orelse Method == 'POST') + andalso is_list(Query) -> + case riak_ql_parser:ql_parse( + riak_ql_lexer:get_tokens(Query)) of + {error, Reason} -> + handle_error({query_parse_error, Reason}, RD, Ctx); + {ddl, DDL} -> + valid_params( + RD, Ctx#ctx{api_version = "v1", + query = Query, + compiled_query = DDL}); + {Type, Compiled} when Type == select; + Type == describe -> + {ok, SQL} = riak_kv_ts_util:build_sql_record( + Type, Compiled, CoverContext), + valid_params( + RD, Ctx#ctx{api_version = "v1", + query = Query, + compiled_query = SQL}) + end; + _Invalid -> + handle_error({malformed_request, Method}, RD, Ctx) + end. + + +-spec valid_params(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +valid_params(RD, Ctx) -> + %% no params currently for query + {true, RD, Ctx}. + +%% This is a special case for curl -G. `curl -G host --data $data` +%% will send the $data in URL instead of in the body, so we try to +%% look for it in req_qs. +extract_json(RD) -> + case proplists:get_value("json", RD#wm_reqdata.req_qs) of + undefined -> + %% if it was a PUT or POST, data is in body + binary_to_list(wrq:req_body(RD)); + BodyInPost -> + BodyInPost + end. + +-spec extract_query(binary()) -> term(). +extract_query(Json) -> + try mochijson2:decode(Json) of + {struct, Decoded} when is_list(Decoded) -> + validate_ts_query( + proplists:get_value(<<"query">>, Decoded)) + catch + _:_ -> + undefined + end. + +-spec extract_cover_context(binary()) -> term(). 
+extract_cover_context(Json) -> + try mochijson2:decode(Json) of + {struct, Decoded} when is_list(Decoded) -> + validate_ts_cover_context( + proplists:get_value(<<"coverage_context">>, Decoded)) + catch + _:_ -> + undefined + end. + + +validate_ts_query(Q) when is_binary(Q) -> + binary_to_list(Q); +validate_ts_query(_) -> + undefined. + +validate_ts_cover_context(C) when is_binary(C) -> + C; +validate_ts_cover_context(_) -> + undefined. + + +-spec check_permissions(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +check_permissions(RD, Ctx = #ctx{security = undefined}) -> + {false, RD, Ctx}; +check_permissions(RD, Ctx = #ctx{security = Security, + compiled_query = CompiledQry}) -> + case riak_core_security:check_permission( + decode_query_permissions(CompiledQry), Security) of + {false, Error, _} -> + handle_error( + {not_permitted, unicode:characters_to_binary(Error, utf8, utf8)}, RD, Ctx); + _ -> + {false, RD, Ctx} + end. + +decode_query_permissions(#ddl_v1{table = NewBucketType}) -> + {"riak_kv.ts_create_table", NewBucketType}; +decode_query_permissions(?SQL_SELECT{'FROM' = Table}) -> + {"riak_kv.ts_query", Table}; +decode_query_permissions(#riak_sql_describe_v1{'DESCRIBE' = Table}) -> + {"riak_kv.ts_describe", Table}. + + +-spec content_types_provided(#wm_reqdata{}, #ctx{}) -> + {[{ContentType::string(), Producer::atom()}], + #wm_reqdata{}, #ctx{}}. +content_types_provided(RD, Ctx) -> + {[{"application/json", produce_doc_body}], RD, Ctx}. + + +-spec encodings_provided(#wm_reqdata{}, #ctx{}) -> + {[{Encoding::string(), Producer::function()}], + #wm_reqdata{}, #ctx{}}. +encodings_provided(RD, Ctx) -> + {riak_kv_wm_utils:default_encodings(), RD, Ctx}. + + +-spec content_types_accepted(#wm_reqdata{}, #ctx{}) -> + {[{ContentType::string(), Acceptor::atom()}], + #wm_reqdata{}, #ctx{}}. +content_types_accepted(RD, Ctx) -> + {[{"application/json", accept_doc_body}], RD, Ctx}. + + +-spec resource_exists(#wm_reqdata{}, #ctx{}) -> + {boolean(), #wm_reqdata{}, #ctx{}}. +resource_exists(RD0, Ctx0) -> + case preexec(RD0, Ctx0) of + {true, RD, Ctx} -> + call_api_function(RD, Ctx); + FalseWithDetails -> + FalseWithDetails + end. + +-spec process_post(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +%% @doc Pass through requests to allow POST to function +%% as PUT for clients that do not support PUT. +process_post(RD, Ctx) -> + accept_doc_body(RD, Ctx). + +-spec accept_doc_body(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +accept_doc_body(RD0, Ctx0) -> + case preexec(RD0, Ctx0) of + {true, RD, Ctx} -> + call_api_function(RD, Ctx); + FalseWithDetails -> + FalseWithDetails + end. + +-spec call_api_function(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +call_api_function(RD, Ctx = #ctx{result = Result}) + when Result /= undefined -> + lager:debug("Function already executed", []), + {true, RD, Ctx}; +call_api_function(RD, Ctx = #ctx{method = Method, + compiled_query = CompiledQry}) -> + case CompiledQry of + SQL = ?SQL_SELECT{} when Method == 'GET' -> + %% inject coverage context + process_query(SQL, RD, Ctx); + Other when (is_record(Other, ddl_v1) andalso Method == 'POST') orelse + (is_record(Other, riak_sql_describe_v1) andalso Method == 'GET') -> + process_query(Other, RD, Ctx); + _Other -> + handle_error({inappropriate_sql_for_method, Method}, RD, Ctx) + end. 
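%% For orientation, a minimal sketch (illustrative only; the table and
%% query text are invented) of the JSON round trip the extractors above
%% rely on. mochijson2 decodes a JSON object to {struct, [{Key, Value}, ...]}
%% with binary keys and values, which is what extract_query/1 and
%% extract_cover_context/1 pattern-match on:
%% ```
%% 1> {struct, Decoded} =
%%        mochijson2:decode(<<"{\"query\": \"DESCRIBE GeoCheckin\"}">>).
%% 2> Q = proplists:get_value(<<"query">>, Decoded).
%% <<"DESCRIBE GeoCheckin">>
%% 3> binary_to_list(Q).   %% validate_ts_query/1 does this before lexing
%% "DESCRIBE GeoCheckin"
%% '''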
+ + +process_query(DDL = #ddl_v1{table = Table}, RD, Ctx) -> + {ok, Props1} = riak_kv_ts_util:apply_timeseries_bucket_props(DDL, []), + Props2 = [riak_kv_wm_utils:erlify_bucket_prop(P) || P <- Props1], + %% TODO: let's not bother collecting user properties from (say) + %% sidecar object in body JSON: when #ddl_v2 work is merged, we + %% will have a way to collect those bespoke table properties from + %% WITH clause. + case riak_core_bucket_type:create(Table, Props2) of + ok -> + wait_until_active(Table, RD, Ctx, ?TABLE_ACTIVATE_WAIT); + {error, Reason} -> + handle_error({table_create_fail, Table, Reason}, RD, Ctx) + end; + +process_query(SQL = ?SQL_SELECT{'FROM' = Table}, RD, Ctx0 = #ctx{}) -> + Mod = riak_ql_ddl:make_module_name(Table), + case catch Mod:get_ddl() of + {_, {undef, _}} -> + handle_error({no_such_table, Table}, RD, Ctx0); + DDL -> + case riak_kv_qry:submit(SQL, DDL) of + {ok, Data} -> + {ColumnNames, _ColumnTypes, Rows} = Data, + Ctx = Ctx0#ctx{result = {ColumnNames, Rows}}, + prepare_data_in_body(RD, Ctx); + %% the following timeouts are known and distinguished: + {error, qry_worker_timeout} -> + %% the eleveldb process didn't send us any response after + %% 10 sec (hardcoded in riak_kv_qry), and probably died + handle_error(query_worker_timeout, RD, Ctx0); + {error, backend_timeout} -> + %% the eleveldb process did manage to send us a timeout + %% response + handle_error(backend_timeout, RD, Ctx0); + + {error, Reason} -> + handle_error({query_exec_error, Reason}, RD, Ctx0) + end + end; + +process_query(SQL = #riak_sql_describe_v1{'DESCRIBE' = Table}, RD, Ctx0 = #ctx{}) -> + Mod = riak_ql_ddl:make_module_name(Table), + case catch Mod:get_ddl() of + {_, {undef, _}} -> + handle_error({no_such_table, Table}, RD, Ctx0); + DDL -> + case riak_kv_qry:submit(SQL, DDL) of + {ok, Data} -> + ColumnNames = [<<"Column">>, <<"Type">>, <<"Is Null">>, + <<"Primary Key">>, <<"Local Key">>], + Ctx = Ctx0#ctx{result = {ColumnNames, Data}}, + prepare_data_in_body(RD, Ctx); + {error, Reason} -> + handle_error({query_exec_error, Reason}, RD, Ctx0) + end + end. + + +wait_until_active(Table, RD, Ctx, 0) -> + handle_error({table_activate_fail, Table}, RD, Ctx); +wait_until_active(Table, RD, Ctx, Seconds) -> + case riak_core_bucket_type:activate(Table) of + ok -> + prepare_data_in_body(RD, Ctx#ctx{result = {[],[]}}); + %% a way for CREATE TABLE queries to return 'ok' on success + {error, not_ready} -> + timer:sleep(1000), + wait_until_active(Table, RD, Ctx, Seconds - 1); + {error, undefined} -> + %% this is inconceivable because create(Table) has + %% just succeeded, so it's here mostly to pacify + %% the dialyzer (and of course, for the odd chance + %% of Erlang imps crashing nodes between create + %% and activate calls) + handle_error({table_created_missing, Table}, RD, Ctx) + end. + +prepare_data_in_body(RD0, Ctx0) -> + {Json, RD1, Ctx1} = produce_doc_body(RD0, Ctx0), + {true, wrq:append_to_response_body(Json, RD1), Ctx1}. + + +-spec produce_doc_body(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +%% @doc Extract the value of the document, and place it in the +%% response body of the request. +produce_doc_body(RD, Ctx = #ctx{result = {Columns, Rows}}) -> + {mochijson2:encode( + {struct, [{<<"columns">>, Columns}, + {<<"rows">>, Rows}]}), + RD, Ctx}. + + +error_out(Type, Fmt, Args, RD, Ctx) -> + {Type, + wrq:set_resp_header( + "Content-Type", "text/plain", wrq:append_to_response_body( + flat_format(Fmt, Args), RD)), + Ctx}. 
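%% To make the response format concrete: for a made-up two-row result,
%% the {struct, ...} term built in produce_doc_body/2 above encodes as
%% ```
%% 1> iolist_to_binary(
%%        mochijson2:encode(
%%          {struct, [{<<"columns">>, [<<"region">>, <<"time">>]},
%%                    {<<"rows">>, [[<<"east">>, 1], [<<"west">>, 2]]}]})).
%% <<"{\"columns\":[\"region\",\"time\"],\"rows\":[[\"east\",1],[\"west\",2]]}">>
%% '''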
+ +-spec handle_error(atom()|tuple(), #wm_reqdata{}, #ctx{}) -> {tuple(), #wm_reqdata{}, #ctx{}}. +handle_error(Error, RD, Ctx) -> + case Error of + {unsupported_version, BadVersion} -> + error_out({halt, 412}, + "Unsupported API version ~s", [BadVersion], RD, Ctx); + {not_permitted, Table} -> + error_out({halt, 401}, + "Access to table ~s not allowed", [Table], RD, Ctx); + {malformed_request, Method} -> + error_out({halt, 400}, + "Malformed ~s request", [Method], RD, Ctx); + {no_such_table, Table} -> + error_out({halt, 404}, + "Table \"~ts\" does not exist", [Table], RD, Ctx); + {query_parse_error, Detailed} -> + error_out({halt, 400}, + "Malformed query: ~ts", [Detailed], RD, Ctx); + {table_create_fail, Table, Reason} -> + error_out({halt, 500}, + "Failed to create table \"~ts\": ~p", [Table, Reason], RD, Ctx); + query_worker_timeout -> + error_out({halt, 503}, + "Query worker timeout", [], RD, Ctx); + backend_timeout -> + error_out({halt, 503}, + "Storage backend timeout", [], RD, Ctx); + {query_exec_error, Detailed} -> + error_out({halt, 400}, + "Query execution failed: ~ts", [Detailed], RD, Ctx); + {table_activate_fail, Table} -> + error_out({halt, 500}, + "Failed to activate bucket type for table \"~ts\"", [Table], RD, Ctx); + {table_created_missing, Table} -> + error_out({halt, 500}, + "Bucket type for table \"~ts\" disappeared suddenly before activation", [Table], RD, Ctx); + {inappropriate_sql_for_method, Method} -> + error_out({halt, 400}, + "Inappropriate method ~s for SQL query type", [Method], RD, Ctx) + end. + +flat_format(Format, Args) -> + lists:flatten(io_lib:format(Format, Args)). From d5286934a3e59692c68507bfc574d0de092b8d09 Mon Sep 17 00:00:00 2001 From: Andrei Zavada Date: Wed, 17 Feb 2016 18:58:27 +0200 Subject: [PATCH 085/122] ignore non-relevant parameters in json for TS calls --- src/riak_kv_wm_timeseries.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl index 45511a95e4..5dae075295 100644 --- a/src/riak_kv_wm_timeseries.erl +++ b/src/riak_kv_wm_timeseries.erl @@ -215,7 +215,7 @@ validate_request_v1(RD, Ctx = #ctx{method = Method}) -> %% single-key get {'GET', ["ts", "v1", "tables", Table], - Key, undefined, undefined} + Key, _, _} when is_list(Table), Key /= undefined -> valid_params( RD, Ctx#ctx{api_version = "v1", api_call = get, @@ -223,7 +223,7 @@ validate_request_v1(RD, Ctx = #ctx{method = Method}) -> %% single-key delete {'DELETE', ["ts", "v1", "tables", Table], - Key, undefined, undefined} + Key, _, _} when is_list(Table), Key /= undefined -> valid_params( RD, Ctx#ctx{api_version = "v1", api_call = delete, @@ -231,7 +231,7 @@ validate_request_v1(RD, Ctx = #ctx{method = Method}) -> %% batch put {'PUT', ["ts", "v1", "tables", Table], - undefined, Data, undefined} + _, Data, _} when is_list(Table), Data /= undefined -> valid_params( RD, Ctx#ctx{api_version = "v1", api_call = put, From 50acf38041097adad3da88576117bb4766bbc7b9 Mon Sep 17 00:00:00 2001 From: Andrei Zavada Date: Wed, 17 Feb 2016 23:54:51 +0200 Subject: [PATCH 086/122] repin bitcask, other deps back to riak_ts-develop because there were no changes for them to progress to 1.3, whereby they lose the version and become riak_ts-develop. 
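All of the pins this commit touches follow the standard rebar2 dependency
tuple; a generic sketch, with a placeholder application name and URL:

    %% {AppName, VersionRegex, {git, Url, {branch, Branch} | {tag, Tag}}}
    {some_dep, ".*",
     {git, "git://github.com/basho/some_dep.git", {branch, "riak_ts-develop"}}}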
--- rebar.config | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rebar.config b/rebar.config index 5691757ced..399e2ea6fe 100644 --- a/rebar.config +++ b/rebar.config @@ -28,11 +28,11 @@ {deps, [ {sidejob, ".*", {git, "git://github.com/basho/sidejob.git", {branch, "develop"}}}, {erlang_js, ".*", {git, "git://github.com/basho/erlang_js.git", {tag, "1.3.0"}}}, - {bitcask, ".*", {git, "git://github.com/basho/bitcask.git", {branch, "riak_ts-develop-1.3"}}}, + {bitcask, ".*", {git, "git://github.com/basho/bitcask.git", {branch, "riak_ts-develop"}}}, {eper, ".*", {git, "git://github.com/basho/eper.git", {tag, "0.78"}}}, {sext, ".*", {git, "git://github.com/basho/sext.git", {tag, "1.1p3"}}}, - {riak_pipe, ".*", {git, "git://github.com/basho/riak_pipe.git", {branch, "riak_ts-develop-1.3"}}}, - {riak_api, ".*", {git, "git://github.com/basho/riak_api.git", {branch, "riak_ts-develop-1.3"}}}, + {riak_pipe, ".*", {git, "git://github.com/basho/riak_pipe.git", {branch, "riak_ts-develop"}}}, + {riak_api, ".*", {git, "git://github.com/basho/riak_api.git", {branch, "riak_ts-develop"}}}, {riak_dt, ".*", {git, "git://github.com/basho/riak_dt.git", {branch, "develop"}}}, {msgpack, ".*", {git, "git://github.com/msgpack/msgpack-erlang.git", {tag, "0.3.5"}}}, {riak_ql, ".*", {git, "git@github.com:basho/riak_ql.git", {branch, "develop"}}}, From ea22a8fe7ebac249b84e5ec58498f75931a2bd6b Mon Sep 17 00:00:00 2001 From: Andrei Zavada Date: Wed, 24 Feb 2016 22:55:12 +0200 Subject: [PATCH 087/122] filter empty records at collection site in riak_kv_qry_worker This improves on 5c56530. --- src/riak_kv_qry.erl | 13 ++----------- src/riak_kv_qry_worker.erl | 4 +++- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/src/riak_kv_qry.erl b/src/riak_kv_qry.erl index 4aa9751bf1..d1dff62d4d 100644 --- a/src/riak_kv_qry.erl +++ b/src/riak_kv_qry.erl @@ -103,17 +103,8 @@ maybe_submit_to_queue(SQL, ?DDL{table = BucketType} = DDL) -> {error,_} = Error -> Error; {ok, Queries} -> - case maybe_await_query_results( - riak_kv_qry_queue:put_on_queue(self(), Queries, DDL)) of - {ok, {ColNames, ColTypes, PossiblyWithEmptyRecords}} -> - %% filter out empty records - {ok, - {ColNames, ColTypes, - [R || R <- PossiblyWithEmptyRecords, - R /= [[]]]}}; - {error, Reason} -> - {error, Reason} - end + maybe_await_query_results( + riak_kv_qry_queue:put_on_queue(self(), Queries, DDL)) end; {false, Errors} -> {error, {invalid_query, format_query_syntax_errors(Errors)}} diff --git a/src/riak_kv_qry_worker.erl b/src/riak_kv_qry_worker.erl index 064523de05..99550529b0 100644 --- a/src/riak_kv_qry_worker.erl +++ b/src/riak_kv_qry_worker.erl @@ -279,7 +279,9 @@ prepare_final_results(#state{ %% prepare_final_results2(#riak_sel_clause_v1{ col_return_types = ColTypes, col_names = ColNames}, Rows) -> - {ColNames, ColTypes, Rows}. + %% filter out empty records + FinalRows = [R || R <- Rows, R /= [[]]], + {ColNames, ColTypes, FinalRows}. 
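%% A quick illustration (rows invented) of what the relocated filter
%% drops: subquery results that arrive as [[]] are removed, while
%% genuine rows survive:
%% ```
%% 1> Rows = [[<<"a">>, 1], [[]], [<<"b">>, 2], [[]]].
%% 2> [R || R <- Rows, R /= [[]]].
%% [[<<"a">>,1],[<<"b">>,2]]
%% '''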
%%%=================================================================== %%% Unit tests From 5c81ba54d23a7822bd399b047ff81f38dd449a40 Mon Sep 17 00:00:00 2001 From: Andrei Zavada Date: Fri, 26 Feb 2016 02:08:10 +0200 Subject: [PATCH 088/122] wm_timeseries: also support keys in url for get & delete --- src/riak_kv_web.erl | 4 +- src/riak_kv_wm_timeseries.erl | 136 +++++++++++++++++++++++++++++++++- 2 files changed, 134 insertions(+), 6 deletions(-) diff --git a/src/riak_kv_web.erl b/src/riak_kv_web.erl index 9d7043923e..4314762aa6 100644 --- a/src/riak_kv_web.erl +++ b/src/riak_kv_web.erl @@ -125,8 +125,8 @@ raw_dispatch(Name) -> lists:flatten( [ - [{["ts", api_version, "tables", table, "keys"], riak_kv_wm_timeseries_listkeys, Props}, - {["ts", api_version, "tables", table], riak_kv_wm_timeseries, Props}, + [{["ts", api_version, "tables", table, "list_keys"], riak_kv_wm_timeseries_listkeys, Props}, + {["ts", api_version, "tables", table, "keys", '*'], riak_kv_wm_timeseries, Props}, {["ts", api_version, "query"], riak_kv_wm_timeseries_query, Props} %% {["ts", api_version, "coverage"], riak_kv_wm_timeseries, Props} ] || {_Prefix, Props} <- Props2]). diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl index 5dae075295..0ccd44c53c 100644 --- a/src/riak_kv_wm_timeseries.erl +++ b/src/riak_kv_wm_timeseries.erl @@ -208,13 +208,25 @@ validate_request(RD, Ctx) -> end. -spec validate_request_v1(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -validate_request_v1(RD, Ctx = #ctx{method = Method}) -> +validate_request_v1(RD, Ctx) -> + case string:tokens(wrq:path(RD), "/") of + ["ts", "v1", "tables", Table, "keys" | KeysInUrl] + when KeysInUrl /= [] -> + KeysInUrlUnquoted = lists:map(fun mochiweb_util:unquote/1, KeysInUrl), + validate_request_v1_with({Table, KeysInUrlUnquoted}, RD, Ctx); + _ -> + validate_request_v1_with(json, RD, Ctx) + end. + +-spec validate_request_v1_with(json | {string(), [string()]}, #wm_reqdata{}, #ctx{}) -> + ?CB_RV_SPEC. 
+validate_request_v1_with(json, RD, Ctx = #ctx{method = Method}) -> Json = extract_json(RD), case {Method, string:tokens(wrq:path(RD), "/"), extract_key(Json), extract_data(Json), extract_query(Json)} of %% single-key get {'GET', - ["ts", "v1", "tables", Table], + ["ts", "v1", "tables", Table, "keys"], Key, _, _} when is_list(Table), Key /= undefined -> valid_params( @@ -222,7 +234,7 @@ validate_request_v1(RD, Ctx = #ctx{method = Method}) -> table = list_to_binary(Table), key = Key}); %% single-key delete {'DELETE', - ["ts", "v1", "tables", Table], + ["ts", "v1", "tables", Table, "keys"], Key, _, _} when is_list(Table), Key /= undefined -> valid_params( @@ -230,7 +242,7 @@ validate_request_v1(RD, Ctx = #ctx{method = Method}) -> table = list_to_binary(Table), key = Key}); %% batch put {'PUT', - ["ts", "v1", "tables", Table], + ["ts", "v1", "tables", Table, "keys"], _, Data, _} when is_list(Table), Data /= undefined -> valid_params( @@ -246,8 +258,112 @@ validate_request_v1(RD, Ctx = #ctx{method = Method}) -> query = Query}); _Invalid -> handle_error({malformed_request, Method}, RD, Ctx) + end; + +validate_request_v1_with({Table, KeysInUrl}, RD, Ctx = #ctx{method = Method}) -> + %% only get and delete can have keys in url + case {Method, + path_elements_to_key(Table, KeysInUrl)} of + {'GET', {ok, Key}} -> + valid_params( + RD, Ctx#ctx{api_version = "v1", api_call = get, + table = list_to_binary(Table), key = Key}); + %% single-key delete + {'DELETE', {ok, Key}} -> + valid_params( + RD, Ctx#ctx{api_version = "v1", api_call = delete, + table = list_to_binary(Table), key = Key}); + {_, {error, Reason}} -> + handle_error(Reason, RD, Ctx); + {'PUT', _} -> + handle_error(url_key_with_put, RD, Ctx) + end. + +%% extract keys from path elements in the URL (.../K1/V1/K2/V2 -> +%% [{K1, V1}, {K2, V2}]), check with Table's DDL to make sure keys are +%% correct and values are of (convertible to) appropriate types, and +%% return the KV list +-spec path_elements_to_key(string(), [string()]) -> + {ok, [{string(), riak_pb_ts_codec:ldbvalue()}]} | + {error, atom()|tuple()}. +path_elements_to_key(Table, PEList) -> + Mod = riak_ql_ddl:make_module_name(list_to_binary(Table)), + try + DDL = Mod:get_ddl(), + #ddl_v1{local_key = #key_v1{ast = LK}} = DDL, + TableKeyLength = length(LK), + if TableKeyLength * 2 == length(PEList) -> + %% values with field names: "f1/v1/f2/v2/f3/v3" + %% 1. check that supplied key fields exist and values + %% supplied are convertible to their types + FVList = + [convert_fv(Table, Mod, K, V) + || {K, V} <- empair(PEList, [])], + %% 2. possibly reorder field-value pairs to match the LK order + OrderedKeyValues = + ensure_lk_order_and_strip(DDL, FVList), + {ok, OrderedKeyValues}; + TableKeyLength == length(PEList) -> + %% bare values: "v1/v2/v3" + %% 1. retrieve field values from the DDL + Fields = [F || #param_v1{name = F} <- LK], + FVList = + [convert_fv(Table, Mod, K, V) + || {K, V} <- lists:zip(Fields, PEList)], + {_, OrderedKeyValues} = + lists:unzip(FVList), + {ok, OrderedKeyValues}; + el/=se -> + {error, url_unpaired_keys} + end + catch + error:undef -> + {error, {no_such_table, Table}}; + throw:ConvertFailed -> + {error, ConvertFailed} end. +empair([], Q) -> lists:reverse(Q); +empair([K, V | T], Q) -> empair(T, [{K, V}|Q]). 
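%% Conceptually, empair/2 folds the flat list of URL path segments into
%% field/value pairs (segments here are invented):
%% ```
%% empair(["f1", "v1", "f2", "v2"], []) =:= [{"f1","v1"}, {"f2","v2"}]
%% '''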
+
+convert_fv(Table, Mod, FieldRaw, V) ->
+    Field = [list_to_binary(X) || X <- string:tokens(FieldRaw, ".")],
+    try
+        case Mod:is_field_valid(Field) of
+            true ->
+                case Mod:get_field_type(Field) of
+                    varchar ->
+                        {Field, list_to_binary(V)};
+                    sint64 ->
+                        {Field, list_to_integer(V)};
+                    double ->
+                        %% list_to_float("42") will fail, so fall back
+                        %% to list_to_integer and convert the result
+                        try
+                            {Field, list_to_float(V)}
+                        catch
+                            error:badarg ->
+                                {Field, float(list_to_integer(V))}
+                        end;
+                    timestamp ->
+                        case list_to_integer(V) of
+                            BadValue when BadValue < 1 ->
+                                throw({url_key_bad_value, Table, Field});
+                            GoodValue ->
+                                {Field, GoodValue}
+                        end
+                end;
+            false ->
+                throw({url_key_bad_key, Table, Field})
+        end
+    catch
+        error:badarg ->
+            %% rethrow with key, for more informative reporting
+            throw({url_key_bad_value, Table, Field})
+    end.
+
+ensure_lk_order_and_strip(#ddl_v1{local_key = #key_v1{ast = LK}}, FVList) ->
+    [proplists:get_value(F, FVList)
+     || #param_v1{name = F} <- LK].
 
 -spec valid_params(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC.
 valid_params(RD, Ctx) ->
@@ -617,6 +733,9 @@ handle_error(Error, RD, Ctx) ->
         {malformed_request, Method} ->
             error_out({halt, 400},
                       "Malformed ~s request", [Method], RD, Ctx);
+        url_key_with_put ->
+            error_out({halt, 400},
+                      "Malformed PUT request (did you mean a GET with keys in URL?)", [], RD, Ctx);
         {bad_parameter, Param} ->
             error_out({halt, 400},
                       "Bad value for parameter \"~s\"", [Param], RD, Ctx);
@@ -632,6 +751,15 @@ handle_error(Error, RD, Ctx) ->
         {key_element_count_mismatch, Got, Need} ->
             error_out({halt, 400},
                       "Incorrect number of elements (~b) for key of length ~b", [Need, Got], RD, Ctx);
+        {url_key_bad_key, Table, Key} ->
+            error_out({halt, 400},
+                      "Table \"~ts\" has no field named \"~s\"", [Table, Key], RD, Ctx);
+        {url_key_bad_value, Table, Key} ->
+            error_out({halt, 400},
+                      "Bad value for field \"~s\" in table \"~ts\"", [Key, Table], RD, Ctx);
+        url_unpaired_keys ->
+            error_out({halt, 400},
+                      "Unpaired field/value for key spec in URL", [], RD, Ctx);
         notfound ->
             error_out({halt, 404},
                       "Key not found", [], RD, Ctx);

From 8d9fcd276b99de253ed0425680d8de8b85eef0a7 Mon Sep 17 00:00:00 2001
From: Andrei Zavada
Date: Fri, 26 Feb 2016 03:31:09 +0200
Subject: [PATCH 089/122] wm_timeseries: disable key-fields-in-json-body mode
 for TS get and delete

The version with both key-in-json and key-in-url is forked into its own
branch, feature-az-http_ts_api_with_keys_in_json_in_body. If a decision
is taken to resurrect that branch, there is a complementary branch of
the same name in riak-erlang-http-client, which can produce requests
with the key in JSON.
--- src/riak_kv_wm_timeseries.erl | 41 ++++++----------------------------- 1 file changed, 7 insertions(+), 34 deletions(-) diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl index 0ccd44c53c..bb84d3e544 100644 --- a/src/riak_kv_wm_timeseries.erl +++ b/src/riak_kv_wm_timeseries.erl @@ -68,9 +68,10 @@ api_call :: undefined|get|put|delete|coverage, table :: undefined | binary(), %% data in/out: the following fields are either - %% extracted from the JSON that came in the request body - %% in case of a PUT, or filled out by retrieved values - %% for shipping (as JSON) in response body + %% extracted from the JSON/path elements that came in + %% the request body in case of a PUT, or filled out by + %% retrieved values for shipping (as JSON) in response + %% body key :: undefined | ts_rec(), %% parsed out of JSON that came in the body data :: undefined | [ts_rec()], %% ditto query :: string(), @@ -223,27 +224,11 @@ validate_request_v1(RD, Ctx) -> validate_request_v1_with(json, RD, Ctx = #ctx{method = Method}) -> Json = extract_json(RD), case {Method, string:tokens(wrq:path(RD), "/"), - extract_key(Json), extract_data(Json), extract_query(Json)} of - %% single-key get - {'GET', - ["ts", "v1", "tables", Table, "keys"], - Key, _, _} - when is_list(Table), Key /= undefined -> - valid_params( - RD, Ctx#ctx{api_version = "v1", api_call = get, - table = list_to_binary(Table), key = Key}); - %% single-key delete - {'DELETE', - ["ts", "v1", "tables", Table, "keys"], - Key, _, _} - when is_list(Table), Key /= undefined -> - valid_params( - RD, Ctx#ctx{api_version = "v1", api_call = delete, - table = list_to_binary(Table), key = Key}); + extract_data(Json), extract_query(Json)} of %% batch put {'PUT', ["ts", "v1", "tables", Table, "keys"], - _, Data, _} + Data, _} when is_list(Table), Data /= undefined -> valid_params( RD, Ctx#ctx{api_version = "v1", api_call = put, @@ -251,7 +236,7 @@ validate_request_v1_with(json, RD, Ctx = #ctx{method = Method}) -> %% coverage {'GET', ["ts", "v1", "coverage"], - undefined, undefined, Query} + undefined, Query} when is_list(Query) -> valid_params( RD, Ctx#ctx{api_version = "v1", api_call = coverage, @@ -392,18 +377,6 @@ extract_json(RD) -> BodyInPost end. --spec extract_key(binary()) -> term(). -extract_key(Json) -> - try mochijson2:decode(Json) of - {struct, Decoded} when is_list(Decoded) -> - %% key and data (it's a put) - validate_ts_record( - proplists:get_value(<<"key">>, Decoded)) - catch - _:_ -> - undefined - end. - %% because, techically, key and data are 'arguments', we check they %% are well-formed, too. -spec extract_data(binary()) -> term(). From e7b677b9fb4454d773288ef6a7f950484fb056f7 Mon Sep 17 00:00:00 2001 From: Andrei Zavada Date: Fri, 26 Feb 2016 18:32:06 +0200 Subject: [PATCH 090/122] wm_timeseries: use POST for batch puts --- src/riak_kv_wm_timeseries.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl index bb84d3e544..be1b8669a4 100644 --- a/src/riak_kv_wm_timeseries.erl +++ b/src/riak_kv_wm_timeseries.erl @@ -161,7 +161,7 @@ forbidden(RD, Ctx) -> {[atom()], #wm_reqdata{}, #ctx{}}. %% @doc Get the list of methods this resource supports. allowed_methods(RD, Ctx) -> - {['GET', 'PUT', 'DELETE'], RD, Ctx}. + {['GET', 'POST', 'DELETE'], RD, Ctx}. -spec malformed_request(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. 
@@ -226,7 +226,7 @@ validate_request_v1_with(json, RD, Ctx = #ctx{method = Method}) -> case {Method, string:tokens(wrq:path(RD), "/"), extract_data(Json), extract_query(Json)} of %% batch put - {'PUT', + {'POST', ["ts", "v1", "tables", Table, "keys"], Data, _} when is_list(Table), Data /= undefined -> @@ -260,7 +260,7 @@ validate_request_v1_with({Table, KeysInUrl}, RD, Ctx = #ctx{method = Method}) -> table = list_to_binary(Table), key = Key}); {_, {error, Reason}} -> handle_error(Reason, RD, Ctx); - {'PUT', _} -> + {'POST', _} -> handle_error(url_key_with_put, RD, Ctx) end. @@ -708,7 +708,7 @@ handle_error(Error, RD, Ctx) -> "Malformed ~s request", [Method], RD, Ctx); url_key_with_put -> error_out({halt, 400}, - "Malformed PUT request (did you mean a GET with keys in URL?)", [], RD, Ctx); + "Malformed POST request (did you mean a GET with keys in URL?)", [], RD, Ctx); {bad_parameter, Param} -> error_out({halt, 400}, "Bad value for parameter \"~s\"", [Param], RD, Ctx); From b89ab2f8d3a42ef8ece7981668ad754e8abb220c Mon Sep 17 00:00:00 2001 From: Andrei Zavada Date: Sat, 27 Feb 2016 06:57:37 +0200 Subject: [PATCH 091/122] collect internal TS API functions into riak_kv_ts_api --- src/riak_kv_pb_timeseries.erl | 9 +- src/riak_kv_ts_api.erl | 339 ++++++++++++++++++++++++++++ src/riak_kv_ts_util.erl | 277 +---------------------- src/riak_kv_wm_timeseries.erl | 8 +- src/riak_kv_wm_timeseries_query.erl | 4 +- 5 files changed, 350 insertions(+), 287 deletions(-) create mode 100644 src/riak_kv_ts_api.erl diff --git a/src/riak_kv_pb_timeseries.erl b/src/riak_kv_pb_timeseries.erl index 67f4ffd6dc..84bcaddf18 100644 --- a/src/riak_kv_pb_timeseries.erl +++ b/src/riak_kv_pb_timeseries.erl @@ -402,7 +402,7 @@ sub_tsttbputreq(Mod, _DDL, #tsttbputreq{table = Table, rows = Data}, sub_putreq_common(Mod, Table, Data, State) -> case catch riak_kv_ts_util:validate_rows(Mod, Data) of [] -> - case riak_kv_ts_util:put_data(Data, Table, Mod) of + case riak_kv_ts_api:put_data(Data, Table, Mod) of ok -> {reply, #tsputresp{}, State}; {error, {some_failed, ErrorCount}} -> @@ -431,7 +431,7 @@ sub_tsgetreq(Mod, _DDL, #tsgetreq{table = Table, end, CompoundKey = riak_pb_ts_codec:decode_cells(PbCompoundKey), Mod = riak_ql_ddl:make_module_name(Table), - case riak_kv_ts_util:get_data( + case riak_kv_ts_api:get_data( CompoundKey, Table, Mod, Options) of {ok, Record} -> {ColumnNames, Row} = lists:unzip(Record), @@ -464,7 +464,7 @@ sub_tsdelreq(Mod, _DDL, #tsdelreq{table = Table, end, CompoundKey = riak_pb_ts_codec:decode_cells(PbCompoundKey), Mod = riak_ql_ddl:make_module_name(Table), - case riak_kv_ts_util:delete_data( + case riak_kv_ts_api:delete_data( CompoundKey, Table, Mod, Options, VClock) of ok -> {reply, tsdelresp, State}; @@ -512,7 +512,7 @@ sub_tscoveragereq(Mod, _DDL, #tscoveragereq{table = Table, Client = {riak_client, [node(), undefined]}, case decode_query(Q) of {ok, SQL} -> - case riak_kv_ts_util:compile_to_per_quantum_queries(Mod, SQL) of + case riak_kv_ts_api:compile_to_per_quantum_queries(Mod, SQL) of {ok, Compiled} -> Bucket = riak_kv_ts_util:table_to_bucket(Table), convert_cover_list( @@ -577,7 +577,6 @@ sub_tsqueryreq(Mod, DDL, SQL, State) -> case riak_kv_qry:submit(SQL, DDL) of {ok, Data} -> {reply, make_tsquery_resp(Mod, SQL, Data), State}; - %% the following timeouts are known and distinguished: {error, no_type} -> {reply, table_not_activated_response(DDL#ddl_v1.table), State}; diff --git a/src/riak_kv_ts_api.erl b/src/riak_kv_ts_api.erl new file mode 100644 index 0000000000..47098e4e1d --- /dev/null +++ 
b/src/riak_kv_ts_api.erl @@ -0,0 +1,339 @@ +%% ------------------------------------------------------------------- +%% +%% riak_kv_ts_util: supporting functions for timeseries code paths +%% +%% Copyright (c) 2016 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +%% @doc Internal API for TS calls: single-key get and delete, batch +%% put, coverage and query + +-module(riak_kv_ts_api). + +-export([ + put_data/2, put_data/3, + get_data/2, get_data/3, get_data/4, + delete_data/2, delete_data/3, delete_data/4, delete_data/5, + query/2, + compile_to_per_quantum_queries/2 %% coverage + %% To reassemble the broken-up queries into coverage entries + %% for returning to pb or http clients (each needing to + %% convert and repackage entry details in their own way), + %% respective callbacks in riak_kv_{pb,wm}_timeseries will + %% use riak_kv_ts_util:sql_to_cover/4. + ]). + +-include_lib("riak_ql/include/riak_ql_ddl.hrl"). +-include("riak_kv_wm_raw.hrl"). +-include("riak_kv_ts.hrl"). + + +-spec query(string() | ?SQL_SELECT{} | #riak_sql_describe_v1{}, #ddl_v1{}) -> + {ok, any()} | + {error, term()}. +query(QueryStringOrSQL, DDL) -> + riak_kv_qry:submit(QueryStringOrSQL, DDL). + + +-spec put_data([[riak_pb_ts_codec:ldbvalue()]], binary()) -> + ok | {error, {some_failed, integer()}} | {error, term()}. +put_data(Data, Table) -> + put_data(Data, Table, riak_ql_ddl:make_module_name(Table)). + +-spec put_data([[riak_pb_ts_codec:ldbvalue()]], binary(), module()) -> + ok | {error, {some_failed, integer()}} | {error, term()}. +put_data(Data, Table, Mod) -> + DDL = Mod:get_ddl(), + Bucket = riak_kv_ts_util:table_to_bucket(Table), + case riak_core_bucket:get_bucket(Bucket) of + {error, Reason} -> + %% happens when, for example, the table has not been + %% activated (Reason == no_type) + {error, Reason}; + BucketProps -> + case put_data_to_partitions(Data, Bucket, BucketProps, DDL, Mod) of + 0 -> + ok; + NErrors -> + {error, {some_failed, NErrors}} + end + end. 
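+%% A hedged usage sketch of the consolidated API; the GeoCheckin table
+%% with its family/series/timestamp key is the stock documentation
+%% example, assumed here to have been created and activated:
+%% ```
+%% ok = riak_kv_ts_api:put_data(
+%%        [[<<"family1">>, <<"series1">>, 1000, <<"cloudy">>],
+%%         [<<"family1">>, <<"series1">>, 2000, <<"sunny">>]],
+%%        <<"GeoCheckin">>),
+%% {ok, Record} =
+%%     riak_kv_ts_api:get_data(
+%%       [<<"family1">>, <<"series1">>, 1000], <<"GeoCheckin">>).
+%% '''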
+ +put_data_to_partitions(Data, Bucket, BucketProps, DDL, Mod) -> + NVal = proplists:get_value(n_val, BucketProps), + PartitionedData = partition_data(Data, Bucket, BucketProps, DDL, Mod), + PreflistData = add_preflists(PartitionedData, NVal, + riak_core_node_watcher:nodes(riak_kv)), + + SendFullBatches = riak_core_capability:get({riak_kv, w1c_batch_vnode}, false), + %% Default to 1MB for a max batch size to not overwhelm disterl + CappedBatchSize = app_helper:get_env(riak_kv, timeseries_max_batch_size, + 1024 * 1024), + + EncodeFn = + fun(O) -> riak_object:to_binary(v1, O, msgpack) end, + + {ReqIds, FailReqs} = + lists:foldl( + fun({DocIdx, Preflist, Records}, {GlobalReqIds, GlobalErrorsCnt}) -> + case riak_kv_w1c_worker:validate_options( + NVal, Preflist, [], BucketProps) of + {ok, W, PW} -> + DataForVnode = pick_batch_option(SendFullBatches, + CappedBatchSize, + Records, + termsize(hd(Records)), + length(Records)), + Ids = + invoke_async_put(fun(Record) -> + build_object(Bucket, Mod, DDL, + Record, DocIdx) + end, + fun(RObj, LK) -> + riak_kv_w1c_worker:async_put( + RObj, W, PW, Bucket, NVal, LK, + EncodeFn, Preflist) + end, + fun(RObjs) -> + riak_kv_w1c_worker:ts_batch_put( + RObjs, W, PW, Bucket, NVal, + EncodeFn, Preflist) + end, + DataForVnode), + {GlobalReqIds ++ Ids, GlobalErrorsCnt}; + _Error -> + {GlobalReqIds, GlobalErrorsCnt + length(Records)} + end + end, + {[], 0}, PreflistData), + Responses = riak_kv_w1c_worker:async_put_replies(ReqIds, []), + _NErrors = + length( + lists:filter( + fun({error, _}) -> true; + (_) -> false + end, Responses)) + FailReqs. + + +-spec partition_data(Data :: list(term()), + Bucket :: {binary(), binary()}, + BucketProps :: proplists:proplist(), + DDL :: ?DDL{}, + Mod :: module()) -> + list(tuple(chash:index(), list(term()))). +partition_data(Data, Bucket, BucketProps, DDL, Mod) -> + PartitionTuples = + [ { riak_core_util:chash_key({Bucket, row_to_key(R, DDL, Mod)}, + BucketProps), R } || R <- Data ], + dict:to_list( + lists:foldl(fun({Idx, R}, Dict) -> + dict:append(Idx, R, Dict) + end, + dict:new(), + PartitionTuples)). + +row_to_key(Row, DDL, Mod) -> + riak_kv_ts_util:encode_typeval_key( + riak_ql_ddl:get_partition_key(DDL, Row, Mod)). + + +%% Returns a tuple with a list of request IDs and an error tally +invoke_async_put(BuildRObjFun, AsyncPutFun, _BatchPutFun, {individual, Records}) -> + lists:map(fun(Record) -> + {LK, RObj} = BuildRObjFun(Record), + {ok, ReqId} = AsyncPutFun(RObj, LK), + ReqId + end, + Records); +invoke_async_put(BuildRObjFun, _AsyncPutFun, BatchPutFun, {batches, Batches}) -> + lists:map(fun(Batch) -> + RObjs = lists:map(BuildRObjFun, Batch), + {ok, ReqId} = BatchPutFun(RObjs), + ReqId + end, + Batches). + +%%%%%%%% +%% Utility functions for batch delivery of records +termsize(Term) -> + size(term_to_binary(Term)). + +pick_batch_option(_, _, Records, _, 1) -> + {individual, Records}; +pick_batch_option(true, MaxBatch, Records, SampleSize, _NumRecs) -> + {batches, create_batches(Records, + estimated_row_count(SampleSize, MaxBatch))}; +pick_batch_option(false, _, Records, _, _) -> + {individual, Records}. + +estimated_row_count(SampleRowSize, MaxBatchSize) -> + %% Assume some rows will be larger, so introduce a fudge factor of + %% roughly 10 percent. + RowSizeFudged = (SampleRowSize * 10) div 9, + MaxBatchSize div RowSizeFudged. + +create_batches(Rows, MaxSize) -> + create_batches(Rows, MaxSize, []). 
+ +create_batches([], _MaxSize, Accum) -> + Accum; +create_batches(Rows, MaxSize, Accum) when length(Rows) < MaxSize -> + [Rows|Accum]; +create_batches(Rows, MaxSize, Accum) -> + {First, Rest} = lists:split(MaxSize, Rows), + create_batches(Rest, MaxSize, [First|Accum]). +%%%%%%%% + +add_preflists(PartitionedData, NVal, UpNodes) -> + lists:map(fun({Idx, Rows}) -> {Idx, + riak_core_apl:get_apl_ann(Idx, NVal, UpNodes), + Rows} end, + PartitionedData). + +build_object(Bucket, Mod, DDL, Row, PK) -> + Obj = Mod:add_column_info(Row), + LK = riak_kv_ts_util:encode_typeval_key( + riak_ql_ddl:get_local_key(DDL, Row, Mod)), + + RObj = riak_object:newts( + Bucket, PK, Obj, + dict:from_list([{?MD_DDL_VERSION, ?DDL_VERSION}])), + {LK, RObj}. + + + +-spec get_data([riak_pb_ts_codec:ldbvalue()], binary()) -> + {ok, {[binary()], [[riak_pb_ts_codec:ldbvalue()]]}} | {error, term()}. +get_data(Key, Table) -> + get_data(Key, Table, undefined, []). + +-spec get_data([riak_pb_ts_codec:ldbvalue()], binary(), module()) -> + {ok, {[binary()], [[riak_pb_ts_codec:ldbvalue()]]}} | {error, term()}. +get_data(Key, Table, Mod) -> + get_data(Key, Table, Mod, []). + +-spec get_data([riak_pb_ts_codec:ldbvalue()], binary(), module(), proplists:proplist()) -> + {ok, [{binary(), riak_pb_ts_codec:ldbvalue()}]} | {error, term()}. +get_data(Key, Table, Mod0, Options) -> + Mod = + case Mod0 of + undefined -> + riak_ql_ddl:make_module_name(Table); + Mod0 -> + Mod0 + end, + DDL = Mod:get_ddl(), + Result = + case riak_kv_ts_util:make_ts_keys(Key, DDL, Mod) of + {ok, PKLK} -> + riak_client:get( + riak_kv_ts_util:table_to_bucket(Table), PKLK, Options, + {riak_client, [node(), undefined]}); + ErrorReason -> + ErrorReason + end, + case Result of + {ok, RObj} -> + case riak_object:get_value(RObj) of + [] -> + {error, notfound}; + Record -> + {ok, Record} + end; + ErrorReason2 -> + ErrorReason2 + end. + + +-spec delete_data([any()], riak_object:bucket()) -> + ok | {error, term()}. +delete_data(Key, Table) -> + delete_data(Key, Table, undefined, [], undefined). + +-spec delete_data([any()], riak_object:bucket(), module()) -> + ok | {error, term()}. +delete_data(Key, Table, Mod) -> + delete_data(Key, Table, Mod, [], undefined). + +-spec delete_data([any()], riak_object:bucket(), module(), proplists:proplist()) -> + ok | {error, term()}. +delete_data(Key, Table, Mod, Options) -> + delete_data(Key, Table, Mod, Options, undefined). + +-spec delete_data([any()], riak_object:bucket(), module(), proplists:proplist(), + undefined | vclock:vclock()) -> + ok | {error, term()}. +delete_data(Key, Table, Mod0, Options0, VClock0) -> + Mod = + case Mod0 of + undefined -> + riak_ql_ddl:make_module_name(Table); + Mod0 -> + Mod0 + end, + %% Pass the {dw,all} option in to the delete FSM + %% to make sure all tombstones are written by the + %% async put before the reaping get runs otherwise + %% if the default {dw,quorum} is used there is the + %% possibility that the last tombstone put overlaps + %% inside the KV vnode with the reaping get and + %% prevents the tombstone removal. 
+ Options = lists:keystore(dw, 1, Options0, {dw, all}), + DDL = Mod:get_ddl(), + VClock = + case VClock0 of + undefined -> + %% this will trigger a get in riak_kv_delete:delete to + %% retrieve the actual vclock + undefined; + VClock0 -> + %% else, clients may have it already (e.g., from an + %% earlier riak_object:get), which will short-circuit + %% to avoid a separate get + riak_object:decode_vclock(VClock0) + end, + Result = + case riak_kv_ts_util:make_ts_keys(Key, DDL, Mod) of + {ok, PKLK} -> + riak_client:delete_vclock( + riak_kv_ts_util:table_to_bucket(Table), PKLK, VClock, Options, + {riak_client, [node(), undefined]}); + ErrorReason -> + ErrorReason + end, + Result. + + + + +-spec compile_to_per_quantum_queries(module(), ?SQL_SELECT{}) -> + {ok, [?SQL_SELECT{}]} | {error, any()}. +%% @doc Break up a query into a list of per-quantum queries +compile_to_per_quantum_queries(Mod, SQL) -> + case catch Mod:get_ddl() of + {_, {undef, _}} -> + {error, no_helper_module}; + DDL -> + case riak_ql_ddl:is_query_valid( + Mod, DDL, riak_kv_ts_util:sql_record_to_tuple(SQL)) of + true -> + riak_kv_qry_compiler:compile(DDL, SQL, undefined); + {false, _Errors} -> + {error, invalid_query} + end + end. diff --git a/src/riak_kv_ts_util.erl b/src/riak_kv_ts_util.erl index 319010d514..0d61e127f4 100644 --- a/src/riak_kv_ts_util.erl +++ b/src/riak_kv_ts_util.erl @@ -27,17 +27,13 @@ -export([ apply_timeseries_bucket_props/2, build_sql_record/3, - compile_to_per_quantum_queries/2, - delete_data/2, delete_data/3, delete_data/4, delete_data/5, encode_typeval_key/1, get_column_types/2, - get_data/2, get_data/3, get_data/4, get_table_ddl/1, lk/1, make_ts_keys/3, maybe_parse_table_def/2, pk/1, - put_data/2, put_data/3, queried_table/1, sql_record_to_tuple/1, sql_to_cover/4, @@ -56,7 +52,7 @@ %% bucket tuple. This function is a convenient mechanism for doing so %% and making that transition more obvious. --include("riak_kv_wm_raw.hrl"). +%%-include("riak_kv_wm_raw.hrl"). -include_lib("riak_ql/include/riak_ql_ddl.hrl"). -include("riak_kv_ts.hrl"). @@ -382,262 +378,11 @@ validate_rows(Mod, Rows) -> lists:reverse(BadRowIdxs). --spec put_data([[riak_pb_ts_codec:ldbvalue()]], binary()) -> - ok | {error, {some_failed, integer()}} | {error, term()}. -put_data(Data, Table) -> - put_data(Data, Table, riak_ql_ddl:make_module_name(Table)). - --spec put_data([[riak_pb_ts_codec:ldbvalue()]], binary(), module()) -> - ok | {error, {some_failed, integer()}} | {error, term()}. -put_data(Data, Table, Mod) -> - DDL = Mod:get_ddl(), - Bucket = table_to_bucket(Table), - case riak_core_bucket:get_bucket(Bucket) of - {error, Reason} -> - %% happens when, for example, the table has not been - %% activated (Reason == no_type) - {error, Reason}; - BucketProps -> - case put_data_to_partitions(Data, Bucket, BucketProps, DDL, Mod) of - 0 -> - ok; - NErrors -> - {error, {some_failed, NErrors}} - end - end. 
- -put_data_to_partitions(Data, Bucket, BucketProps, DDL, Mod) -> - NVal = proplists:get_value(n_val, BucketProps), - PartitionedData = partition_data(Data, Bucket, BucketProps, DDL, Mod), - PreflistData = add_preflists(PartitionedData, NVal, - riak_core_node_watcher:nodes(riak_kv)), - - SendFullBatches = riak_core_capability:get({riak_kv, w1c_batch_vnode}, false), - %% Default to 1MB for a max batch size to not overwhelm disterl - CappedBatchSize = app_helper:get_env(riak_kv, timeseries_max_batch_size, - 1024 * 1024), - - EncodeFn = - fun(O) -> riak_object:to_binary(v1, O, msgpack) end, - - {ReqIds, FailReqs} = - lists:foldl( - fun({DocIdx, Preflist, Records}, {GlobalReqIds, GlobalErrorsCnt}) -> - case riak_kv_w1c_worker:validate_options( - NVal, Preflist, [], BucketProps) of - {ok, W, PW} -> - DataForVnode = pick_batch_option(SendFullBatches, - CappedBatchSize, - Records, - termsize(hd(Records)), - length(Records)), - Ids = - invoke_async_put(fun(Record) -> - build_object(Bucket, Mod, DDL, - Record, DocIdx) - end, - fun(RObj, LK) -> - riak_kv_w1c_worker:async_put( - RObj, W, PW, Bucket, NVal, LK, - EncodeFn, Preflist) - end, - fun(RObjs) -> - riak_kv_w1c_worker:ts_batch_put( - RObjs, W, PW, Bucket, NVal, - EncodeFn, Preflist) - end, - DataForVnode), - {GlobalReqIds ++ Ids, GlobalErrorsCnt}; - _Error -> - {GlobalReqIds, GlobalErrorsCnt + length(Records)} - end - end, - {[], 0}, PreflistData), - Responses = riak_kv_w1c_worker:async_put_replies(ReqIds, []), - _NErrors = - length( - lists:filter( - fun({error, _}) -> true; - (_) -> false - end, Responses)) + FailReqs. - - --spec partition_data(Data :: list(term()), - Bucket :: {binary(), binary()}, - BucketProps :: proplists:proplist(), - DDL :: ?DDL{}, - Mod :: module()) -> - list(tuple(chash:index(), list(term()))). -partition_data(Data, Bucket, BucketProps, DDL, Mod) -> - PartitionTuples = - [ { riak_core_util:chash_key({Bucket, row_to_key(R, DDL, Mod)}, - BucketProps), R } || R <- Data ], - dict:to_list( - lists:foldl(fun({Idx, R}, Dict) -> - dict:append(Idx, R, Dict) - end, - dict:new(), - PartitionTuples)). - -row_to_key(Row, DDL, Mod) -> - riak_kv_ts_util:encode_typeval_key( - riak_ql_ddl:get_partition_key(DDL, Row, Mod)). - -%%%%%%%% -%% Utility functions for batch delivery of records -termsize(Term) -> - size(term_to_binary(Term)). - -pick_batch_option(_, _, Records, _, 1) -> - {individual, Records}; -pick_batch_option(true, MaxBatch, Records, SampleSize, _NumRecs) -> - {batches, create_batches(Records, - estimated_row_count(SampleSize, MaxBatch))}; -pick_batch_option(false, _, Records, _, _) -> - {individual, Records}. - -estimated_row_count(SampleRowSize, MaxBatchSize) -> - %% Assume some rows will be larger, so introduce a fudge factor of - %% roughly 10 percent. - RowSizeFudged = (SampleRowSize * 10) div 9, - MaxBatchSize div RowSizeFudged. - -create_batches(Rows, MaxSize) -> - create_batches(Rows, MaxSize, []). - -create_batches([], _MaxSize, Accum) -> - Accum; -create_batches(Rows, MaxSize, Accum) when length(Rows) < MaxSize -> - [Rows|Accum]; -create_batches(Rows, MaxSize, Accum) -> - {First, Rest} = lists:split(MaxSize, Rows), - create_batches(Rest, MaxSize, [First|Accum]). -%%%%%%%% - -add_preflists(PartitionedData, NVal, UpNodes) -> - lists:map(fun({Idx, Rows}) -> {Idx, - riak_core_apl:get_apl_ann(Idx, NVal, UpNodes), - Rows} end, - PartitionedData). 
- -build_object(Bucket, Mod, DDL, Row, PK) -> - Obj = Mod:add_column_info(Row), - LK = riak_kv_ts_util:encode_typeval_key( - riak_ql_ddl:get_local_key(DDL, Row, Mod)), - - RObj = riak_object:newts( - Bucket, PK, Obj, - dict:from_list([{?MD_DDL_VERSION, ?DDL_VERSION}])), - {LK, RObj}. - - - --spec get_data([riak_pb_ts_codec:ldbvalue()], binary()) -> - {ok, {[binary()], [[riak_pb_ts_codec:ldbvalue()]]}} | {error, term()}. -get_data(Key, Table) -> - get_data(Key, Table, undefined, []). - --spec get_data([riak_pb_ts_codec:ldbvalue()], binary(), module()) -> - {ok, {[binary()], [[riak_pb_ts_codec:ldbvalue()]]}} | {error, term()}. -get_data(Key, Table, Mod) -> - get_data(Key, Table, Mod, []). - --spec get_data([riak_pb_ts_codec:ldbvalue()], binary(), module(), proplists:proplist()) -> - {ok, [{binary(), riak_pb_ts_codec:ldbvalue()}]} | {error, term()}. -get_data(Key, Table, Mod0, Options) -> - Mod = - case Mod0 of - undefined -> - riak_ql_ddl:make_module_name(Table); - Mod0 -> - Mod0 - end, - DDL = Mod:get_ddl(), - Result = - case make_ts_keys(Key, DDL, Mod) of - {ok, PKLK} -> - riak_client:get( - table_to_bucket(Table), PKLK, Options, - {riak_client, [node(), undefined]}); - ErrorReason -> - ErrorReason - end, - case Result of - {ok, RObj} -> - case riak_object:get_value(RObj) of - [] -> - {error, notfound}; - Record -> - {ok, Record} - end; - ErrorReason2 -> - ErrorReason2 - end. - -spec get_column_types(list(binary()), module()) -> [riak_pb_ts_codec:tscolumntype()]. get_column_types(ColumnNames, Mod) -> [Mod:get_field_type([N]) || N <- ColumnNames]. --spec delete_data([any()], riak_object:bucket()) -> - ok | {error, term()}. -delete_data(Key, Table) -> - delete_data(Key, Table, undefined, [], undefined). - --spec delete_data([any()], riak_object:bucket(), module()) -> - ok | {error, term()}. -delete_data(Key, Table, Mod) -> - delete_data(Key, Table, Mod, [], undefined). - --spec delete_data([any()], riak_object:bucket(), module(), proplists:proplist()) -> - ok | {error, term()}. -delete_data(Key, Table, Mod, Options) -> - delete_data(Key, Table, Mod, Options, undefined). - --spec delete_data([any()], riak_object:bucket(), module(), proplists:proplist(), - undefined | vclock:vclock()) -> - ok | {error, term()}. -delete_data(Key, Table, Mod0, Options0, VClock0) -> - Mod = - case Mod0 of - undefined -> - riak_ql_ddl:make_module_name(Table); - Mod0 -> - Mod0 - end, - %% Pass the {dw,all} option in to the delete FSM - %% to make sure all tombstones are written by the - %% async put before the reaping get runs otherwise - %% if the default {dw,quorum} is used there is the - %% possibility that the last tombstone put overlaps - %% inside the KV vnode with the reaping get and - %% prevents the tombstone removal. - Options = lists:keystore(dw, 1, Options0, {dw, all}), - DDL = Mod:get_ddl(), - VClock = - case VClock0 of - undefined -> - %% this will trigger a get in riak_kv_delete:delete to - %% retrieve the actual vclock - undefined; - VClock0 -> - %% else, clients may have it already (e.g., from an - %% earlier riak_object:get), which will short-circuit - %% to avoid a separate get - riak_object:decode_vclock(VClock0) - end, - Result = - case make_ts_keys(Key, DDL, Mod) of - {ok, PKLK} -> - riak_client:delete_vclock( - table_to_bucket(Table), PKLK, VClock, Options, - {riak_client, [node(), undefined]}); - ErrorReason -> - ErrorReason - end, - Result. 
- - %% Result from riak_client:get_cover is a nested list of coverage plan %% because KV coverage requests are designed that way, but in our case %% all we want is the singleton head @@ -657,23 +402,6 @@ sql_to_cover(Client, [SQL|Tail], Bucket, Accum) -> Description}|Accum]) end. --spec compile_to_per_quantum_queries(module(), ?SQL_SELECT{}) -> - {ok, [?SQL_SELECT{}]} | {error, any()}. -%% @doc Break up a query into a list of per-quantum queries -compile_to_per_quantum_queries(Mod, SQL) -> - case catch Mod:get_ddl() of - {_, {undef, _}} -> - {error, no_helper_module}; - DDL -> - case riak_ql_ddl:is_query_valid( - Mod, DDL, sql_record_to_tuple(SQL)) of - true -> - riak_kv_qry_compiler:compile(DDL, SQL, undefined); - {false, _Errors} -> - {error, invalid_query} - end - end. - %% Generate a human-readable description of the target %% <<"
/ time > X and time < Y">> @@ -798,6 +526,3 @@ make_ts_keys_4_test() -> ). -endif. - -flat_format(F, A) -> - lists:flatten(io_lib:format(F, A)). diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl index be1b8669a4..50fcabdd94 100644 --- a/src/riak_kv_wm_timeseries.erl +++ b/src/riak_kv_wm_timeseries.erl @@ -548,7 +548,7 @@ call_api_function(RD, Ctx = #ctx{api_call = put, {_, {undef, _}} -> handle_error({no_such_table, Table}, RD, Ctx); [] -> - case riak_kv_ts_util:put_data(Records, Table, Mod) of + case riak_kv_ts_api:put_data(Records, Table, Mod) of ok -> prepare_data_in_body(RD, Ctx#ctx{result = ok}); {error, {some_failed, ErrorCount}} -> @@ -568,7 +568,7 @@ call_api_function(RD, Ctx0 = #ctx{api_call = get, true -> [{timeout, Timeout}] end, Mod = riak_ql_ddl:make_module_name(Table), - case catch riak_kv_ts_util:get_data(Key, Table, Mod, Options) of + case catch riak_kv_ts_api:get_data(Key, Table, Mod, Options) of {_, {undef, _}} -> handle_error({no_such_table, Table}, RD, Ctx0); {ok, Record} -> @@ -597,7 +597,7 @@ call_api_function(RD, Ctx = #ctx{api_call = delete, true -> [{timeout, Timeout}] end, Mod = riak_ql_ddl:make_module_name(Table), - case catch riak_kv_ts_util:delete_data(Key, Table, Mod, Options) of + case catch riak_kv_ts_api:delete_data(Key, Table, Mod, Options) of {_, {undef, _}} -> handle_error({no_such_table, Table}, RD, Ctx); ok -> @@ -617,7 +617,7 @@ call_api_function(RD, Ctx = #ctx{api_call = coverage, case riak_ql_parser:parse(Lexed) of {ok, SQL = ?SQL_SELECT{'FROM' = Table}} -> Mod = riak_ql_ddl:make_module_name(Table), - case riak_kv_ts_util:compile_to_per_quantum_queries(Mod, SQL) of + case riak_kv_ts_api:compile_to_per_quantum_queries(Mod, SQL) of {ok, Compiled} -> Bucket = riak_kv_ts_util:table_to_bucket(Table), Results = diff --git a/src/riak_kv_wm_timeseries_query.erl b/src/riak_kv_wm_timeseries_query.erl index 1c8ca0045e..ab771c6073 100644 --- a/src/riak_kv_wm_timeseries_query.erl +++ b/src/riak_kv_wm_timeseries_query.erl @@ -376,7 +376,7 @@ process_query(SQL = ?SQL_SELECT{'FROM' = Table}, RD, Ctx0 = #ctx{}) -> {_, {undef, _}} -> handle_error({no_such_table, Table}, RD, Ctx0); DDL -> - case riak_kv_qry:submit(SQL, DDL) of + case riak_kv_ts_api:query(SQL, DDL) of {ok, Data} -> {ColumnNames, _ColumnTypes, Rows} = Data, Ctx = Ctx0#ctx{result = {ColumnNames, Rows}}, @@ -402,7 +402,7 @@ process_query(SQL = #riak_sql_describe_v1{'DESCRIBE' = Table}, RD, Ctx0 = #ctx{} {_, {undef, _}} -> handle_error({no_such_table, Table}, RD, Ctx0); DDL -> - case riak_kv_qry:submit(SQL, DDL) of + case riak_kv_ts_api:query(SQL, DDL) of {ok, Data} -> ColumnNames = [<<"Column">>, <<"Type">>, <<"Is Null">>, <<"Primary Key">>, <<"Local Key">>], From 2c2561bdeb88ee31c4c744a9953494b7d577b42e Mon Sep 17 00:00:00 2001 From: Andrei Zavada Date: Tue, 1 Mar 2016 13:39:33 +0200 Subject: [PATCH 092/122] cumulative update including * don't support coverage API call in HTTP callbacks; * unify permission strings, for shared use between wm and pb callbacks; * straighten up function call chain in riak_kv_wm_timeseries, now that all api calls are mapped 1:1 onto HTTP methods; * in particular, use wrq:path_info/2 to get table and keys, instead of clunky tokenization where it's no longer necessary; * introduce security and validation checks for listkeys. 
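The permission unification mentioned above reduces to the single mapping
function added in the diff that follows; for instance:

    1> riak_kv_ts_util:api_call_to_perm(get).
    "riak_ts.get"
    2> riak_kv_ts_util:api_call_to_perm(query_select).
    "riak_ts.query_select"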
--- src/riak_kv_pb_timeseries.erl | 12 +- src/riak_kv_ts_util.erl | 22 ++ src/riak_kv_web.erl | 1 - src/riak_kv_wm_timeseries.erl | 352 ++++++++----------------- src/riak_kv_wm_timeseries_listkeys.erl | 106 ++++++-- src/riak_kv_wm_timeseries_query.erl | 66 +++-- 6 files changed, 247 insertions(+), 312 deletions(-) diff --git a/src/riak_kv_pb_timeseries.erl b/src/riak_kv_pb_timeseries.erl index 84bcaddf18..077627298e 100644 --- a/src/riak_kv_pb_timeseries.erl +++ b/src/riak_kv_pb_timeseries.erl @@ -100,17 +100,17 @@ decode(Code, Bin) -> {ok, make_decoder_error_response(Error)} end; #tsgetreq{table = Table}-> - {ok, Msg, {"riak_kv.ts_get", Table}}; + {ok, Msg, {riak_kv_ts_util:api_call_to_perm(get), Table}}; #tsputreq{table = Table} -> - {ok, Msg, {"riak_kv.ts_put", Table}}; + {ok, Msg, {riak_kv_ts_util:api_call_to_perm(put), Table}}; #tsttbputreq{table = Table} -> - {ok, Msg, {"riak_kv.ts_put", Table}}; + {ok, Msg, {riak_kv_ts_util:api_call_to_perm(put), Table}}; #tsdelreq{table = Table} -> - {ok, Msg, {"riak_kv.ts_del", Table}}; + {ok, Msg, {riak_kv_ts_util:api_call_to_perm(delete), Table}}; #tslistkeysreq{table = Table} -> - {ok, Msg, {"riak_kv.ts_listkeys", Table}}; + {ok, Msg, {riak_kv_ts_util:api_call_to_perm(listkeys), Table}}; #tscoveragereq{table = Table} -> - {ok, Msg, {"riak_kv.ts_cover", Table}} + {ok, Msg, {riak_kv_ts_util:api_call_to_perm(coverage), Table}} end. -spec decode_query(Query::#tsinterpolation{}) -> diff --git a/src/riak_kv_ts_util.erl b/src/riak_kv_ts_util.erl index 0d61e127f4..f33e80b106 100644 --- a/src/riak_kv_ts_util.erl +++ b/src/riak_kv_ts_util.erl @@ -25,6 +25,7 @@ -module(riak_kv_ts_util). -export([ + api_call_to_perm/1, apply_timeseries_bucket_props/2, build_sql_record/3, encode_typeval_key/1, @@ -43,6 +44,27 @@ -export([explain_query/1, explain_query/2]). -export([explain_query_print/1]). +-type api_call() :: get | put | delete | listkeys | coverage | + query_create_table | query_select | query_describe. +-spec api_call_to_perm(api_call()) -> string(). +api_call_to_perm(get) -> + "riak_ts.get"; +api_call_to_perm(put) -> + "riak_ts.put"; +api_call_to_perm(delete) -> + "riak_ts.delete"; +api_call_to_perm(listkeys) -> + "riak_ts.listkeys"; +api_call_to_perm(coverage) -> + "riak_ts.coverage"; +api_call_to_perm(query_create_table) -> + "riak_ts.query_create_table"; +api_call_to_perm(query_select) -> + "riak_ts.query_select"; +api_call_to_perm(query_describe) -> + "riak_ts.query_describe". + + %% NOTE on table_to_bucket/1: Clients will work with table %% names. Those names map to a bucket type/bucket name tuple in Riak, %% with both the type name and the bucket name matching the table. diff --git a/src/riak_kv_web.erl b/src/riak_kv_web.erl index 4314762aa6..6ae950bfcf 100644 --- a/src/riak_kv_web.erl +++ b/src/riak_kv_web.erl @@ -128,7 +128,6 @@ raw_dispatch(Name) -> [{["ts", api_version, "tables", table, "list_keys"], riak_kv_wm_timeseries_listkeys, Props}, {["ts", api_version, "tables", table, "keys", '*'], riak_kv_wm_timeseries, Props}, {["ts", api_version, "query"], riak_kv_wm_timeseries_query, Props} - %% {["ts", api_version, "coverage"], riak_kv_wm_timeseries, Props} ] || {_Prefix, Props} <- Props2]). is_post(Req) -> diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl index 50fcabdd94..3ba7289db1 100644 --- a/src/riak_kv_wm_timeseries.erl +++ b/src/riak_kv_wm_timeseries.erl @@ -23,10 +23,10 @@ %% @doc Resource for Riak TS operations over HTTP. 
 %%
 %% ```
-%% GET    /ts/v1/table/Table          single-key get
-%% DELETE /ts/v1/table/Table          single-key delete
-%% PUT    /ts/v1/table/Table          batch put
-%% GET    /ts/v1/coverage             coverage for a query
+%% GET    /ts/v1/table/Table/keys/K1/V1/...  single-key get
+%% DELETE /ts/v1/table/Table/keys/K1/V1/...  single-key delete
+%% POST   /ts/v1/table/Table/keys            single-key or batch put,
+%%                                           depending on the body
 %% '''
 %%
 %% Request body is expected to be a JSON containing key and/or value(s).
@@ -65,7 +65,7 @@
          security,     %% security context
          client,       %% riak_client() - the store client
          riak,         %% local | {node(), atom()} - params for riak client
-         api_call :: undefined|get|put|delete|coverage,
+         api_call :: undefined|get|put|delete,
          table :: undefined | binary(),
          %% data in/out: the following fields are either
-         %% extracted from the JSON that came in the request body
-         %% in case of a PUT, or filled out by retrieved values
-         %% for shipping (as JSON) in response body
+         %% extracted from the JSON/path elements that came in
+         %% the request body in case of a PUT, or filled out by
+         %% retrieved values for shipping (as JSON) in response
+         %% body
          key :: undefined | ts_rec(),  %% parsed out of JSON that came in the body
          data :: undefined | [ts_rec()],  %% ditto
-         query :: string(),
          result :: undefined | ok | {Headers::[binary()], Rows::[ts_rec()]}
-                 | [{entry, proplists:proplist()}]
         }).
 
 -define(DEFAULT_TIMEOUT, 60000).
 -define(TABLE_ACTIVATE_WAIT, 30). %% wait until table's bucket type is activated
 
--define(CB_RV_SPEC, {boolean(), #wm_reqdata{}, #ctx{}}).
+-define(CB_RV_SPEC, {boolean()|atom()|tuple(), #wm_reqdata{}, #ctx{}}).
 -type ts_rec() :: [riak_pb_ts_codec:ldbvalue()].
@@ -97,9 +95,8 @@ init(Props) ->
     {boolean(), #wm_reqdata{}, #ctx{}}.
 %% @doc Determine whether or not a connection to Riak
 %% can be established. This function also takes this
-%% opportunity to extract the 'bucket' and 'key' path
-%% bindings from the dispatch, as well as any vtag
-%% query parameter.
+%% opportunity to extract the 'table' and 'key' path
+%% bindings from the dispatch.
 service_available(RD, Ctx = #ctx{riak = RiakProps}) ->
     case riak_kv_wm_utils:get_riak_client(
            RiakProps, riak_kv_wm_utils:get_client_id(RD)) of
         {ok, C} ->
             {true, RD,
              Ctx#ctx{api_version = wrq:path_info(api_version, RD),
                      method = wrq:method(RD),
                      client = C,
-                     table =
-                         case wrq:path_info(table, RD) of
-                             undefined -> undefined;
-                             B -> list_to_binary(riak_kv_wm_utils:maybe_decode_uri(RD, B))
-                         end
+                     table = utf8_to_binary(
+                               mochiweb_util:unquote(
+                                 wrq:path_info(table, RD)))
                     }};
-        Error ->
-            {false, wrq:set_resp_body(
-                      flat_format("Unable to connect to Riak: ~p", [Error]),
-                      wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)),
-             Ctx}
+        {error, Reason} ->
+            handle_error({riak_client_error, Reason}, RD, Ctx)
     end.
 
 
-is_authorized(ReqData, Ctx) ->
-    case riak_api_web_security:is_authorized(ReqData) of
+is_authorized(RD, Ctx) ->
+    case riak_api_web_security:is_authorized(RD) of
         false ->
-            {"Basic realm=\"Riak\"", ReqData, Ctx};
+            {"Basic realm=\"Riak\"", RD, Ctx};
         {true, SecContext} ->
-            {true, ReqData, Ctx#ctx{security = SecContext}};
+            {true, RD, Ctx#ctx{security = SecContext}};
         insecure ->
-            %% XXX 301 may be more appropriate here, but since the http and
-            %% https port are different and configurable, it is hard to figure
-            %% out the redirect URL to serve.
-            {{halt, 426},
-             wrq:append_to_resp_body(
-               <<"Security is enabled and "
-                 "Riak does not accept credentials over HTTP. Try HTTPS instead.">>, ReqData),
-             Ctx}
+            handle_error(insecure_connection, RD, Ctx)
     end.
@@ -151,10 +136,6 @@ forbidden(RD, Ctx) ->
             %% plug in early, and just do what it takes to do the job
             {false, RD, Ctx}
     end.
-%% Because webmachine chooses to (not) call certain callbacks -%% depending on request method used, sometimes accept_doc_body is not -%% called at all, and we arrive at produce_doc_body empty-handed. -%% This is the case when curl is executed with -X GET and --data. -spec allowed_methods(#wm_reqdata{}, #ctx{}) -> @@ -190,7 +171,7 @@ preexec(RD, Ctx) -> case validate_request(RD, Ctx) of {true, RD1, Ctx1} -> case check_permissions(RD1, Ctx1) of - {false, RD2, Ctx2} -> + {true, RD2, Ctx2} -> call_api_function(RD2, Ctx2); FalseWithDetails -> FalseWithDetails @@ -210,69 +191,108 @@ validate_request(RD, Ctx) -> -spec validate_request_v1(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. validate_request_v1(RD, Ctx) -> - case string:tokens(wrq:path(RD), "/") of - ["ts", "v1", "tables", Table, "keys" | KeysInUrl] + case wrq:path_tokens(RD) of + KeysInUrl when KeysInUrl /= [] -> KeysInUrlUnquoted = lists:map(fun mochiweb_util:unquote/1, KeysInUrl), - validate_request_v1_with({Table, KeysInUrlUnquoted}, RD, Ctx); + validate_request_v1_with(KeysInUrlUnquoted, RD, Ctx); _ -> validate_request_v1_with(json, RD, Ctx) end. --spec validate_request_v1_with(json | {string(), [string()]}, #wm_reqdata{}, #ctx{}) -> +-spec validate_request_v1_with(json | [string()], #wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. validate_request_v1_with(json, RD, Ctx = #ctx{method = Method}) -> Json = extract_json(RD), - case {Method, string:tokens(wrq:path(RD), "/"), - extract_data(Json), extract_query(Json)} of + case {Method, extract_data(Json)} of %% batch put - {'POST', - ["ts", "v1", "tables", Table, "keys"], - Data, _} - when is_list(Table), Data /= undefined -> + {'POST', Data} + when Data /= undefined -> valid_params( RD, Ctx#ctx{api_version = "v1", api_call = put, - table = list_to_binary(Table), data = Data}); - %% coverage - {'GET', - ["ts", "v1", "coverage"], - undefined, Query} - when is_list(Query) -> - valid_params( - RD, Ctx#ctx{api_version = "v1", api_call = coverage, - query = Query}); + data = Data}); _Invalid -> handle_error({malformed_request, Method}, RD, Ctx) end; -validate_request_v1_with({Table, KeysInUrl}, RD, Ctx = #ctx{method = Method}) -> - %% only get and delete can have keys in url - case {Method, - path_elements_to_key(Table, KeysInUrl)} of +validate_request_v1_with(KeysInUrl, RD, Ctx = #ctx{method = Method, + table = Table}) -> + case {Method, path_elements_to_key(Table, KeysInUrl)} of {'GET', {ok, Key}} -> valid_params( RD, Ctx#ctx{api_version = "v1", api_call = get, - table = list_to_binary(Table), key = Key}); - %% single-key delete + key = Key}); {'DELETE', {ok, Key}} -> valid_params( RD, Ctx#ctx{api_version = "v1", api_call = delete, - table = list_to_binary(Table), key = Key}); + key = Key}); {_, {error, Reason}} -> handle_error(Reason, RD, Ctx); - {'POST', _} -> - handle_error(url_key_with_put, RD, Ctx) + {BadMethod, _} -> + handle_error({url_key_bad_method, BadMethod}, RD, Ctx) + end. + +extract_json(RD) -> + case proplists:get_value("json", RD#wm_reqdata.req_qs) of + undefined -> + %% if it was a PUT or POST, data is in body + binary_to_list(wrq:req_body(RD)); + BodyInPost -> + BodyInPost + end. + +%% because, techically, key and data are 'arguments', we check they +%% are well-formed, too. +-spec extract_data(binary()) -> term(). +extract_data(Json) -> + try mochijson2:decode(Json) of + {struct, Decoded} when is_list(Decoded) -> + %% key and data (it's a put) + validate_ts_records( + proplists:get_value(<<"data">>, Decoded)) + catch + _:_ -> + undefined end. 
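For reference, a batch-put body that satisfies extract_data/1 as defined here is a JSON object carrying the rows under a "data" member, e.g. (row values invented):

    %% {"data": [["f1", "s1", 1420113600000, 3.5],
    %%           ["f1", "s1", 1420113700000, 4.2]]}
    {struct, Decoded} = mochijson2:decode(Body),
    Rows = proplists:get_value(<<"data">>, Decoded),
    %% each row may contain only integers, floats and binaries, which is
    %% exactly what validate_ts_record/1 below accepts.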
+ +validate_ts_record(undefined) -> + undefined; +validate_ts_record(R) when is_list(R) -> + case lists:all( + %% check that all list elements are TS types + fun(X) -> is_integer(X) orelse is_float(X) orelse is_binary(X) end, + R) of + true -> + R; + false -> + undefined + end; +validate_ts_record(_) -> + undefined. + +validate_ts_records(undefined) -> + undefined; +validate_ts_records(RR) when is_list(RR) -> + case lists:all(fun(R) -> validate_ts_record(R) /= undefined end, RR) of + true -> + RR; + false -> + undefined + end; +validate_ts_records(_) -> + undefined. + + %% extract keys from path elements in the URL (.../K1/V1/K2/V2 -> %% [{K1, V1}, {K2, V2}]), check with Table's DDL to make sure keys are %% correct and values are of (convertible to) appropriate types, and %% return the KV list --spec path_elements_to_key(string(), [string()]) -> +-spec path_elements_to_key(binary(), [string()]) -> {ok, [{string(), riak_pb_ts_codec:ldbvalue()}]} | {error, atom()|tuple()}. path_elements_to_key(Table, PEList) -> - Mod = riak_ql_ddl:make_module_name(list_to_binary(Table)), + Mod = riak_ql_ddl:make_module_name(Table), try DDL = Mod:get_ddl(), #ddl_v1{local_key = #key_v1{ast = LK}} = DDL, @@ -365,114 +385,23 @@ valid_params(RD, Ctx) -> end end. -%% This is a special case for curl -G. `curl -G host --data $data` -%% will send the $data in URL instead of in the body, so we try to -%% look for it in req_qs. -extract_json(RD) -> - case proplists:get_value("json", RD#wm_reqdata.req_qs) of - undefined -> - %% if it was a PUT or POST, data is in body - binary_to_list(wrq:req_body(RD)); - BodyInPost -> - BodyInPost - end. - -%% because, techically, key and data are 'arguments', we check they -%% are well-formed, too. --spec extract_data(binary()) -> term(). -extract_data(Json) -> - try mochijson2:decode(Json) of - {struct, Decoded} when is_list(Decoded) -> - %% key and data (it's a put) - validate_ts_records( - proplists:get_value(<<"data">>, Decoded)) - catch - _:_ -> - undefined - end. - --spec extract_query(binary()) -> term(). -extract_query(Json) -> - try mochijson2:decode(Json) of - {struct, Decoded} when is_list(Decoded) -> - validate_ts_query( - proplists:get_value(<<"query">>, Decoded)) - catch - _:_ -> - undefined - end. - - -validate_ts_record(undefined) -> - undefined; -validate_ts_record(R) when is_list(R) -> - case lists:all( - %% check that all list elements are TS types - fun(X) -> is_integer(X) orelse is_float(X) orelse is_binary(X) end, - R) of - true -> - R; - false -> - undefined - end; -validate_ts_record(_) -> - undefined. - -validate_ts_records(undefined) -> - undefined; -validate_ts_records(RR) when is_list(RR) -> - case lists:all(fun(R) -> validate_ts_record(R) /= undefined end, RR) of - true -> - RR; - false -> - undefined - end; -validate_ts_records(_) -> - undefined. - -validate_ts_query(Q) when is_binary(Q) -> - binary_to_list(Q); -validate_ts_query(_) -> - undefined. - - --spec check_permissions(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -%% We have to defer checking permission until we have figured which -%% api call it is, which is done in validate_request, which also needs -%% body, which happens to not be available in Ctx when webmachine -%% would normally call a forbidden callback. I *may* be missing -%% something, but given the extent we have bent the REST rules here, -%% checking permissions at a stage later than webmachine would have -%% done is not a big deal. +-spec check_permissions(#wm_reqdata{}, #ctx{}) -> {term(), #wm_reqdata{}, #ctx{}}. 
check_permissions(RD, Ctx = #ctx{security = undefined}) -> validate_resource(RD, Ctx); -check_permissions(RD, Ctx = #ctx{table = undefined}) -> - {false, RD, Ctx}; check_permissions(RD, Ctx = #ctx{security = Security, api_call = Call, table = Table}) -> case riak_core_security:check_permission( - {api_call_to_ts_perm(Call), Table}, Security) of + {riak_kv_ts_util:api_call_to_perm(Call), Table}, Security) of {false, Error, _} -> handle_error( - {not_permitted, unicode:characters_to_binary(Error, utf8, utf8)}, RD, Ctx); + {not_permitted, utf8_to_binary(Error)}, RD, Ctx); _ -> validate_resource(RD, Ctx) end. -api_call_to_ts_perm(get) -> - "riak_ts.get"; -api_call_to_ts_perm(put) -> - "riak_ts.put"; -api_call_to_ts_perm(delete) -> - "riak_ts.delete"; -api_call_to_ts_perm(coverage) -> - "riak_ts.coverage". -spec validate_resource(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -validate_resource(RD, Ctx = #ctx{api_call = coverage}) -> - %% there is always a resource for coverage - {false, RD, Ctx}; validate_resource(RD, Ctx = #ctx{table = Table}) -> %% Ensure the bucket type exists, otherwise 404 early. case riak_kv_wm_utils:bucket_type_exists(Table) of @@ -544,9 +473,7 @@ call_api_function(RD, Ctx = #ctx{api_call = put, Mod = riak_ql_ddl:make_module_name(Table), %% convert records to tuples, just for put Records = [list_to_tuple(R) || R <- Data], - case catch riak_kv_ts_util:validate_rows(Mod, Records) of - {_, {undef, _}} -> - handle_error({no_such_table, Table}, RD, Ctx); + case riak_kv_ts_util:validate_rows(Mod, Records) of [] -> case riak_kv_ts_api:put_data(Records, Table, Mod) of ok -> @@ -568,9 +495,7 @@ call_api_function(RD, Ctx0 = #ctx{api_call = get, true -> [{timeout, Timeout}] end, Mod = riak_ql_ddl:make_module_name(Table), - case catch riak_kv_ts_api:get_data(Key, Table, Mod, Options) of - {_, {undef, _}} -> - handle_error({no_such_table, Table}, RD, Ctx0); + case riak_kv_ts_api:get_data(Key, Table, Mod, Options) of {ok, Record} -> {ColumnNames, Row} = lists:unzip(Record), %% ColumnTypes = riak_kv_ts_util:get_column_types(ColumnNames, Mod), @@ -597,9 +522,7 @@ call_api_function(RD, Ctx = #ctx{api_call = delete, true -> [{timeout, Timeout}] end, Mod = riak_ql_ddl:make_module_name(Table), - case catch riak_kv_ts_api:delete_data(Key, Table, Mod, Options) of - {_, {undef, _}} -> - handle_error({no_such_table, Table}, RD, Ctx); + case riak_kv_ts_api:delete_data(Key, Table, Mod, Options) of ok -> prepare_data_in_body(RD, Ctx#ctx{result = ok}); {error, {bad_key_length, Got, Need}} -> @@ -608,50 +531,6 @@ call_api_function(RD, Ctx = #ctx{api_call = delete, handle_error(notfound, RD, Ctx); {error, Reason} -> handle_error({riak_error, Reason}, RD, Ctx) - end; - -call_api_function(RD, Ctx = #ctx{api_call = coverage, - query = Query, - client = Client}) -> - Lexed = riak_ql_lexer:get_tokens(Query), - case riak_ql_parser:parse(Lexed) of - {ok, SQL = ?SQL_SELECT{'FROM' = Table}} -> - Mod = riak_ql_ddl:make_module_name(Table), - case riak_kv_ts_api:compile_to_per_quantum_queries(Mod, SQL) of - {ok, Compiled} -> - Bucket = riak_kv_ts_util:table_to_bucket(Table), - Results = - [begin - Node = proplists:get_value(node, Cover), - {IP, Port} = riak_kv_pb_coverage:node_to_pb_details(Node), - {entry, - [ - {cover_context, - riak_kv_pb_coverage:term_to_checksum_binary({Cover, Range})}, - {ip, IP}, - {port, Port}, - {range, - [ - {field_name, FieldName}, - {lower_bound, StartVal}, - {lower_bound_inclusive, StartIncl}, - {upper_bound, EndVal}, - {upper_bound_inclusive, EndIncl}, - {desc, SQLtext} - ]} - ]} - end || 
{Cover, - Range = {FieldName, {{StartVal, StartIncl}, {EndVal, EndIncl}}}, - SQLtext} - <- riak_kv_ts_util:sql_to_cover(Client, Compiled, Bucket, [])], - prepare_data_in_body(RD, Ctx#ctx{result = Results}); - {error, _Reason} -> - handle_error(query_compile_fail, RD, Ctx) - end; - {ok, _NonSelectQuery} -> - handle_error(inappropriate_sql_for_coverage, RD, Ctx); - {error, Reason} -> - handle_error({query_parse_error, Reason}, RD, Ctx) end. @@ -668,24 +547,8 @@ produce_doc_body(RD, Ctx = #ctx{api_call = get, {mochijson2:encode( {struct, [{<<"columns">>, Columns}, {<<"rows">>, Rows}]}), - RD, Ctx}; -produce_doc_body(RD, Ctx = #ctx{api_call = coverage, - result = CoverageDetails}) -> - SafeCoverageDetails = - [{entry, armor_entry(E)} || {entry, E} <- CoverageDetails], - {mochijson2:encode( - {struct, [{<<"coverage">>, SafeCoverageDetails}]}), RD, Ctx}. -armor_entry(EE) -> - lists:map( - fun({cover_context, Bin}) -> - %% prevent list to be read and converted by mochijson2 - %% as utf8 binary - {cover_context, binary_to_list(Bin)}; - (X) -> X - end, EE). - error_out(Type, Fmt, Args, RD, Ctx) -> {Type, @@ -697,18 +560,25 @@ error_out(Type, Fmt, Args, RD, Ctx) -> -spec handle_error(atom()|tuple(), #wm_reqdata{}, #ctx{}) -> {tuple(), #wm_reqdata{}, #ctx{}}. handle_error(Error, RD, Ctx) -> case Error of + {riak_client_error, Reason} -> + error_out(false, + "Unable to connect to Riak: ~p", [Reason], RD, Ctx); + insecure_connection -> + error_out({halt, 426}, + "Security is enabled and Riak does not" + " accept credentials over HTTP. Try HTTPS instead.", [], RD, Ctx); {unsupported_version, BadVersion} -> error_out({halt, 412}, "Unsupported API version ~s", [BadVersion], RD, Ctx); {not_permitted, Table} -> error_out({halt, 401}, - "Access to table ~s not allowed", [Table], RD, Ctx); + "Access to table ~ts not allowed", [Table], RD, Ctx); {malformed_request, Method} -> error_out({halt, 400}, "Malformed ~s request", [Method], RD, Ctx); - url_key_with_put -> + {url_key_bad_method, Method} -> error_out({halt, 400}, - "Malformed POST request (did you mean a GET with keys in URL?)", [], RD, Ctx); + "Inappropriate ~s request", [Method], RD, Ctx); {bad_parameter, Param} -> error_out({halt, 400}, "Bad value for parameter \"~s\"", [Param], RD, Ctx); @@ -738,17 +608,11 @@ handle_error(Error, RD, Ctx) -> "Key not found", [], RD, Ctx); {riak_error, Detailed} -> error_out({halt, 500}, - "Internal riak error: ~p", [Detailed], RD, Ctx); - {query_parse_error, Detailed} -> - error_out({halt, 400}, - "Malformed query: ~ts", [Detailed], RD, Ctx); - inappropriate_sql_for_coverage -> - error_out({halt, 400}, - "Inappropriate query for coverage request", [], RD, Ctx); - query_compile_fail -> - error_out({halt, 400}, - "Failed to compile query for coverage request", [], RD, Ctx) + "Internal riak error: ~p", [Detailed], RD, Ctx) end. flat_format(Format, Args) -> lists:flatten(io_lib:format(Format, Args)). + +utf8_to_binary(S) -> + unicode:characters_to_binary(S, utf8, utf8). diff --git a/src/riak_kv_wm_timeseries_listkeys.erl b/src/riak_kv_wm_timeseries_listkeys.erl index 479a7dbf60..6460b531fa 100644 --- a/src/riak_kv_wm_timeseries_listkeys.erl +++ b/src/riak_kv_wm_timeseries_listkeys.erl @@ -40,6 +40,7 @@ allowed_methods/2, is_authorized/2, forbidden/2, + malformed_request/2, resource_exists/2, content_types_provided/2, encodings_provided/2, @@ -56,7 +57,7 @@ table :: undefined | binary() }). --define(CB_RV_SPEC, {boolean(), #wm_reqdata{}, #ctx{}}). +-define(CB_RV_SPEC, {boolean()|atom()|tuple(), #wm_reqdata{}, #ctx{}}). 
-define(DEFAULT_TIMEOUT, 60000). @@ -71,10 +72,7 @@ init(Props) -> -spec service_available(#wm_reqdata{}, #ctx{}) -> {boolean(), #wm_reqdata{}, #ctx{}}. %% @doc Determine whether or not a connection to Riak -%% can be established. This function also takes this -%% opportunity to extract the 'bucket' and 'key' path -%% bindings from the dispatch, as well as any vtag -%% query parameter. +%% can be established. service_available(RD, Ctx = #ctx{riak = RiakProps}) -> case riak_kv_wm_utils:get_riak_client( RiakProps, riak_kv_wm_utils:get_client_id(RD)) of @@ -82,32 +80,23 @@ service_available(RD, Ctx = #ctx{riak = RiakProps}) -> {true, RD, Ctx#ctx{api_version = wrq:path_info(api_version, RD), client = C, - table = - case wrq:path_info(table, RD) of - undefined -> undefined; - B -> list_to_binary(riak_kv_wm_utils:maybe_decode_uri(RD, B)) - end + table = utf8_to_binary( + mochiweb_util:unquote( + wrq:path_info(table, RD))) }}; - Error -> - {false, wrq:set_resp_body( - flat_format("Unable to connect to Riak: ~p", [Error]), - wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), - Ctx} + {error, Reason} -> + handle_error({riak_client_error, Reason}, RD, Ctx) end. -is_authorized(ReqData, Ctx) -> - case riak_api_web_security:is_authorized(ReqData) of +is_authorized(RD, Ctx) -> + case riak_api_web_security:is_authorized(RD) of false -> - {"Basic realm=\"Riak\"", ReqData, Ctx}; + {"Basic realm=\"Riak\"", RD, Ctx}; {true, SecContext} -> - {true, ReqData, Ctx#ctx{security = SecContext}}; + {true, RD, Ctx#ctx{security = SecContext}}; insecure -> - {{halt, 426}, - wrq:append_to_resp_body( - <<"Security is enabled and " - "Riak does not accept credentials over HTTP. Try HTTPS instead.">>, ReqData), - Ctx} + handle_error(insecure_connection, RD, Ctx) end. @@ -117,9 +106,36 @@ forbidden(RD, Ctx) -> true -> {true, RD, Ctx}; false -> - {false, RD, Ctx} + case check_permissions(RD, Ctx) of + {true, RD1, Ctx1} -> + {false, RD1, Ctx1}; + ErrorAlreadyReported -> + ErrorAlreadyReported + end end. +-spec check_permissions(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +check_permissions(RD, Ctx = #ctx{security = undefined}) -> + {true, RD, Ctx}; +check_permissions(RD, Ctx = #ctx{security = Security, + table = Table}) -> + case riak_core_security:check_permission( + {riak_kv_ts_util:api_call_to_perm(listkeys), Table}, Security) of + {false, Error, _} -> + handle_error( + {not_permitted, utf8_to_binary(Error)}, RD, Ctx); + _ -> + {true, RD, Ctx} + end. + + +-spec malformed_request(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +malformed_request(RD, Ctx = #ctx{api_version = "v1"}) -> + {false, RD, Ctx}; +malformed_request(RD, Ctx = #ctx{api_version = UnsupportedVersion}) -> + handle_error({unsupported_version, UnsupportedVersion}, RD, Ctx). + + -spec allowed_methods(#wm_reqdata{}, #ctx{}) -> {[atom()], #wm_reqdata{}, #ctx{}}. %% @doc Get the list of methods this resource supports. @@ -130,7 +146,13 @@ allowed_methods(RD, Ctx) -> -spec resource_exists(#wm_reqdata{}, #ctx{}) -> {boolean(), #wm_reqdata{}, #ctx{}}. resource_exists(RD, #ctx{table = Table} = Ctx) -> - {riak_kv_wm_utils:bucket_type_exists(Table), RD, Ctx}. + Mod = riak_ql_ddl:make_module_name(Table), + case catch Mod:get_ddl() of + {_, {undef, _}} -> + handle_error({no_such_table, Table}, RD, Ctx); + _ -> + {true, RD, Ctx} + end. -spec encodings_provided(#wm_reqdata{}, #ctx{}) -> @@ -182,5 +204,37 @@ ts_keys_to_json(Keys) -> || A <- Keys, A /= []], mochijson2:encode({struct, [{<<"keys">>, KeysTerm}]}). 
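For reference, ts_keys_to_json/1 renders the streamed keys as a single JSON object; with two (invented) keys from a family/series/timestamp table the response body would be:

    %% {"keys": [["f1", "s1", 1420113600000],
    %%           ["f1", "s1", 1420113700000]]}
    Json = mochijson2:encode({struct, [{<<"keys">>, KeysTerm}]}).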
+ +error_out(Type, Fmt, Args, RD, Ctx) -> + {Type, + wrq:set_resp_header( + "Content-Type", "text/plain", wrq:append_to_response_body( + flat_format(Fmt, Args), RD)), + Ctx}. + +-spec handle_error(atom()|tuple(), #wm_reqdata{}, #ctx{}) -> {tuple(), #wm_reqdata{}, #ctx{}}. +handle_error(Error, RD, Ctx) -> + case Error of + {riak_client_error, Reason} -> + error_out(false, + "Unable to connect to Riak: ~p", [Reason], RD, Ctx); + insecure_connection -> + error_out({halt, 426}, + "Security is enabled and Riak does not" + " accept credentials over HTTP. Try HTTPS instead.", [], RD, Ctx); + {unsupported_version, BadVersion} -> + error_out({halt, 412}, + "Unsupported API version ~s", [BadVersion], RD, Ctx); + {not_permitted, Table} -> + error_out({halt, 401}, + "Access to table ~ts not allowed", [Table], RD, Ctx); + {no_such_table, Table} -> + error_out({halt, 404}, + "Table \"~ts\" does not exist", [Table], RD, Ctx) + end. + flat_format(Format, Args) -> lists:flatten(io_lib:format(Format, Args)). + +utf8_to_binary(S) -> + unicode:characters_to_binary(S, utf8, utf8). diff --git a/src/riak_kv_wm_timeseries_query.erl b/src/riak_kv_wm_timeseries_query.erl index ab771c6073..3ba77d937e 100644 --- a/src/riak_kv_wm_timeseries_query.erl +++ b/src/riak_kv_wm_timeseries_query.erl @@ -61,7 +61,6 @@ security, %% security context client, %% riak_client() - the store client riak, %% local | {node(), atom()} - params for riak client - table :: undefined | binary(), query :: undefined | string(), compiled_query :: undefined | #ddl_v1{} | #riak_sql_describe_v1{} | ?SQL_SELECT{}, result :: undefined | ok | {Headers::[binary()], Rows::[ts_rec()]} | @@ -71,7 +70,7 @@ -define(DEFAULT_TIMEOUT, 60000). -define(TABLE_ACTIVATE_WAIT, 30). %% wait until table's bucket type is activated --define(CB_RV_SPEC, {boolean(), #wm_reqdata{}, #ctx{}}). +-define(CB_RV_SPEC, {boolean()|atom()|tuple(), #wm_reqdata{}, #ctx{}}). -type ts_rec() :: [riak_pb_ts_codec:ldbvalue()]. @@ -96,36 +95,24 @@ service_available(RD, Ctx = #ctx{riak = RiakProps}) -> {true, RD, Ctx#ctx{api_version = wrq:path_info(api_version, RD), method = wrq:method(RD), - client = C, - table = - case wrq:path_info(table, RD) of - undefined -> undefined; - B -> list_to_binary(riak_kv_wm_utils:maybe_decode_uri(RD, B)) - end + client = C }}; - Error -> - {false, wrq:set_resp_body( - flat_format("Unable to connect to Riak: ~p", [Error]), - wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), - Ctx} + {error, Reason} -> + handle_error({riak_client_error, Reason}, RD, Ctx) end. -is_authorized(ReqData, Ctx) -> - case riak_api_web_security:is_authorized(ReqData) of +is_authorized(RD, Ctx) -> + case riak_api_web_security:is_authorized(RD) of false -> - {"Basic realm=\"Riak\"", ReqData, Ctx}; + {"Basic realm=\"Riak\"", RD, Ctx}; {true, SecContext} -> - {true, ReqData, Ctx#ctx{security = SecContext}}; + {true, RD, Ctx#ctx{security = SecContext}}; insecure -> %% XXX 301 may be more appropriate here, but since the http and %% https port are different and configurable, it is hard to figure %% out the redirect URL to serve. - {{halt, 426}, - wrq:append_to_resp_body( - <<"Security is enabled and " - "Riak does not accept credentials over HTTP. Try HTTPS instead.">>, ReqData), - Ctx} + handle_error(insecure_connection, RD, Ctx) end. @@ -135,7 +122,8 @@ forbidden(RD, Ctx) -> true -> {true, RD, Ctx}; false -> - %% plug in early, and just do what it takes to do the job + %% depends on query type, we will check this later; pass + %% for now {false, RD, Ctx} end. 
%% Because webmachine chooses to (not) call certain callbacks @@ -168,7 +156,7 @@ preexec(RD, Ctx) -> case validate_request(RD, Ctx) of {true, RD1, Ctx1} -> case check_permissions(RD1, Ctx1) of - {false, RD2, Ctx2} -> + {true, RD2, Ctx2} -> call_api_function(RD2, Ctx2); FalseWithDetails -> FalseWithDetails @@ -189,10 +177,8 @@ validate_request(RD, Ctx) -> -spec validate_request_v1(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. validate_request_v1(RD, Ctx = #ctx{method = Method}) -> Json = extract_json(RD), - case {Method, string:tokens(wrq:path(RD), "/"), - extract_query(Json), extract_cover_context(Json)} of - {Method, ["ts", "v1", "query"], - Query, CoverContext} + case {Method, extract_query(Json), extract_cover_context(Json)} of + {Method, Query, CoverContext} when (Method == 'GET' orelse Method == 'POST') andalso is_list(Query) -> case riak_ql_parser:ql_parse( @@ -271,24 +257,24 @@ validate_ts_cover_context(_) -> -spec check_permissions(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. check_permissions(RD, Ctx = #ctx{security = undefined}) -> - {false, RD, Ctx}; + {true, RD, Ctx}; check_permissions(RD, Ctx = #ctx{security = Security, compiled_query = CompiledQry}) -> case riak_core_security:check_permission( decode_query_permissions(CompiledQry), Security) of {false, Error, _} -> handle_error( - {not_permitted, unicode:characters_to_binary(Error, utf8, utf8)}, RD, Ctx); + {not_permitted, utf8_to_binary(Error)}, RD, Ctx); _ -> - {false, RD, Ctx} + {true, RD, Ctx} end. decode_query_permissions(#ddl_v1{table = NewBucketType}) -> - {"riak_kv.ts_create_table", NewBucketType}; + {riak_kv_ts_util:api_call_to_perm(query_create_table), NewBucketType}; decode_query_permissions(?SQL_SELECT{'FROM' = Table}) -> - {"riak_kv.ts_query", Table}; + {riak_kv_ts_util:api_call_to_perm(query_select), Table}; decode_query_permissions(#riak_sql_describe_v1{'DESCRIBE' = Table}) -> - {"riak_kv.ts_describe", Table}. + {riak_kv_ts_util:api_call_to_perm(query_describe), Table}. -spec content_types_provided(#wm_reqdata{}, #ctx{}) -> @@ -458,12 +444,19 @@ error_out(Type, Fmt, Args, RD, Ctx) -> -spec handle_error(atom()|tuple(), #wm_reqdata{}, #ctx{}) -> {tuple(), #wm_reqdata{}, #ctx{}}. handle_error(Error, RD, Ctx) -> case Error of + {riak_client_error, Reason} -> + error_out(false, + "Unable to connect to Riak: ~p", [Reason], RD, Ctx); + insecure_connection -> + error_out({halt, 426}, + "Security is enabled and Riak does not" + " accept credentials over HTTP. Try HTTPS instead.", [], RD, Ctx); {unsupported_version, BadVersion} -> error_out({halt, 412}, "Unsupported API version ~s", [BadVersion], RD, Ctx); {not_permitted, Table} -> error_out({halt, 401}, - "Access to table ~s not allowed", [Table], RD, Ctx); + "Access to table ~ts not allowed", [Table], RD, Ctx); {malformed_request, Method} -> error_out({halt, 400}, "Malformed ~s request", [Method], RD, Ctx); @@ -498,3 +491,6 @@ handle_error(Error, RD, Ctx) -> flat_format(Format, Args) -> lists:flatten(io_lib:format(Format, Args)). + +utf8_to_binary(S) -> + unicode:characters_to_binary(S, utf8, utf8). From 70d10a28736b14cb60e6ce5e903db4966d13c16e Mon Sep 17 00:00:00 2001 From: Andrei Zavada Date: Tue, 1 Mar 2016 18:00:15 +0200 Subject: [PATCH 093/122] dispatch all api_call branches per method, from one validate_request fun Also, move around the check for no_such_table, and keep Mod and DDL once found. 
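The mechanical shape of this change, in miniature: rather than one clause that re-tokenises the path and dispatches on a constructed tuple, there is now one validate_request_v1/2 head per HTTP method, matched directly on the method field already stored in the context. A sketch of the pattern (accept_put/accept_get are invented helper names, not functions in this module):

    %% before: a single clause, dispatching through a case on a tuple
    validate_request_v1(RD, Ctx = #ctx{method = Method}) ->
        case {Method, wrq:path_tokens(RD)} of
            {'POST', []}  -> accept_put(RD, Ctx);
            {'GET', Keys} -> accept_get(Keys, RD, Ctx)
        end.

    %% after: one function clause per method; the head does the dispatch
    validate_request_v1(RD, Ctx = #ctx{method = 'POST'}) ->
        accept_put(RD, Ctx);
    validate_request_v1(RD, Ctx = #ctx{method = 'GET'}) ->
        accept_get(wrq:path_tokens(RD), RD, Ctx).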
--- src/riak_kv_wm_timeseries.erl | 130 ++++++++++++++++------------------ 1 file changed, 62 insertions(+), 68 deletions(-) diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl index 3ba7289db1..591706aa1b 100644 --- a/src/riak_kv_wm_timeseries.erl +++ b/src/riak_kv_wm_timeseries.erl @@ -67,6 +67,8 @@ riak, %% local | {node(), atom()} - params for riak client api_call :: undefined|get|put|delete, table :: undefined | binary(), + mod :: undefined | module(), + ddl :: undefined | #ddl_v1{}, %% data in/out: the following fields are either %% extracted from the JSON/path elements that came in %% the request body in case of a PUT, or filled out by @@ -189,47 +191,41 @@ validate_request(RD, Ctx) -> handle_error({unsupported_version, BadVersion}, RD, Ctx) end. --spec validate_request_v1(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -validate_request_v1(RD, Ctx) -> - case wrq:path_tokens(RD) of - KeysInUrl - when KeysInUrl /= [] -> - KeysInUrlUnquoted = lists:map(fun mochiweb_util:unquote/1, KeysInUrl), - validate_request_v1_with(KeysInUrlUnquoted, RD, Ctx); - _ -> - validate_request_v1_with(json, RD, Ctx) - end. - --spec validate_request_v1_with(json | [string()], #wm_reqdata{}, #ctx{}) -> - ?CB_RV_SPEC. -validate_request_v1_with(json, RD, Ctx = #ctx{method = Method}) -> +-spec validate_request_v1(#wm_reqdata{}, #ctx{}) -> + ?CB_RV_SPEC. +validate_request_v1(RD, Ctx = #ctx{method = 'POST'}) -> Json = extract_json(RD), - case {Method, extract_data(Json)} of - %% batch put - {'POST', Data} - when Data /= undefined -> + case extract_data(Json) of + Data when Data /= undefined -> valid_params( RD, Ctx#ctx{api_version = "v1", api_call = put, data = Data}); _Invalid -> - handle_error({malformed_request, Method}, RD, Ctx) + handle_error({malformed_request, 'POST'}, RD, Ctx) end; -validate_request_v1_with(KeysInUrl, RD, Ctx = #ctx{method = Method, - table = Table}) -> - case {Method, path_elements_to_key(Table, KeysInUrl)} of - {'GET', {ok, Key}} -> +validate_request_v1(RD, Ctx = #ctx{method = 'GET', table = Table, + mod = Mod, ddl = DDL}) -> + KeysInUrl = lists:map(fun mochiweb_util:unquote/1, wrq:path_tokens(RD)), + case path_elements_to_key(Table, KeysInUrl, Mod, DDL) of + {ok, Key} -> valid_params( RD, Ctx#ctx{api_version = "v1", api_call = get, key = Key}); - {'DELETE', {ok, Key}} -> + {error, Reason} -> + handle_error(Reason, RD, Ctx) + end; + +validate_request_v1(RD, Ctx = #ctx{method = 'DELETE', table = Table, + mod = Mod, ddl = DDL}) -> + KeysInUrl = lists:map(fun mochiweb_util:unquote/1, wrq:path_tokens(RD)), + case path_elements_to_key(Table, KeysInUrl, Mod, DDL) of + {ok, Key} -> valid_params( RD, Ctx#ctx{api_version = "v1", api_call = delete, key = Key}); - {_, {error, Reason}} -> - handle_error(Reason, RD, Ctx); - {BadMethod, _} -> - handle_error({url_key_bad_method, BadMethod}, RD, Ctx) + {error, Reason} -> + handle_error(Reason, RD, Ctx) end. 
extract_json(RD) -> @@ -247,7 +243,7 @@ extract_json(RD) -> extract_data(Json) -> try mochijson2:decode(Json) of {struct, Decoded} when is_list(Decoded) -> - %% key and data (it's a put) + %% (columns and) data for put validate_ts_records( proplists:get_value(<<"data">>, Decoded)) catch @@ -288,14 +284,12 @@ validate_ts_records(_) -> %% [{K1, V1}, {K2, V2}]), check with Table's DDL to make sure keys are %% correct and values are of (convertible to) appropriate types, and %% return the KV list --spec path_elements_to_key(binary(), [string()]) -> +-spec path_elements_to_key(binary(), [string()], module(), #ddl_v1{}) -> {ok, [{string(), riak_pb_ts_codec:ldbvalue()}]} | {error, atom()|tuple()}. -path_elements_to_key(Table, PEList) -> - Mod = riak_ql_ddl:make_module_name(Table), +path_elements_to_key(Table, PEList, Mod, + #ddl_v1{local_key = #key_v1{ast = LK}}) -> try - DDL = Mod:get_ddl(), - #ddl_v1{local_key = #key_v1{ast = LK}} = DDL, TableKeyLength = length(LK), if TableKeyLength * 2 == length(PEList) -> %% values with field names: "f1/v1/f2/v2/f3/v3" @@ -306,7 +300,7 @@ path_elements_to_key(Table, PEList) -> || {K, V} <- empair(PEList, [])], %% 2. possibly reorder field-value pairs to match the LK order OrderedKeyValues = - ensure_lk_order_and_strip(DDL, FVList), + ensure_lk_order_and_strip(LK, FVList), {ok, OrderedKeyValues}; TableKeyLength == length(PEList) -> %% bare values: "v1/v2/v3" @@ -322,8 +316,6 @@ path_elements_to_key(Table, PEList) -> {error, url_unpaired_keys} end catch - error:undef -> - {error, {no_such_table, Table}}; throw:ConvertFailed -> {error, ConvertFailed} end. @@ -333,9 +325,9 @@ empair([K, V | T], Q) -> empair(T, [{K, V}|Q]). convert_fv(Table, Mod, FieldRaw, V) -> Field = [list_to_binary(X) || X <- string:tokens(FieldRaw, ".")], - try - case Mod:is_field_valid(Field) of - true -> + case Mod:is_field_valid(Field) of + true -> + try case Mod:get_field_type(Field) of varchar -> {Field, list_to_binary(V)}; @@ -356,17 +348,17 @@ convert_fv(Table, Mod, FieldRaw, V) -> GoodValue -> {Field, GoodValue} end - end; + end + catch + error:badarg -> + %% rethrow with key, for more informative reporting + throw({url_key_bad_value, Table, Field}); false -> throw({url_key_bad_key, Table, Field}) end - catch - error:badarg -> - %% rethrow with key, for more informative reporting - throw({url_key_bad_value, Table, Field}) end. -ensure_lk_order_and_strip(#ddl_v1{local_key = #key_v1{ast = LK}}, FVList) -> +ensure_lk_order_and_strip(LK, FVList) -> [proplists:get_value(F, FVList) || #param_v1{name = F} <- LK]. @@ -387,7 +379,7 @@ valid_params(RD, Ctx) -> -spec check_permissions(#wm_reqdata{}, #ctx{}) -> {term(), #wm_reqdata{}, #ctx{}}. check_permissions(RD, Ctx = #ctx{security = undefined}) -> - validate_resource(RD, Ctx); + {true, RD, Ctx}; check_permissions(RD, Ctx = #ctx{security = Security, api_call = Call, table = Table}) -> @@ -397,18 +389,7 @@ check_permissions(RD, Ctx = #ctx{security = Security, handle_error( {not_permitted, utf8_to_binary(Error)}, RD, Ctx); _ -> - validate_resource(RD, Ctx) - end. - - --spec validate_resource(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -validate_resource(RD, Ctx = #ctx{table = Table}) -> - %% Ensure the bucket type exists, otherwise 404 early. - case riak_kv_wm_utils:bucket_type_exists(Table) of - true -> - {true, RD, Ctx}; - false -> - handle_error({no_such_table, Table}, RD, Ctx) + {true, RD, Ctx} end. @@ -435,12 +416,14 @@ content_types_accepted(RD, Ctx) -> -spec resource_exists(#wm_reqdata{}, #ctx{}) -> {boolean(), #wm_reqdata{}, #ctx{}}. 
-resource_exists(RD0, Ctx0) -> - case preexec(RD0, Ctx0) of - {true, RD, Ctx} -> - call_api_function(RD, Ctx); - FalseWithDetails -> - FalseWithDetails +resource_exists(RD, Ctx = #ctx{table = Table}) -> + Mod = riak_ql_ddl:make_module_name(Table), + try + DDL = Mod:get_ddl(), + {true, RD, Ctx#ctx{mod = Mod, ddl = DDL}} + catch + error:undef -> + handle_error({no_such_table, Table}, RD, Ctx) end. -spec process_post(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. @@ -488,13 +471,12 @@ call_api_function(RD, Ctx = #ctx{api_call = put, end; call_api_function(RD, Ctx0 = #ctx{api_call = get, - table = Table, key = Key, + table = Table, key = Key, mod = Mod, timeout = Timeout}) -> Options = if Timeout == undefined -> []; true -> [{timeout, Timeout}] end, - Mod = riak_ql_ddl:make_module_name(Table), case riak_kv_ts_api:get_data(Key, Table, Mod, Options) of {ok, Record} -> {ColumnNames, Row} = lists:unzip(Record), @@ -516,12 +498,12 @@ call_api_function(RD, Ctx0 = #ctx{api_call = get, call_api_function(RD, Ctx = #ctx{api_call = delete, table = Table, key = Key, + mod = Mod, timeout = Timeout}) -> Options = if Timeout == undefined -> []; true -> [{timeout, Timeout}] end, - Mod = riak_ql_ddl:make_module_name(Table), case riak_kv_ts_api:delete_data(Key, Table, Mod, Options) of ok -> prepare_data_in_body(RD, Ctx#ctx{result = ok}); @@ -540,6 +522,18 @@ prepare_data_in_body(RD0, Ctx0) -> -spec produce_doc_body(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +produce_doc_body(RD0, Ctx0 = #ctx{result = undefined}) -> + case preexec(RD0, Ctx0) of + {true, RD1, Ctx1} -> + case call_api_function(RD1, Ctx1) of + {true, RD2, Ctx2} -> + produce_doc_body(RD2, Ctx2); + FalseWithDetails -> + FalseWithDetails + end; + FalseWithDetails -> + FalseWithDetails + end; produce_doc_body(RD, Ctx = #ctx{result = ok}) -> {<<"ok">>, RD, Ctx}; produce_doc_body(RD, Ctx = #ctx{api_call = get, From e2b71585aeb59e89dc01fa805db7af91ffaa96c3 Mon Sep 17 00:00:00 2001 From: Andrei Zavada Date: Tue, 1 Mar 2016 22:34:43 +0200 Subject: [PATCH 094/122] avoid case clauses in favour of function clauses --- src/riak_kv_wm_timeseries.erl | 43 ++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl index 591706aa1b..1d087f7a8b 100644 --- a/src/riak_kv_wm_timeseries.erl +++ b/src/riak_kv_wm_timeseries.erl @@ -328,27 +328,7 @@ convert_fv(Table, Mod, FieldRaw, V) -> case Mod:is_field_valid(Field) of true -> try - case Mod:get_field_type(Field) of - varchar -> - {Field, list_to_binary(V)}; - sint64 -> - {Field, list_to_integer(V)}; - double -> - %% list_to_float("42") will fail, so - try - {Field, list_to_float(V)} - catch - error:badarg -> - {Field, float(list_to_integer(V))} - end; - timestamp -> - case list_to_integer(V) of - BadValue when BadValue < 1 -> - throw({url_key_bad_value, Table, Field}); - GoodValue -> - {Field, GoodValue} - end - end + convert_field(Table, Field, Mod:get_field_type(Field), V) catch error:badarg -> %% rethrow with key, for more informative reporting @@ -358,6 +338,27 @@ convert_fv(Table, Mod, FieldRaw, V) -> end end. 
+convert_field(_T, F, varchar, V) -> + {F, list_to_binary(V)}; +convert_field(_T, F, sint64, V) -> + {F, list_to_integer(V)}; +convert_field(_T, F, double, V) -> + %% list_to_float("42") will fail, so + try + {F, list_to_float(V)} + catch + error:badarg -> + {F, float(list_to_integer(V))} + end; +convert_field(T, F, timestamp, V) -> + case list_to_integer(V) of + BadValue when BadValue < 1 -> + throw({url_key_bad_value, T, F}); + GoodValue -> + {F, GoodValue} + end. + + ensure_lk_order_and_strip(LK, FVList) -> [proplists:get_value(F, FVList) || #param_v1{name = F} <- LK]. From a626a0813b37caef6bb7ab09270e35982e11ca0b Mon Sep 17 00:00:00 2001 From: Andrei Zavada Date: Wed, 2 Mar 2016 01:55:48 +0200 Subject: [PATCH 095/122] encode data for batch put as is, not at "data" key in a proplist --- src/riak_kv_wm_timeseries.erl | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl index 1d087f7a8b..bde3e0da8b 100644 --- a/src/riak_kv_wm_timeseries.erl +++ b/src/riak_kv_wm_timeseries.erl @@ -242,10 +242,8 @@ extract_json(RD) -> -spec extract_data(binary()) -> term(). extract_data(Json) -> try mochijson2:decode(Json) of - {struct, Decoded} when is_list(Decoded) -> - %% (columns and) data for put - validate_ts_records( - proplists:get_value(<<"data">>, Decoded)) + Decoded when is_list(Decoded) -> + validate_ts_records(Decoded) catch _:_ -> undefined @@ -267,8 +265,6 @@ validate_ts_record(R) when is_list(R) -> validate_ts_record(_) -> undefined. -validate_ts_records(undefined) -> - undefined; validate_ts_records(RR) when is_list(RR) -> case lists:all(fun(R) -> validate_ts_record(R) /= undefined end, RR) of true -> From 41b254d71ba2168f1c66898cbcac841cb95e85b8 Mon Sep 17 00:00:00 2001 From: Andrei Zavada Date: Wed, 2 Mar 2016 17:36:03 +0200 Subject: [PATCH 096/122] wm_timeseries: simplify batch put data extraction from json --- src/riak_kv_wm_timeseries.erl | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl index bde3e0da8b..374c8145ef 100644 --- a/src/riak_kv_wm_timeseries.erl +++ b/src/riak_kv_wm_timeseries.erl @@ -194,7 +194,7 @@ validate_request(RD, Ctx) -> -spec validate_request_v1(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. validate_request_v1(RD, Ctx = #ctx{method = 'POST'}) -> - Json = extract_json(RD), + Json = binary_to_list(wrq:req_body(RD)), case extract_data(Json) of Data when Data /= undefined -> valid_params( @@ -228,18 +228,8 @@ validate_request_v1(RD, Ctx = #ctx{method = 'DELETE', table = Table, handle_error(Reason, RD, Ctx) end. -extract_json(RD) -> - case proplists:get_value("json", RD#wm_reqdata.req_qs) of - undefined -> - %% if it was a PUT or POST, data is in body - binary_to_list(wrq:req_body(RD)); - BodyInPost -> - BodyInPost - end. -%% because, techically, key and data are 'arguments', we check they -%% are well-formed, too. --spec extract_data(binary()) -> term(). +-spec extract_data([byte()]) -> undefined|any(). extract_data(Json) -> try mochijson2:decode(Json) of Decoded when is_list(Decoded) -> @@ -354,11 +344,11 @@ convert_field(T, F, timestamp, V) -> {F, GoodValue} end. - ensure_lk_order_and_strip(LK, FVList) -> [proplists:get_value(F, FVList) || #param_v1{name = F} <- LK]. + -spec valid_params(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. valid_params(RD, Ctx) -> case wrq:get_qs_value("timeout", none, RD) of @@ -374,6 +364,7 @@ valid_params(RD, Ctx) -> end end. 
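Net effect of patches 095 and 096 on the wire format: the batch-put body is now the bare JSON array of rows, read straight from the request body, instead of an object wrapping the rows under a "data" member. A sketch (rows invented):

    %% before: {"data": [["f1", "s1", 1420113600000, 3.5]]}
    %% after:  [["f1", "s1", 1420113600000, 3.5]]
    Rows = mochijson2:decode(wrq:req_body(RD)),
    true = is_list(Rows).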
+

 -spec check_permissions(#wm_reqdata{}, #ctx{}) -> {term(), #wm_reqdata{}, #ctx{}}.
 check_permissions(RD, Ctx = #ctx{security = undefined}) ->
     {true, RD, Ctx};

From 7dfed17a6b6cec40cb0ad014a692b24f3e510ca3 Mon Sep 17 00:00:00 2001
From: Torben Hoffmann
Date: Tue, 8 Mar 2016 23:17:06 +0100
Subject: [PATCH 097/122] WIP - Total rewrite of riak_kv_wm_timeseries

* Full usage of webmachine callbacks.
---
 src/riak_kv_web.erl           |   5 +-
 src/riak_kv_wm_timeseries.erl | 783 ++++++++++++++--------------
 2 files changed, 317 insertions(+), 471 deletions(-)

diff --git a/src/riak_kv_web.erl b/src/riak_kv_web.erl
index 6ae950bfcf..ae73a8563f 100644
--- a/src/riak_kv_web.erl
+++ b/src/riak_kv_web.erl
@@ -125,8 +125,11 @@ raw_dispatch(Name) ->

     lists:flatten(
       [
+       %% Right now we only have version 1. When we get version 2 we have to
+       %% decide if we want to dispatch to separate resource modules or handle
+       %% the different versions inside the same resource handler module.
        [{["ts", api_version, "tables", table, "list_keys"], riak_kv_wm_timeseries_listkeys, Props},
-        {["ts", api_version, "tables", table, "keys", '*'], riak_kv_wm_timeseries, Props},
+        {["ts", "v1", "tables", table, "keys", '*'], riak_kv_wm_timeseries, Props},
         {["ts", api_version, "query"], riak_kv_wm_timeseries_query, Props}
        ] || {_Prefix, Props} <- Props2]).

 is_post(Req) ->
diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl
index 374c8145ef..359b9fd974 100644
--- a/src/riak_kv_wm_timeseries.erl
+++ b/src/riak_kv_wm_timeseries.erl
@@ -22,6 +22,11 @@
 %% @doc Resource for Riak TS operations over HTTP.
 %%
+%% This resource is responsible for everything under
+%% ```
+%% ts/v1/table/Table/keys
+%% ```
+%% Specific operations supported:
 %% ```
 %% GET    /ts/v1/table/Table/keys/K1/V1/...   single-key get
 %% DELETE /ts/v1/table/Table/keys/K1/V1/...   single-key delete
 %% POST   /ts/v1/table/Table/keys             single-key or batch put depending
 %% on the body
 %% '''
 %%
-%% Request body is expected to be a JSON containing key and/or value(s).
-%% Response is a JSON containing data rows with column headers.
+%% Request body is expected to be a JSON containing a struct or structs for the
+%% POST. GET and DELETE have no body.
+%%
+%% Response is a JSON containing full records.
 %%

 -module(riak_kv_wm_timeseries).

 %% webmachine resource exports
 -export([init/1,
          service_available/2,
          allowed_methods/2,
          malformed_request/2,
          is_authorized/2,
          forbidden/2,
          content_types_provided/2,
          content_types_accepted/2,
          encodings_provided/2,
          post_is_create/2,
          process_post/2,
          delete_resource/2,
          resource_exists/2]).

 -include_lib("webmachine/include/webmachine.hrl").
 -include_lib("riak_ql/include/riak_ql_ddl.hrl").
 -include("riak_kv_wm_raw.hrl").
 -include("riak_kv_ts.hrl").
--record(ctx, {api_version, - method :: atom(), - prefix, %% string() - prefix for resource uris - timeout, %% integer() - passed-in timeout value in ms - security, %% security context - client, %% riak_client() - the store client - riak, %% local | {node(), atom()} - params for riak client - api_call :: undefined|get|put|delete, - table :: undefined | binary(), - mod :: undefined | module(), - ddl :: undefined | #ddl_v1{}, - %% data in/out: the following fields are either - %% extracted from the JSON/path elements that came in - %% the request body in case of a PUT, or filled out by - %% retrieved values for shipping (as JSON) in response - %% body - key :: undefined | ts_rec(), %% parsed out of JSON that came in the body - data :: undefined | [ts_rec()], %% ditto - result :: undefined | ok | {Headers::[binary()], Rows::[ts_rec()]} - }). +-record(ctx, + {api_call :: 'undefined' | 'get' | 'put' | 'delete', + table :: 'undefined' | binary(), + mod :: 'undefined' | module(), + key :: 'undefined' | ts_rec(), + object, + timeout :: 'undefined' | integer(), + options, %% for the call towards riak. + prefix, + riak}). -define(DEFAULT_TIMEOUT, 60000). -define(TABLE_ACTIVATE_WAIT, 30). %% wait until table's bucket type is activated --define(CB_RV_SPEC, {boolean()|atom()|tuple(), #wm_reqdata{}, #ctx{}}). +-type cb_rv_spec(T) :: {T, #wm_reqdata{}, #ctx{}}. +-type halt() :: {'halt', 200..599} | {'error' , term()}. -type ts_rec() :: [riak_pb_ts_codec:ldbvalue()]. - -spec init(proplists:proplist()) -> {ok, #ctx{}}. %% @doc Initialize this resource. This function extracts the %% 'prefix' and 'riak' properties from the dispatch args. @@ -93,508 +87,357 @@ init(Props) -> {ok, #ctx{prefix = proplists:get_value(prefix, Props), riak = proplists:get_value(riak, Props)}}. --spec service_available(#wm_reqdata{}, #ctx{}) -> - {boolean(), #wm_reqdata{}, #ctx{}}. +-spec service_available(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean() | halt()). %% @doc Determine whether or not a connection to Riak -%% can be established. This function also takes this -%% opportunity to extract the 'table' and 'key' path -%% bindings from the dispatch. -service_available(RD, Ctx = #ctx{riak = RiakProps}) -> +%% can be established. +%% Convert the table name from the part of the URL. +service_available(RD, #ctx{riak = RiakProps}=Ctx) -> case riak_kv_wm_utils:get_riak_client( RiakProps, riak_kv_wm_utils:get_client_id(RD)) of - {ok, C} -> - {true, RD, - Ctx#ctx{api_version = wrq:path_info(api_version, RD), - method = wrq:method(RD), - client = C, - table = utf8_to_binary( - mochiweb_util:unquote( - wrq:path_info(table, RD))) - }}; + {ok, _C} -> + Table = table(RD), + Mod = riak_ql_ddl:make_module_name(Table), + {true, RD, Ctx#ctx{table=Table, mod=Mod}}; {error, Reason} -> - handle_error({riak_client_error, Reason}, RD, Ctx) + ErrorMsg = flat_format("Unable to connect to Riak: ~p", [Reason]), + Resp = set_text_resp_header(ErrorMsg, RD), + {false, Resp, Ctx} end. 
- -is_authorized(RD, Ctx) -> +is_authorized(RD, #ctx{table=Table}=Ctx) -> + Call = api_call(wrq:path_tokens(RD), wrq:method(RD)), case riak_api_web_security:is_authorized(RD) of false -> {"Basic realm=\"Riak\"", RD, Ctx}; {true, SecContext} -> - {true, RD, Ctx#ctx{security = SecContext}}; + case riak_core_security:check_permission( + {riak_kv_ts_util:api_call_to_perm(Call), Table}, SecContext) of + {false, Error, _} -> + {utf8_to_binary(Error), RD, Ctx}; + _ -> + {true, RD, Ctx#ctx{api_call=Call}} + end; insecure -> - handle_error(insecure_connection, RD, Ctx) + ErrorMsg = "Security is enabled and Riak does not" ++ + " accept credentials over HTTP. Try HTTPS instead.", + Resp = set_text_resp_header(ErrorMsg, RD), + {{halt, 426}, Resp, Ctx} end. - --spec forbidden(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +-spec forbidden(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). forbidden(RD, Ctx) -> - case riak_kv_wm_utils:is_forbidden(RD) of - true -> - {true, RD, Ctx}; - false -> - %%preexec(RD, Ctx) - %%validate_request(RD, Ctx) - %% plug in early, and just do what it takes to do the job - {false, RD, Ctx} - end. - + Result = riak_kv_wm_utils:is_forbidden(RD), + {Result, RD, Ctx}. --spec allowed_methods(#wm_reqdata{}, #ctx{}) -> - {[atom()], #wm_reqdata{}, #ctx{}}. -%% @doc Get the list of methods this resource supports. +-spec allowed_methods(#wm_reqdata{}, #ctx{}) -> cb_rv_spec([atom()]). allowed_methods(RD, Ctx) -> - {['GET', 'POST', 'DELETE'], RD, Ctx}. + allowed_methods(wrq:path_tokens(RD), RD, Ctx). +allowed_methods([], RD, Ctx) -> + {['POST'], RD, Ctx}; +allowed_methods(_KeyInURL, RD, Ctx) -> + {['GET', 'DELETE'], RD, Ctx}. --spec malformed_request(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -%% @doc Determine whether query parameters, request headers, -%% and request body are badly-formed. +-spec malformed_request(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). malformed_request(RD, Ctx) -> - %% this is plugged because requests are validated against - %% effective parameters contained in the body (and hence, we need - %% accept_doc_body to parse and extract things out of JSON in the - %% body) - {false, RD, Ctx}. - - --spec preexec(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -%% * collect any parameters from request body or, failing that, from -%% POST k=v items; -%% * check API version; -%% * validate those parameters against URL and method; -%% * determine which api call to do, and check permissions on that; -preexec(RD, Ctx = #ctx{api_call = Call}) - when Call /= undefined -> - %% been here, figured and executed api call, stored results for - %% shipping to client - {true, RD, Ctx}; -preexec(RD, Ctx) -> - case validate_request(RD, Ctx) of - {true, RD1, Ctx1} -> - case check_permissions(RD1, Ctx1) of - {true, RD2, Ctx2} -> - call_api_function(RD2, Ctx2); - FalseWithDetails -> - FalseWithDetails - end; - FalseWithDetails -> - FalseWithDetails - end. - --spec validate_request(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -validate_request(RD, Ctx) -> - case wrq:path_info(api_version, RD) of - "v1" -> - validate_request_v1(RD, Ctx); - BadVersion -> - handle_error({unsupported_version, BadVersion}, RD, Ctx) - end. - --spec validate_request_v1(#wm_reqdata{}, #ctx{}) -> - ?CB_RV_SPEC. 
-validate_request_v1(RD, Ctx = #ctx{method = 'POST'}) ->
-    Json = binary_to_list(wrq:req_body(RD)),
-    case extract_data(Json) of
-        Data when Data /= undefined ->
-            valid_params(
-              RD, Ctx#ctx{api_version = "v1", api_call = put,
-                          data = Data});
-        _Invalid ->
-            handle_error({malformed_request, 'POST'}, RD, Ctx)
-    end;
-
-validate_request_v1(RD, Ctx = #ctx{method = 'GET', table = Table,
-                                   mod = Mod, ddl = DDL}) ->
-    KeysInUrl = lists:map(fun mochiweb_util:unquote/1, wrq:path_tokens(RD)),
-    case path_elements_to_key(Table, KeysInUrl, Mod, DDL) of
-        {ok, Key} ->
-            valid_params(
-              RD, Ctx#ctx{api_version = "v1", api_call = get,
-                          key = Key});
-        {error, Reason} ->
-            handle_error(Reason, RD, Ctx)
-    end;
-
-validate_request_v1(RD, Ctx = #ctx{method = 'DELETE', table = Table,
-                                   mod = Mod, ddl = DDL}) ->
-    KeysInUrl = lists:map(fun mochiweb_util:unquote/1, wrq:path_tokens(RD)),
-    case path_elements_to_key(Table, KeysInUrl, Mod, DDL) of
-        {ok, Key} ->
-            valid_params(
-              RD, Ctx#ctx{api_version = "v1", api_call = delete,
-                          key = Key});
-        {error, Reason} ->
-            handle_error(Reason, RD, Ctx)
+    try
+        Ctx2 = extract_params(wrq:req_qs(RD), Ctx),
+        malformed_request(wrq:path_tokens(RD), RD, Ctx2)
+    catch
+        throw:ParameterError ->
+            ErrorMsg = flat_format("parameter error: ~p", [ParameterError]),
+            Resp = set_text_resp_header(ErrorMsg, RD),
+            {true, Resp, Ctx}
     end.

+malformed_request([], RD, Ctx) ->
+    %% NOTE: if the supplied JSON body is wrong a malformed request may be
+    %% issued later.
+    %% @todo: should the validation of the JSON happen here???
+    {false, RD, Ctx};
+malformed_request(KeyInUrl, RD, Ctx) when length(KeyInUrl) rem 2 == 0 ->
+    {false, RD, Ctx};
+malformed_request(_, RD, Ctx) ->
+    {true, RD, Ctx}.

+-spec extract_params([{string(), string()}], #ctx{}) -> #ctx{} .
+%% @doc right now we only allow a timeout parameter or nothing.
+extract_params([], Ctx) ->
+    Ctx#ctx{options=[]};
+extract_params([{"timeout", TimeoutStr}], Ctx) ->
+    try
+        Timeout = list_to_integer(TimeoutStr),
+        Ctx#ctx{timeout = Timeout,
+                options = [{timeout, Timeout}]}
+    catch
+        _:_ ->
+            throw(flat_format("timeout not an integer value: ~s", [TimeoutStr]))
+    end;
+extract_params(Params, _Ctx) ->
+    throw(flat_format("incorrect parameters: ~p", [Params])).

+-spec content_types_provided(#wm_reqdata{}, #ctx{}) -> cb_rv_spec([{string(), atom()}]).
+content_types_provided(RD, Ctx) ->
+    {[{"application/json", to_json}],
+     RD, Ctx}.

+-spec content_types_accepted(#wm_reqdata{}, #ctx{}) -> cb_rv_spec([{string(), atom()}]).
+content_types_accepted(RD, Ctx) ->
+    content_types_accepted(wrq:path_tokens(RD), RD, Ctx).
+
+content_types_accepted([], RD, Ctx) ->
+    %% the JSON in the POST will be handled by process_post,
+    %% so this handler will never be called.
+    {[{"application/json", undefined}], RD, Ctx};
+content_types_accepted(_, RD, Ctx) ->
+    {[], RD, Ctx}.

+-spec resource_exists(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean() | halt()).
+resource_exists(RD, #ctx{mod=Mod} = Ctx) ->
+    try table_module_exists(Mod) of
+        true ->
+            Path = wrq:path_tokens(RD),
+            Key = validate_key(Path, Mod),
+            resource_exists(Path, wrq:method(RD), RD, Ctx#ctx{key=Key});
+        false ->
+            Resp = set_error_message("table ~p not created", [Mod], RD),
+            {false, Resp, Ctx}
+    catch
+        throw:{key_problem, Reason} ->
+            Resp = set_error_message("wrong path to element: ~p", [Reason], RD),
+            {{halt, 400}, Resp, Ctx}
+    end.

+validate_key(Path, Mod) ->
+    UnquotedPath = lists:map(fun mochiweb_util:unquote/1, Path),
+    FVList = path_elements_to_key(Mod, UnquotedPath),
+    ensure_lk_order_and_strip(Mod, FVList).

+resource_exists([], 'POST', RD, Ctx) ->
+    {true, RD, Ctx};
+resource_exists(Path, 'GET', RD,
+                #ctx{table=Table,
+                     mod=Mod,
+                     key=Key,
+                     options=Options}=Ctx) ->
+    %% Would be nice if something cheaper than using get_data existed to check
+    %% if a key is present.
+    try riak_kv_ts_api:get_data(Key, Table, Mod, Options) of
+        {ok, Record} ->
+            {true, RD, Ctx#ctx{object=Record}};
+        {error, Reason} ->
+            Resp = set_error_message("Internal error: ~p", [Reason], RD),
+            {{halt, 500}, Resp, Ctx}
+    catch
+        _:Reason ->
+            Resp = set_error_message("lookup on ~p failed due to ~p",
+                                     [Path, Reason],
+                                     RD),
+            {false, Resp, Ctx}
+    end;
+resource_exists(_Path, 'DELETE', RD, Ctx) ->
+    %% Since reading the object is expensive we will assume for now that the
+    %% object exists for a delete, but if it turns out that it does not then the
+    %% processing of the delete will return 404 at that point.
+    {true, RD, Ctx}.

 %% extract keys from path elements in the URL (.../K1/V1/K2/V2 ->
 %% [{K1, V1}, {K2, V2}]), check with Table's DDL to make sure keys are
 %% correct and values are of (convertible to) appropriate types, and
 %% return the KV list
--spec path_elements_to_key(binary(), [string()], module(), #ddl_v1{}) ->
-    {ok, [{string(), riak_pb_ts_codec:ldbvalue()}]} |
-    {error, atom()|tuple()}.
-path_elements_to_key(Table, PEList, Mod,
-                     #ddl_v1{local_key = #key_v1{ast = LK}}) ->
+%% @private
+-spec path_elements_to_key(module(), [string()]) ->
+    [{string(), riak_pb_ts_codec:ldbvalue()}].
+path_elements_to_key(_Mod, []) ->
+    [];
+path_elements_to_key(Mod, [F,V|Rest]) ->
+    [convert_fv(Mod, F, V)|path_elements_to_key(Mod, Rest)].
+
+%% @private
+convert_fv(Mod, FieldRaw, V) ->
+    Field = [list_to_binary(X) || X <- string:tokens(FieldRaw, ".")],
     try
-        TableKeyLength = length(LK),
-        if TableKeyLength * 2 == length(PEList) ->
-                %% values with field names: "f1/v1/f2/v2/f3/v3"
-                %% 1. check that supplied key fields exist and values
-                %% supplied are convertible to their types
-                FVList =
-                    [convert_fv(Table, Mod, K, V)
-                     || {K, V} <- empair(PEList, [])],
-                %% 2. possibly reorder field-value pairs to match the LK order
-                OrderedKeyValues =
-                    ensure_lk_order_and_strip(LK, FVList),
-                {ok, OrderedKeyValues};
-           TableKeyLength == length(PEList) ->
-                %% bare values: "v1/v2/v3"
-                %% 1.
retrieve field values from the DDL - Fields = [F || #param_v1{name = F} <- LK], - FVList = - [convert_fv(Table, Mod, K, V) - || {K, V} <- lists:zip(Fields, PEList)], - {_, OrderedKeyValues} = - lists:unzip(FVList), - {ok, OrderedKeyValues}; - el/=se -> - {error, url_unpaired_keys} - end + true = Mod:is_field_valid(Field), + convert_field_value(Mod:get_field_type(Field), V) catch - throw:ConvertFailed -> - {error, ConvertFailed} - end. - -empair([], Q) -> lists:reverse(Q); -empair([K, V | T], Q) -> empair(T, [{K, V}|Q]). - -convert_fv(Table, Mod, FieldRaw, V) -> - Field = [list_to_binary(X) || X <- string:tokens(FieldRaw, ".")], - case Mod:is_field_valid(Field) of - true -> - try - convert_field(Table, Field, Mod:get_field_type(Field), V) - catch - error:badarg -> - %% rethrow with key, for more informative reporting - throw({url_key_bad_value, Table, Field}); - false -> - throw({url_key_bad_key, Table, Field}) - end + _:_ -> + throw({url_key_bad_value, Field}) end. -convert_field(_T, F, varchar, V) -> - {F, list_to_binary(V)}; -convert_field(_T, F, sint64, V) -> - {F, list_to_integer(V)}; -convert_field(_T, F, double, V) -> - %% list_to_float("42") will fail, so +%% @private +convert_field_value(varchar, V) -> + list_to_binary(V); +convert_field_value(sint64, V) -> + list_to_integer(V); +convert_field_value(double, V) -> try - {F, list_to_float(V)} + list_to_float(V) catch error:badarg -> - {F, float(list_to_integer(V))} + float(list_to_integer(V)) end; -convert_field(T, F, timestamp, V) -> +convert_field_value(timestamp, V) -> case list_to_integer(V) of - BadValue when BadValue < 1 -> - throw({url_key_bad_value, T, F}); - GoodValue -> - {F, GoodValue} + GoodValue when GoodValue > 0 -> + GoodValue; + _ -> + throw(url_key_bad_value) end. + +%% validate_ts_record(undefined) -> +%% undefined; +%% validate_ts_record(R) when is_list(R) -> +%% case lists:all( +%% %% check that all list elements are TS types +%% fun(X) -> is_integer(X) orelse is_float(X) orelse is_binary(X) end, +%% R) of +%% true -> +%% R; +%% false -> +%% undefined +%% end; +%% validate_ts_record(_) -> +%% undefined. + +%% validate_ts_records(RR) when is_list(RR) -> +%% case lists:all(fun(R) -> validate_ts_record(R) /= undefined end, RR) of +%% true -> +%% RR; +%% false -> +%% undefined +%% end; +%% validate_ts_records(_) -> +%% undefined. + ensure_lk_order_and_strip(LK, FVList) -> [proplists:get_value(F, FVList) || #param_v1{name = F} <- LK]. --spec valid_params(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -valid_params(RD, Ctx) -> - case wrq:get_qs_value("timeout", none, RD) of - none -> - {true, RD, Ctx}; - TimeoutStr -> - try - Timeout = list_to_integer(TimeoutStr), - {true, RD, Ctx#ctx{timeout = Timeout}} - catch - _:_ -> - handle_error({bad_parameter, "timeout"}, RD, Ctx) - end - end. - - --spec check_permissions(#wm_reqdata{}, #ctx{}) -> {term(), #wm_reqdata{}, #ctx{}}. -check_permissions(RD, Ctx = #ctx{security = undefined}) -> - {true, RD, Ctx}; -check_permissions(RD, Ctx = #ctx{security = Security, - api_call = Call, - table = Table}) -> - case riak_core_security:check_permission( - {riak_kv_ts_util:api_call_to_perm(Call), Table}, Security) of - {false, Error, _} -> - handle_error( - {not_permitted, utf8_to_binary(Error)}, RD, Ctx); - _ -> - {true, RD, Ctx} - end. - - --spec content_types_provided(#wm_reqdata{}, #ctx{}) -> - {[{ContentType::string(), Producer::atom()}], - #wm_reqdata{}, #ctx{}}. -content_types_provided(RD, Ctx) -> - {[{"application/json", produce_doc_body}], RD, Ctx}. 
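The convert_field_value/2 clauses above give the URL-supplied strings their DDL types; in shell terms the mapping is (values invented):

    <<"hello">>   = convert_field_value(varchar, "hello"),
    -42           = convert_field_value(sint64, "-42"),
    42.0          = convert_field_value(double, "42"),  %% integer input is upgraded
    1420113600000 = convert_field_value(timestamp, "1420113600000").
    %% a non-positive timestamp throws url_key_bad_value instead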
-
-
 -spec encodings_provided(#wm_reqdata{}, #ctx{}) ->
     {[{Encoding::string(), Producer::function()}],
      #wm_reqdata{}, #ctx{}}.
 encodings_provided(RD, Ctx) ->
     {riak_kv_wm_utils:default_encodings(), RD, Ctx}.

+-spec table_module_exists(module()) -> boolean().
+table_module_exists(Mod) ->
+    try Mod:get_ddl() of
+        #ddl_v1{} ->
+            true
     catch
-        error:undef ->
-            handle_error({no_such_table, Table}, RD, Ctx)
+        _:_ ->
+            false
     end.

+-spec post_is_create(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()).
+post_is_create(RD, Ctx) ->
+    {false, RD, Ctx}.

+-spec process_post(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()).
+process_post(RD, #ctx{mod=Mod, + table=Table}=Ctx) -> + try extract_data(RD) of + Data -> + Records = [list_to_tuple(R) || R <- Data], + case riak_kv_ts_util:validate_rows(Mod, Records) of + [] -> + case riak_kv_ts_api:put_data(Records, Table, Mod) of + ok -> + Json = result_to_json(ok), + Resp = set_json_response(Json, RD), + {true, Resp, Ctx}; + {error, {some_failed, ErrorCount}} -> + Resp = set_error_message("failed some puts ~p ~p", + [ErrorCount, Table], + RD), + {{halt, 400}, Resp, Ctx} + end; + BadRowIdxs when is_list(BadRowIdxs) -> + Resp = set_error_message("invalid data: ~p", + [BadRowIdxs], + RD), + {{halt, 400}, Resp, Ctx} + end + catch + throw:{data_problem,Reason} -> + Resp = set_error_message("wrong body: ~p", Reason, RD), + {{halt, 400}, Resp, Ctx} + end. -call_api_function(RD, Ctx = #ctx{api_call = delete, - table = Table, key = Key, - mod = Mod, - timeout = Timeout}) -> - Options = - if Timeout == undefined -> []; - true -> [{timeout, Timeout}] - end, - case riak_kv_ts_api:delete_data(Key, Table, Mod, Options) of +-spec delete_resource(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()|halt()). +delete_resource(RD, #ctx{table=Table, + mod=Mod, + key=Key, + options=Options}=Ctx) -> + try riak_kv_ts_api:delete_data(Key, Table, Mod, Options) of ok -> - prepare_data_in_body(RD, Ctx#ctx{result = ok}); - {error, {bad_key_length, Got, Need}} -> - handle_error({key_element_count_mismatch, Got, Need}, RD, Ctx); + Json = result_to_json(ok), + Resp = set_json_response(Json, RD), + {true, Resp, Ctx}; {error, notfound} -> - handle_error(notfound, RD, Ctx); - {error, Reason} -> - handle_error({riak_error, Reason}, RD, Ctx) + Resp = set_error_message("object not found", [], RD), + {{halt, 404}, Resp, Ctx} + catch + _:Reason -> + Resp = set_error_message("Internal error: ~p", Reason, RD), + {{halt, 500}, Resp, Ctx} end. - -prepare_data_in_body(RD0, Ctx0) -> - {Json, RD1, Ctx1} = produce_doc_body(RD0, Ctx0), - {true, wrq:append_to_response_body(Json, RD1), Ctx1}. - - --spec produce_doc_body(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -produce_doc_body(RD0, Ctx0 = #ctx{result = undefined}) -> - case preexec(RD0, Ctx0) of - {true, RD1, Ctx1} -> - case call_api_function(RD1, Ctx1) of - {true, RD2, Ctx2} -> - produce_doc_body(RD2, Ctx2); - FalseWithDetails -> - FalseWithDetails - end; - FalseWithDetails -> - FalseWithDetails - end; -produce_doc_body(RD, Ctx = #ctx{result = ok}) -> - {<<"ok">>, RD, Ctx}; -produce_doc_body(RD, Ctx = #ctx{api_call = get, - result = {Columns, Rows}}) -> - {mochijson2:encode( - {struct, [{<<"columns">>, Columns}, - {<<"rows">>, Rows}]}), - RD, Ctx}. - - -error_out(Type, Fmt, Args, RD, Ctx) -> - {Type, - wrq:set_resp_header( - "Content-Type", "text/plain", wrq:append_to_response_body( - flat_format(Fmt, Args), RD)), - Ctx}. - --spec handle_error(atom()|tuple(), #wm_reqdata{}, #ctx{}) -> {tuple(), #wm_reqdata{}, #ctx{}}. -handle_error(Error, RD, Ctx) -> - case Error of - {riak_client_error, Reason} -> - error_out(false, - "Unable to connect to Riak: ~p", [Reason], RD, Ctx); - insecure_connection -> - error_out({halt, 426}, - "Security is enabled and Riak does not" - " accept credentials over HTTP. 
Try HTTPS instead.", [], RD, Ctx); - {unsupported_version, BadVersion} -> - error_out({halt, 412}, - "Unsupported API version ~s", [BadVersion], RD, Ctx); - {not_permitted, Table} -> - error_out({halt, 401}, - "Access to table ~ts not allowed", [Table], RD, Ctx); - {malformed_request, Method} -> - error_out({halt, 400}, - "Malformed ~s request", [Method], RD, Ctx); - {url_key_bad_method, Method} -> - error_out({halt, 400}, - "Inappropriate ~s request", [Method], RD, Ctx); - {bad_parameter, Param} -> - error_out({halt, 400}, - "Bad value for parameter \"~s\"", [Param], RD, Ctx); - {no_such_table, Table} -> - error_out({halt, 404}, - "Table \"~ts\" does not exist", [Table], RD, Ctx); - {failed_some_puts, NoOfFailures, Table} -> - error_out({halt, 400}, - "Failed to put ~b records to table \"~ts\"", [NoOfFailures, Table], RD, Ctx); - {invalid_data, BadRowIdxs} -> - error_out({halt, 400}, - "Invalid record #~s", [hd(BadRowIdxs)], RD, Ctx); - {key_element_count_mismatch, Got, Need} -> - error_out({halt, 400}, - "Incorrect number of elements (~b) for key of length ~b", [Need, Got], RD, Ctx); - {url_key_bad_key, Table, Key} -> - error_out({halt, 400}, - "Table \"~ts\" has no field named \"~s\"", [Table, Key], RD, Ctx); - {url_key_bad_value, Table, Key} -> - error_out({halt, 400}, - "Bad value for field \"~s\" in table \"~ts\"", [Key, Table], RD, Ctx); - url_unpaired_keys -> - error_out({halt, 400}, - "Unpaired field/value for key spec in URL", [], RD, Ctx); - notfound -> - error_out({halt, 404}, - "Key not found", [], RD, Ctx); - {riak_error, Detailed} -> - error_out({halt, 500}, - "Internal riak error: ~p", [Detailed], RD, Ctx) +extract_data(RD) -> + try + JsonStr = binary_to_list(wrq:req_body(RD)), + mochijson2:decode(JsonStr) + catch + _:Reason -> + throw({data_problem, Reason}) end. +%% -spec extract_data([byte()]) -> undefined|any(). +%% extract_data(Json) -> +%% try mochijson2:decode(Json) of +%% Decoded when is_list(Decoded) -> +%% validate_ts_records(Decoded) +%% catch +%% _:_ -> +%% undefined +%% end. + + +result_to_json(ok) -> + mochijson2:encode([{success, true}]); +result_to_json(_) -> + mochijson2:encode([{some_record, one_day}]). + +set_json_response(Json, RD) -> + wrq:set_resp_header("Content-Type", "application/json", + wrq:append_to_response_body(Json, RD)). + +%% @private +table(RD) -> + utf8_to_binary( + mochiweb_util:unquote( + wrq:path_info(table, RD))). + +%% @private +api_call([], 'POST') -> + put; +api_call(_KeyInURL, 'GET') -> + get; +api_call(_KeyInURL, 'DELETE') -> + delete. + +%% move to util module. +utf8_to_binary(S) -> + unicode:characters_to_binary(S, utf8, utf8). + flat_format(Format, Args) -> lists:flatten(io_lib:format(Format, Args)). -utf8_to_binary(S) -> - unicode:characters_to_binary(S, utf8, utf8). +set_text_resp_header(IoList, RD) -> + wrq:set_resp_header( + "Content-Type", "text/plain", wrq:append_to_response_body(IoList,RD)). + +set_error_message(Format, Args, RD) -> + set_text_resp_header(flat_format(Format, Args), RD). 
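For reference, a minimal sketch of the round trip this rewritten resource expects at this point in the series (the table name and row values below are hypothetical, not taken from the patches): the POST body is a JSON array of rows, each row an array of field values in DDL column order, which extract_data/1 decodes with mochijson2 and process_post/2 turns into tuples for validate_rows/2 and put_data/3:

    %% POST /ts/v1/tables/GeoCheckin/keys   (hypothetical table)
    %% with request body: [["family1", "series1", 1420113600000]]
    Data = mochijson2:decode("[[\"family1\", \"series1\", 1420113600000]]"),
    %% => [[<<"family1">>, <<"series1">>, 1420113600000]]
    Records = [list_to_tuple(R) || R <- Data].
    %% => [{<<"family1">>, <<"series1">>, 1420113600000}]

Later patches in the series (099 onwards) switch the body format to JSON objects keyed by field name, validated against the table's DDL.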
From 367e64ce62fb1e70a3b65964e43e5d65fbd0be75 Mon Sep 17 00:00:00 2001
From: Torben Hoffmann
Date: Wed, 9 Mar 2016 11:31:30 +0100
Subject: [PATCH 098/122] Add case for undefined SecContext

---
 src/riak_kv_wm_timeseries.erl | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl
index 359b9fd974..ad3189e503 100644
--- a/src/riak_kv_wm_timeseries.erl
+++ b/src/riak_kv_wm_timeseries.erl
@@ -109,7 +109,10 @@ is_authorized(RD, #ctx{table=Table}=Ctx) ->
     case riak_api_web_security:is_authorized(RD) of
         false ->
             {"Basic realm=\"Riak\"", RD, Ctx};
+        {true, undefined} -> %% @todo: why is this returned during testing?
+            {true, RD, Ctx#ctx{api_call=Call}};
         {true, SecContext} ->
+            io:format("SecContext ~p", [SecContext]),
             case riak_core_security:check_permission(
                    {riak_kv_ts_util:api_call_to_perm(Call), Table}, SecContext) of
                 {false, Error, _} ->
@@ -324,7 +327,7 @@ encodings_provided(RD, Ctx) ->
 -spec table_module_exists(module()) -> boolean().
 table_module_exists(Mod) ->
     try Mod:get_dll() of
-        #ddl_v1{} ->
+        _ -> %#ddl_v1{} ->
             true
     catch
         _:_ ->

From 90f0e85d436d747834ec498d0040c58a4119f8ac Mon Sep 17 00:00:00 2001
From: Torben Hoffmann
Date: Thu, 10 Mar 2016 11:08:53 +0100
Subject: [PATCH 099/122] Many small fixes.

Basic functionality works.
Need to add more tests.
---
 src/riak_kv_wm_timeseries.erl | 215 ++++++++++++++++++++--------------
 1 file changed, 125 insertions(+), 90 deletions(-)

diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl
index ad3189e503..b558ff9eb7 100644
--- a/src/riak_kv_wm_timeseries.erl
+++ b/src/riak_kv_wm_timeseries.erl
@@ -57,6 +57,9 @@
          delete_resource/2,
          resource_exists/2]).

+%% webmachine body-producing functions
+-export([to_json/2]).
+
 -include_lib("webmachine/include/webmachine.hrl").
 -include_lib("riak_ql/include/riak_ql_ddl.hrl").
 -include("riak_kv_wm_raw.hrl").
@@ -86,6 +89,9 @@ init(Props) ->
     {ok, #ctx{prefix = proplists:get_value(prefix, Props),
               riak = proplists:get_value(riak, Props)}}.
+    %% {{trace, "/tmp"}, #ctx{prefix = proplists:get_value(prefix, Props),
+    %%                        riak = proplists:get_value(riak, Props)}}.
+%% wmtrace_resource:add_dispatch_rule("wmtrace", "/tmp").

 -spec service_available(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean() | halt()).
 %% @doc Determine whether or not a connection to Riak
@@ -112,7 +118,6 @@ is_authorized(RD, #ctx{table=Table}=Ctx) ->
         {true, undefined} -> %% @todo: why is this returned during testing?
             {true, RD, Ctx#ctx{api_call=Call}};
         {true, SecContext} ->
-            io:format("SecContext ~p", [SecContext]),
             case riak_core_security:check_permission(
                    {riak_kv_ts_util:api_call_to_perm(Call), Table}, SecContext) of
                 {false, Error, _} ->
@@ -197,40 +202,41 @@ content_types_accepted(_, RD, Ctx) ->

 -spec resource_exists(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean() | halt()).
 resource_exists(RD, #ctx{mod=Mod} = Ctx) ->
-    try table_module_exists(Mod) of
+    case table_module_exists(Mod) of
         true ->
-            Path = wrq:path_tokens(RD),
-            Key = validate_key(Path, Mod),
-            resource_exists(Path, wrq:method(RD), RD, Ctx#ctx{key=Key});
+            resource_exists(wrq:path_tokens(RD), wrq:method(RD), RD, Ctx);
         false ->
             Resp = set_error_message("table ~p not created", [Mod], RD),
             {false, Resp, Ctx}
-    catch
-        throw:{key_problem, Reason} ->
-            Resp = set_error_message("wrong path to element: ~p", [Reason], RD),
-            {{halt, 400}, Resp, Ctx}
     end.
validate_key(Path, Mod) -> UnquotedPath = lists:map(fun mochiweb_util:unquote/1, Path), - FVList = path_elements_to_key(Mod, UnquotedPath), - ensure_lk_order_and_strip(Mod, FVList). + path_elements(Mod, UnquotedPath). +%% ensure_lk_order_and_strip(Mod, FVList). resource_exists([], 'POST', RD, Ctx) -> {true, RD, Ctx}; resource_exists(Path, 'GET', RD, #ctx{table=Table, mod=Mod, - key=Key, options=Options}=Ctx) -> %% Would be nice if something cheaper than using get_data existed to check %% if a key is present. - try riak_kv_ts_util:get_data(Key, Table, Mod, Options) of - {ok, Record} -> - {true, RD, Ctx#ctx{object=Record}}; - {error, Reason} -> - Resp = set_error_message("Internal error: ~p", Reason, RD), - {{halt, 500}, Resp, Ctx} + try + lager:log(info, self(), "resource_exists(~p, 'GET')", [Path]), + Key = validate_key(Path, Mod), + lager:log(info, self(), "resource_exists: Key=~p", [Key]), + case riak_kv_ts_api:get_data(Key, Table, Mod, Options) of + {ok, Record} -> + {true, RD, Ctx#ctx{object=Record, + key=Key}}; + {error, notfound} -> + {{halt, 404}, RD, Ctx}; + {error, InternalReason} -> + InternalResp = set_error_message("Internal error: ~p", [InternalReason], RD), + {{halt, 500}, InternalResp, Ctx} + end catch _:Reason -> Resp = set_error_message("lookup on ~p failed due to ~p", @@ -238,34 +244,44 @@ resource_exists(Path, 'GET', RD, RD), {false, Resp, Ctx} end; -resource_exists(_Path, 'DELETE', RD, Ctx) -> +resource_exists(Path, 'DELETE', RD, #ctx{mod=Mod}=Ctx) -> %% Since reading the object is expensive we will assume for now that the %% object exists for a delete, but if it turns out that it does not then the %% processing of the delete will return 404 at that point. - {true, RD, Ctx}. + try + Key = validate_key(Path, Mod), + {true, RD, Ctx#ctx{key=Key}} + catch + _:Reason -> + Resp = set_error_message("lookup on ~p failed due to ~p", + [Path, Reason], + RD), + {false, Resp, Ctx} + end. -%% extract keys from path elements in the URL (.../K1/V1/K2/V2 -> -%% [{K1, V1}, {K2, V2}]), check with Table's DDL to make sure keys are +%% extract keys from path elements in the URL (.../K1/V1/K2/V2/... -> +%% [V1, V2, ...]), check with Table's DDL to make sure keys are %% correct and values are of (convertible to) appropriate types, and %% return the KV list %% @private --spec path_elements_to_key(module(), [string()]) -> - [{string(), riak_pb_ts_codec:ldbvalue()}]. -path_elements_to_key(_Mod, []) -> +-spec path_elements(module(), [string()]) -> + [riak_pb_ts_codec:ldbvalue()]. +path_elements(Mod, Path) -> + LK = local_key(Mod), + lager:log(info, self(), "path_elements: LK=~p", [LK]), + Types = [Mod:get_field_type([F]) || F <- LK ], + lager:log(info, self(), "path_elements: Types=~p", [Types]), + LKStr = [ binary_to_list(F) || F <- LK ], + KeyTypes = lists:zip(LKStr, Types), + lager:log(info, self(), "path_elements: KeyTypes=~p, Path=~p", [KeyTypes, Path]), + match_path(Path, KeyTypes). + +match_path([], []) -> []; -path_elements_to_key(Mod, [F,V|Rest]) -> - [convert_fv(Mod, F, V)|path_elements_to_key(Mod, Rest)]. - -%% @private -convert_fv(Mod, FieldRaw, V) -> - Field = [list_to_binary(X) || X <- string:tokens(FieldRaw, ".")], - try - true = Mod:is_field_valid(Field), - convert_field_value(Mod:get_field_type(Field), V) - catch - _:_ -> - throw({url_key_bad_value, Field}) - end. +match_path([F,V|Path], [{F, Type}|KeyTypes]) -> + [convert_field_value(Type, V)|match_path(Path, KeyTypes)]; +match_path(Path, _KeyTypes) -> + throw(io_lib:format("incorrect path ~p", [Path])). 
%% @private convert_field_value(varchar, V) -> @@ -287,37 +303,6 @@ convert_field_value(timestamp, V) -> throw(url_key_bad_value) end. - -%% validate_ts_record(undefined) -> -%% undefined; -%% validate_ts_record(R) when is_list(R) -> -%% case lists:all( -%% %% check that all list elements are TS types -%% fun(X) -> is_integer(X) orelse is_float(X) orelse is_binary(X) end, -%% R) of -%% true -> -%% R; -%% false -> -%% undefined -%% end; -%% validate_ts_record(_) -> -%% undefined. - -%% validate_ts_records(RR) when is_list(RR) -> -%% case lists:all(fun(R) -> validate_ts_record(R) /= undefined end, RR) of -%% true -> -%% RR; -%% false -> -%% undefined -%% end; -%% validate_ts_records(_) -> -%% undefined. - -ensure_lk_order_and_strip(LK, FVList) -> - [proplists:get_value(F, FVList) - || #param_v1{name = F} <- LK]. - - -spec encodings_provided(#wm_reqdata{}, #ctx{}) -> {[{Encoding::string(), Producer::function()}], #wm_reqdata{}, #ctx{}}. @@ -326,8 +311,8 @@ encodings_provided(RD, Ctx) -> -spec table_module_exists(module()) -> boolean(). table_module_exists(Mod) -> - try Mod:get_dll() of - _ -> %#ddl_v1{} -> + try Mod:get_ddl() of + #ddl_v1{} -> true catch _:_ -> @@ -341,9 +326,9 @@ post_is_create(RD, Ctx) -> -spec process_post(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). process_post(RD, #ctx{mod=Mod, table=Table}=Ctx) -> - try extract_data(RD) of - Data -> - Records = [list_to_tuple(R) || R <- Data], + try extract_data(RD, Mod) of + Records -> + %Records = [], %[list_to_tuple(R) || R <- Data], case riak_kv_ts_util:validate_rows(Mod, Records) of [] -> case riak_kv_ts_api:put_data(Records, Table, Mod) of @@ -365,7 +350,7 @@ process_post(RD, #ctx{mod=Mod, end catch throw:{data_problem,Reason} -> - Resp = set_error_message("wrong body: ~p", Reason, RD), + Resp = set_error_message("wrong body: ~p", [Reason], RD), {{halt, 400}, Resp, Ctx} end. @@ -380,39 +365,89 @@ delete_resource(RD, #ctx{table=Table, Resp = set_json_response(Json, RD), {true, Resp, Ctx}; {error, notfound} -> - Resp = set_error_message("object not found", [], RD), - {{halt, 404}, Resp, Ctx} +% Resp = set_error_message("object not found", [], RD), + {{halt, 404}, RD, Ctx} catch _:Reason -> - Resp = set_error_message("Internal error: ~p", Reason, RD), + Resp = set_error_message("Internal error: ~p", [Reason], RD), {{halt, 500}, Resp, Ctx} end. -extract_data(RD) -> +extract_data(RD, Mod) -> try JsonStr = binary_to_list(wrq:req_body(RD)), - mochijson2:decode(JsonStr) + Json = mochijson2:decode(JsonStr), + lager:log(info, self(), "extract_data: Json=~p", [Json]), + DDLFields = ddl_fields(Mod), + lager:log(info, self(), "extract_data: DDLFields=~p", [DDLFields]), + extract_records(Json, DDLFields) catch - _:Reason -> + Error:Reason -> + lager:log(info, self(), "extract_data: ~p:~p", [Error, Reason]), throw({data_problem, Reason}) end. -%% -spec extract_data([byte()]) -> undefined|any(). -%% extract_data(Json) -> -%% try mochijson2:decode(Json) of -%% Decoded when is_list(Decoded) -> -%% validate_ts_records(Decoded) -%% catch -%% _:_ -> -%% undefined -%% end. +extract_records({struct, _}=Struct, Fields) -> + [json_struct_to_obj(Struct, Fields)]; +extract_records(Structs, Fields) when is_list(Structs) -> + [json_struct_to_obj(S, Fields) || S <- Structs]. + +json_struct_to_obj({struct, FieldValueList}, Fields) -> + List = [ extract_field_value(Field, FieldValueList) + || Field <- Fields], + list_to_tuple(List). 
+
+extract_field_value(#riak_field_v1{name=Name, type=Type}, FVList) ->
+    case proplists:get_value(Name, FVList) of
+        undefined ->
+            throw({data_problem, {missing_field, Name}});
+        Value ->
+            check_field_value(Type, Value)
+    end.

+local_key(Mod) ->
+    ddl_local_key(Mod:get_ddl()).
+
+-spec ddl_local_key(#ddl_v1{}) -> [binary()].
+ddl_local_key(#ddl_v1{local_key=LK}) ->
+    #key_v1{ast=Ast} = LK,
+    [ param_name(P) || P <- Ast].
+
+param_name(#param_v1{name=[Name]}) ->
+    Name.
+
+check_field_value(varchar, V) when is_binary(V) ->
+    V;
+check_field_value(sint64, V) when is_integer(V) ->
+    V;
+check_field_value(double, V) when is_number(V) ->
+    V;
+check_field_value(timestamp, V) when is_integer(V), V>0 ->
+    V;
+check_field_value(boolean, V) when is_boolean(V) ->
+    V;
+check_field_value(Type, V) ->
+    throw({data_problem, {wrong_type, Type, V}}).
+
+ddl_fields(Mod) ->
+    #ddl_v1{fields=Fields} = Mod:get_ddl(),
+    Fields.

 result_to_json(ok) ->
     mochijson2:encode([{success, true}]);
 result_to_json(_) ->
     mochijson2:encode([{some_record, one_day}]).

+to_json(RD, #ctx{api_call=get, object=Object}=Ctx) ->
+    try
+        Json = mochijson2:encode(Object),
+        {Json, RD, Ctx}
+    catch
+        _:Reason ->
+            Resp = set_error_message("object error ~p", [Reason], RD),
+            {{halt, 500}, Resp, Ctx}
+    end.
+
 set_json_response(Json, RD) ->
     wrq:set_resp_header("Content-Type", "application/json",
                         wrq:append_to_response_body(Json, RD)).

From cbc9662790d6717b140256ca4d883f6435bb6b49 Mon Sep 17 00:00:00 2001
From: Torben Hoffmann
Date: Thu, 10 Mar 2016 11:48:43 +0100
Subject: [PATCH 100/122] Changed ddl_fields/1 to ddl_fields_and_types/1 in
 prep for moving it to the DDL helper module.

---
 src/riak_kv_wm_timeseries.erl | 33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl
index b558ff9eb7..91aeab6c11 100644
--- a/src/riak_kv_wm_timeseries.erl
+++ b/src/riak_kv_wm_timeseries.erl
@@ -369,6 +369,7 @@ delete_resource(RD, #ctx{table=Table,
         {{halt, 404}, RD, Ctx}
     catch
         _:Reason ->
+            lager:log(info, self(), "delete_resource failed: ~p", Reason),
             Resp = set_error_message("Internal error: ~p", [Reason], RD),
             {{halt, 500}, Resp, Ctx}
     end.
@@ -377,10 +378,8 @@ extract_data(RD, Mod) ->
     try
         JsonStr = binary_to_list(wrq:req_body(RD)),
         Json = mochijson2:decode(JsonStr),
-        lager:log(info, self(), "extract_data: Json=~p", [Json]),
-        DDLFields = ddl_fields(Mod),
-        lager:log(info, self(), "extract_data: DDLFields=~p", [DDLFields]),
-        extract_records(Json, DDLFields)
+        DDLFieldTypes = ddl_fields_and_types(Mod),
+        extract_records(Json, DDLFieldTypes)
     catch
         Error:Reason ->
             lager:log(info, self(), "extract_data: ~p:~p", [Error, Reason]),
             throw({data_problem, Reason})
     end.
@@ -397,7 +396,7 @@ json_struct_to_obj({struct, FieldValueList}, Fields) ->
            || Field <- Fields],
     list_to_tuple(List).

-extract_field_value(#riak_field_v1{name=Name, type=Type}, FVList) ->
+extract_field_value({Name, Type}, FVList) ->
     case proplists:get_value(Name, FVList) of
         undefined ->
             throw({data_problem, {missing_field, Name}});
@@ -408,6 +407,7 @@ local_key(Mod) ->
     ddl_local_key(Mod:get_ddl()).

+%% this should be in the DDL helper module.
 -spec ddl_local_key(#ddl_v1{}) -> [binary()].
 ddl_local_key(#ddl_v1{local_key=LK}) ->
     #key_v1{ast=Ast} = LK,
     [ param_name(P) || P <- Ast].

 param_name(#param_v1{name=[Name]}) ->
     Name.
-check_field_value(varchar, V) when is_binary(V) ->
-    V;
-check_field_value(sint64, V) when is_integer(V) ->
-    V;
-check_field_value(double, V) when is_number(V) ->
-    V;
-check_field_value(timestamp, V) when is_integer(V), V>0 ->
-    V;
-check_field_value(boolean, V) when is_boolean(V) ->
-    V;
+%% @todo: might be better if the DDL helper module had a
+%% valid_field_value(Field, Value) -> boolean() function.
+check_field_value(varchar, V) when is_binary(V) -> V;
+check_field_value(sint64, V) when is_integer(V) -> V;
+check_field_value(double, V) when is_number(V) -> V;
+check_field_value(timestamp, V) when is_integer(V), V>0 -> V;
+check_field_value(boolean, V) when is_boolean(V) -> V;
 check_field_value(Type, V) ->
     throw({data_problem, {wrong_type, Type, V}}).
+
+
+%% @todo: this should be in the DDL helper module, so that the records don't
+%% leak out of riak_ql.
 ddl_fields_and_types(Mod) ->
     #ddl_v1{fields=Fields} = Mod:get_ddl(),
     [ {Name, Type} || #riak_field_v1{name=Name, type=Type} <- Fields ].

From f1a27b69dbc45a7f30e5a27715047ef8bfe3c232 Mon Sep 17 00:00:00 2001
From: Torben Hoffmann
Date: Thu, 10 Mar 2016 12:19:37 +0100
Subject: [PATCH 101/122] Code re-org. Moved all helper functions to the end
 of the file.

---
 src/riak_kv_wm_timeseries.erl | 211 +++++++++++++++++-----------------
 1 file changed, 104 insertions(+), 107 deletions(-)

diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl
index 91aeab6c11..7ffa8f2e14 100644
--- a/src/riak_kv_wm_timeseries.erl
+++ b/src/riak_kv_wm_timeseries.erl
@@ -153,8 +153,7 @@ malformed_request(RD, Ctx) ->
         malformed_request(wrq:path_tokens(RD), RD, Ctx2)
     catch
         throw:ParameterError ->
-            ErrorMsg = flat_format("parameter error: ~p", [ParameterError]),
-            Resp = set_text_resp_header(ErrorMsg, RD),
+            Resp = set_error_message("parameter error: ~p", [ParameterError], RD),
             {true, Resp, Ctx}
     end.
@@ -168,22 +167,6 @@ malformed_request(KeyInUrl, RD, Ctx) when length(KeyInUrl) rem 2 == 0 ->
 malformed_request(_, RD, Ctx) ->
     {true, RD, Ctx}.

--spec extract_params([{string(), string()}], #ctx{}) -> #ctx{} .
-%% @doc right now we only allow a timeout parameter or nothing.
-extract_params([], Ctx) ->
-    Ctx#ctx{options=[]};
-extract_params([{"timeout", TimeoutStr}], Ctx) ->
-    try
-        Timeout = list_to_integer(TimeoutStr),
-        Ctx#ctx{timeout = Timeout,
-                options = [{timeout, Timeout}]}
-    catch
-        _:_ ->
-            throw(flat_format("timeout not an integer value: ~s", [TimeoutStr]))
-    end;
-extract_params(Params, _Ctx) ->
-    throw(flat_format("incorrect paramters: ~p", [Params])).
-
 -spec content_types_provided(#wm_reqdata{}, #ctx{}) -> cb_rv_spec([{string(), atom()}]).
 content_types_provided(RD, Ctx) ->
     {[{"application/json", to_json}],
@@ -210,11 +193,6 @@ resource_exists(RD, #ctx{mod=Mod} = Ctx) ->
         {false, Resp, Ctx}
     end.

-validate_key(Path, Mod) ->
-    UnquotedPath = lists:map(fun mochiweb_util:unquote/1, Path),
-    path_elements(Mod, UnquotedPath).
-%% ensure_lk_order_and_strip(Mod, FVList).

 resource_exists([], 'POST', RD, Ctx) ->
     {true, RD, Ctx};
 resource_exists(Path, 'GET', RD,
                 #ctx{table=Table,
                      mod=Mod,
                      options=Options}=Ctx) ->
     %% Would be nice if something cheaper than using get_data existed to check
     %% if a key is present.
try - lager:log(info, self(), "resource_exists(~p, 'GET')", [Path]), Key = validate_key(Path, Mod), - lager:log(info, self(), "resource_exists: Key=~p", [Key]), case riak_kv_ts_api:get_data(Key, Table, Mod, Options) of {ok, Record} -> {true, RD, Ctx#ctx{object=Record, @@ -259,66 +235,11 @@ resource_exists(Path, 'DELETE', RD, #ctx{mod=Mod}=Ctx) -> {false, Resp, Ctx} end. -%% extract keys from path elements in the URL (.../K1/V1/K2/V2/... -> -%% [V1, V2, ...]), check with Table's DDL to make sure keys are -%% correct and values are of (convertible to) appropriate types, and -%% return the KV list -%% @private --spec path_elements(module(), [string()]) -> - [riak_pb_ts_codec:ldbvalue()]. -path_elements(Mod, Path) -> - LK = local_key(Mod), - lager:log(info, self(), "path_elements: LK=~p", [LK]), - Types = [Mod:get_field_type([F]) || F <- LK ], - lager:log(info, self(), "path_elements: Types=~p", [Types]), - LKStr = [ binary_to_list(F) || F <- LK ], - KeyTypes = lists:zip(LKStr, Types), - lager:log(info, self(), "path_elements: KeyTypes=~p, Path=~p", [KeyTypes, Path]), - match_path(Path, KeyTypes). - -match_path([], []) -> - []; -match_path([F,V|Path], [{F, Type}|KeyTypes]) -> - [convert_field_value(Type, V)|match_path(Path, KeyTypes)]; -match_path(Path, _KeyTypes) -> - throw(io_lib:format("incorrect path ~p", [Path])). - -%% @private -convert_field_value(varchar, V) -> - list_to_binary(V); -convert_field_value(sint64, V) -> - list_to_integer(V); -convert_field_value(double, V) -> - try - list_to_float(V) - catch - error:badarg -> - float(list_to_integer(V)) - end; -convert_field_value(timestamp, V) -> - case list_to_integer(V) of - GoodValue when GoodValue > 0 -> - GoodValue; - _ -> - throw(url_key_bad_value) - end. - -spec encodings_provided(#wm_reqdata{}, #ctx{}) -> - {[{Encoding::string(), Producer::function()}], - #wm_reqdata{}, #ctx{}}. + cb_rv_spec([{Encoding::string(), Producer::function()}]). encodings_provided(RD, Ctx) -> {riak_kv_wm_utils:default_encodings(), RD, Ctx}. --spec table_module_exists(module()) -> boolean(). -table_module_exists(Mod) -> - try Mod:get_ddl() of - #ddl_v1{} -> - true - catch - _:_ -> - false - end. - -spec post_is_create(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). post_is_create(RD, Ctx) -> {false, RD, Ctx}. @@ -328,7 +249,6 @@ process_post(RD, #ctx{mod=Mod, table=Table}=Ctx) -> try extract_data(RD, Mod) of Records -> - %Records = [], %[list_to_tuple(R) || R <- Data], case riak_kv_ts_util:validate_rows(Mod, Records) of [] -> case riak_kv_ts_api:put_data(Records, Table, Mod) of @@ -365,7 +285,6 @@ delete_resource(RD, #ctx{table=Table, Resp = set_json_response(Json, RD), {true, Resp, Ctx}; {error, notfound} -> -% Resp = set_error_message("object not found", [], RD), {{halt, 404}, RD, Ctx} catch _:Reason -> @@ -374,6 +293,95 @@ delete_resource(RD, #ctx{table=Table, {{halt, 500}, Resp, Ctx} end. +-spec to_json(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(iolist()|halt()). +to_json(RD, #ctx{api_call=get, object=Object}=Ctx) -> + try + Json = mochijson2:encode(Object), + {Json, RD, Ctx} + catch + _:Reason -> + Resp = set_error_message("object error ~p", [Reason], RD), + {{halt, 500}, Resp, Ctx} + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% helper functions +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% @todo: this should be in riak_ql_ddl and should probably check deeper. +-spec table_module_exists(module()) -> boolean(). 
+table_module_exists(Mod) -> + try Mod:get_ddl() of + #ddl_v1{} -> + true + catch + _:_ -> + false + end. + +-spec extract_params([{string(), string()}], #ctx{}) -> #ctx{} . +%% @doc right now we only allow a timeout parameter or nothing. +extract_params([], Ctx) -> + Ctx#ctx{options=[]}; +extract_params([{"timeout", TimeoutStr}], Ctx) -> + try + Timeout = list_to_integer(TimeoutStr), + Ctx#ctx{timeout = Timeout, + options = [{timeout, Timeout}]} + catch + _:_ -> + throw(flat_format("timeout not an integer value: ~s", [TimeoutStr])) + end; +extract_params(Params, _Ctx) -> + throw(flat_format("incorrect paramters: ~p", [Params])). + +validate_key(Path, Mod) -> + UnquotedPath = lists:map(fun mochiweb_util:unquote/1, Path), + path_elements(Mod, UnquotedPath). + +%% extract keys from path elements in the URL (.../K1/V1/K2/V2/... -> +%% [V1, V2, ...]), check with Table's DDL to make sure keys are +%% correct and values are of (convertible to) appropriate types, and +%% return the KV list +%% @private +-spec path_elements(module(), [string()]) -> + [riak_pb_ts_codec:ldbvalue()]. +path_elements(Mod, Path) -> + KeyTypes = local_key_fields_and_types(Mod), + match_path(Path, KeyTypes). + +local_key_fields_and_types(Mod) -> + LK = local_key(Mod), + Types = [Mod:get_field_type([F]) || F <- LK ], + LKStr = [ binary_to_list(F) || F <- LK ], + lists:zip(LKStr, Types). + +match_path([], []) -> + []; +match_path([F,V|Path], [{F, Type}|KeyTypes]) -> + [convert_field_value(Type, V)|match_path(Path, KeyTypes)]; +match_path(Path, _KeyTypes) -> + throw(io_lib:format("incorrect path ~p", [Path])). + +%% @private +convert_field_value(varchar, V) -> + list_to_binary(V); +convert_field_value(sint64, V) -> + list_to_integer(V); +convert_field_value(double, V) -> + try + list_to_float(V) + catch + error:badarg -> + float(list_to_integer(V)) + end; +convert_field_value(timestamp, V) -> + case list_to_integer(V) of + GoodValue when GoodValue > 0 -> + GoodValue; + _ -> + throw(url_key_bad_value) + end. + extract_data(RD, Mod) -> try JsonStr = binary_to_list(wrq:req_body(RD)), @@ -426,30 +434,14 @@ check_field_value(boolean, V) when is_boolean(V) -> V; check_field_value(Type, V) -> throw({data_problem, {wrong_type, Type, V}}). + + %% @todo: this should be in the DDL helper module, so that the records don't %% leak out of riak_ql. ddl_fields_and_types(Mod) -> #ddl_v1{fields=Fields} = Mod:get_ddl(), [ {Name, Type} || #riak_field_v1{name=Name, type=Type} <- Fields ]. -result_to_json(ok) -> - mochijson2:encode([{success, true}]); -result_to_json(_) -> - mochijson2:encode([{some_record, one_day}]). - -to_json(RD, #ctx{api_call=get, object=Object}=Ctx) -> - try - Json = mochijson2:encode(Object), - {Json, RD, Ctx} - catch - _:Reason -> - Resp = set_error_message("object error ~p", [Reason], RD), - {{halt, 500}, Resp, Ctx} - end. - -set_json_response(Json, RD) -> - wrq:set_resp_header("Content-Type", "application/json", - wrq:append_to_response_body(Json, RD)). %% @private table(RD) -> @@ -458,12 +450,13 @@ table(RD) -> wrq:path_info(table, RD))). %% @private -api_call([], 'POST') -> - put; -api_call(_KeyInURL, 'GET') -> - get; -api_call(_KeyInURL, 'DELETE') -> - delete. +api_call([] , 'POST') -> put; +api_call(_KeyInURL, 'GET') -> get; +api_call(_KeyInURL, 'DELETE') -> delete. + +%% @private +result_to_json(ok) -> + mochijson2:encode([{success, true}]). %% move to util module. 
utf8_to_binary(S) -> @@ -478,3 +471,7 @@ set_text_resp_header(IoList, RD) -> set_error_message(Format, Args, RD) -> set_text_resp_header(flat_format(Format, Args), RD). + +set_json_response(Json, RD) -> + wrq:set_resp_header("Content-Type", "application/json", + wrq:append_to_response_body(Json, RD)). From 0dce1015411f5966ec03755a8d12615fdc8f8551 Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Thu, 10 Mar 2016 15:16:43 +0100 Subject: [PATCH 102/122] Remove unused macro. --- src/riak_kv_wm_timeseries.erl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl index 7ffa8f2e14..6fa60969d8 100644 --- a/src/riak_kv_wm_timeseries.erl +++ b/src/riak_kv_wm_timeseries.erl @@ -77,7 +77,6 @@ riak}). -define(DEFAULT_TIMEOUT, 60000). --define(TABLE_ACTIVATE_WAIT, 30). %% wait until table's bucket type is activated -type cb_rv_spec(T) :: {T, #wm_reqdata{}, #ctx{}}. -type halt() :: {'halt', 200..599} | {'error' , term()}. From 68cb53aec45d84b5bfe4df7da33ccd801951da27 Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Thu, 10 Mar 2016 15:30:18 +0100 Subject: [PATCH 103/122] Move common functions to riak_kv_wm_ts_util --- src/riak_kv_wm_timeseries.erl | 101 +++++++++++++--------------------- 1 file changed, 38 insertions(+), 63 deletions(-) diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl index 6fa60969d8..5362975463 100644 --- a/src/riak_kv_wm_timeseries.erl +++ b/src/riak_kv_wm_timeseries.erl @@ -100,12 +100,12 @@ service_available(RD, #ctx{riak = RiakProps}=Ctx) -> case riak_kv_wm_utils:get_riak_client( RiakProps, riak_kv_wm_utils:get_client_id(RD)) of {ok, _C} -> - Table = table(RD), + Table = riak_kv_wm_ts_util:table_from_request(RD), Mod = riak_ql_ddl:make_module_name(Table), {true, RD, Ctx#ctx{table=Table, mod=Mod}}; {error, Reason} -> - ErrorMsg = flat_format("Unable to connect to Riak: ~p", [Reason]), - Resp = set_text_resp_header(ErrorMsg, RD), + ErrorMsg = riak_kv_wm_ts_util:flat_format("Unable to connect to Riak: ~p", [Reason]), + Resp = riak_kv_wm_ts_util:set_text_resp_header(ErrorMsg, RD), {false, Resp, Ctx} end. @@ -120,14 +120,14 @@ is_authorized(RD, #ctx{table=Table}=Ctx) -> case riak_core_security:check_permission( {riak_kv_ts_util:api_call_to_perm(Call), Table}, SecContext) of {false, Error, _} -> - {utf8_to_binary(Error), RD, Ctx}; - _ -> - {true, RD, Ctx#ctx{api_call=Call}} + {riak_kv_wm_ts_util:utf8_to_binary(Error), RD, Ctx}; + _ -> + {true, RD, Ctx#ctx{api_call=Call}} end; insecure -> ErrorMsg = "Security is enabled and Riak does not" ++ " accept credentials over HTTP. Try HTTPS instead.", - Resp = set_text_resp_header(ErrorMsg, RD), + Resp = riak_kv_wm_ts_util:set_text_resp_header(ErrorMsg, RD), {{halt, 426}, Resp, Ctx} end. @@ -152,7 +152,7 @@ malformed_request(RD, Ctx) -> malformed_request(wrq:path_tokens(RD), RD, Ctx2) catch throw:ParameterError -> - Resp = set_error_message("parameter error: ~p", [ParameterError], RD), + Resp = riak_kv_wm_ts_util:set_error_message("parameter error: ~p", [ParameterError], RD), {true, Resp, Ctx} end. @@ -188,7 +188,7 @@ resource_exists(RD, #ctx{mod=Mod} = Ctx) -> true -> resource_exists(wrq:path_tokens(RD), wrq:method(RD), RD, Ctx); false -> - Resp = set_error_message("table ~p not created", [Mod], RD), + Resp = riak_kv_wm_ts_util:set_error_message("table ~p not created", [Mod], RD), {false, Resp, Ctx} end. 
@@ -209,14 +209,14 @@ resource_exists(Path, 'GET', RD, {error, notfound} -> {{halt, 404}, RD, Ctx}; {error, InternalReason} -> - InternalResp = set_error_message("Internal error: ~p", [InternalReason], RD), + InternalResp = riak_kv_wm_ts_util:set_error_message("Internal error: ~p", [InternalReason], RD), {{halt, 500}, InternalResp, Ctx} end catch _:Reason -> - Resp = set_error_message("lookup on ~p failed due to ~p", - [Path, Reason], - RD), + Resp = riak_kv_wm_ts_util:set_error_message("lookup on ~p failed due to ~p", + [Path, Reason], + RD), {false, Resp, Ctx} end; resource_exists(Path, 'DELETE', RD, #ctx{mod=Mod}=Ctx) -> @@ -228,9 +228,9 @@ resource_exists(Path, 'DELETE', RD, #ctx{mod=Mod}=Ctx) -> {true, RD, Ctx#ctx{key=Key}} catch _:Reason -> - Resp = set_error_message("lookup on ~p failed due to ~p", - [Path, Reason], - RD), + Resp = riak_kv_wm_ts_util:set_error_message("lookup on ~p failed due to ~p", + [Path, Reason], + RD), {false, Resp, Ctx} end. @@ -253,23 +253,23 @@ process_post(RD, #ctx{mod=Mod, case riak_kv_ts_api:put_data(Records, Table, Mod) of ok -> Json = result_to_json(ok), - Resp = set_json_response(Json, RD), + Resp = riak_kv_wm_ts_util:set_json_response(Json, RD), {true, Resp, Ctx}; {error, {some_failed, ErrorCount}} -> - Resp = set_error_message("failed some puts ~p ~p", - [ErrorCount, Table], - RD), + Resp = riak_kv_wm_ts_util:set_error_message("failed some puts ~p ~p", + [ErrorCount, Table], + RD), {{halt, 400}, Resp, Ctx} end; BadRowIdxs when is_list(BadRowIdxs) -> - Resp = set_error_message("invalid data: ~p", - [BadRowIdxs], - RD), + Resp = riak_kv_wm_ts_util:set_error_message("invalid data: ~p", + [BadRowIdxs], + RD), {{halt, 400}, Resp, Ctx} end catch throw:{data_problem,Reason} -> - Resp = set_error_message("wrong body: ~p", [Reason], RD), + Resp = riak_kv_wm_ts_util:set_error_message("wrong body: ~p", [Reason], RD), {{halt, 400}, Resp, Ctx} end. @@ -281,16 +281,16 @@ delete_resource(RD, #ctx{table=Table, try riak_kv_ts_api:delete_data(Key, Table, Mod, Options) of ok -> Json = result_to_json(ok), - Resp = set_json_response(Json, RD), - {true, Resp, Ctx}; - {error, notfound} -> - {{halt, 404}, RD, Ctx} - catch - _:Reason -> - lager:log(info, self(), "delete_resource failed: ~p", Reason), - Resp = set_error_message("Internal error: ~p", [Reason], RD), - {{halt, 500}, Resp, Ctx} - end. + Resp = riak_kv_wm_ts_util:set_json_response(Json, RD), + {true, Resp, Ctx}; + {error, notfound} -> + {{halt, 404}, RD, Ctx} + catch + _:Reason -> + lager:log(info, self(), "delete_resource failed: ~p", Reason), + Resp = riak_kv_wm_ts_util:set_error_message("Internal error: ~p", [Reason], RD), + {{halt, 500}, Resp, Ctx} + end. -spec to_json(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(iolist()|halt()). to_json(RD, #ctx{api_call=get, object=Object}=Ctx) -> @@ -299,7 +299,7 @@ to_json(RD, #ctx{api_call=get, object=Object}=Ctx) -> {Json, RD, Ctx} catch _:Reason -> - Resp = set_error_message("object error ~p", [Reason], RD), + Resp = riak_kv_wm_ts_util:set_error_message("object error ~p", [Reason], RD), {{halt, 500}, Resp, Ctx} end. @@ -328,10 +328,10 @@ extract_params([{"timeout", TimeoutStr}], Ctx) -> options = [{timeout, Timeout}]} catch _:_ -> - throw(flat_format("timeout not an integer value: ~s", [TimeoutStr])) + throw(riak_kv_wm_ts_util:flat_format("timeout not an integer value: ~s", [TimeoutStr])) end; extract_params(Params, _Ctx) -> - throw(flat_format("incorrect paramters: ~p", [Params])). + throw(riak_kv_wm_ts_util:flat_format("incorrect paramters: ~p", [Params])). 
validate_key(Path, Mod) ->
     UnquotedPath = lists:map(fun mochiweb_util:unquote/1, Path),
     path_elements(Mod, UnquotedPath).
@@ -441,13 +441,6 @@ ddl_fields_and_types(Mod) ->
     #ddl_v1{fields=Fields} = Mod:get_ddl(),
     [ {Name, Type} || #riak_field_v1{name=Name, type=Type} <- Fields ].

-%% @private
-table(RD) ->
-    utf8_to_binary(
-      mochiweb_util:unquote(
-        wrq:path_info(table, RD))).
-
 %% @private
 api_call([] , 'POST') -> put;
 api_call(_KeyInURL, 'GET') -> get;
 api_call(_KeyInURL, 'DELETE') -> delete.

 %% @private
 result_to_json(ok) ->
-    mochijson2:encode([{success, true}]).
-
-%% move to util module.
-utf8_to_binary(S) ->
-    unicode:characters_to_binary(S, utf8, utf8).
-
-flat_format(Format, Args) ->
-    lists:flatten(io_lib:format(Format, Args)).
-
-set_text_resp_header(IoList, RD) ->
-    wrq:set_resp_header(
-      "Content-Type", "text/plain", wrq:append_to_response_body(IoList,RD)).
-
-set_error_message(Format, Args, RD) ->
-    set_text_resp_header(flat_format(Format, Args), RD).
-
-set_json_response(Json, RD) ->
-    wrq:set_resp_header("Content-Type", "application/json",
-                        wrq:append_to_response_body(Json, RD)).
+    mochijson2:encode([{success, true}]).

From adf1f705319faf2a8b5d845a0584389715f93804 Mon Sep 17 00:00:00 2001
From: Torben Hoffmann
Date: Thu, 10 Mar 2016 15:58:25 +0100
Subject: [PATCH 104/122] Use common authorize function in
 riak_kv_wm_timeseries and riak_kv_wm_timeseries_listkeys

---
 src/riak_kv_wm_timeseries.erl          | 25 ++++----------
 src/riak_kv_wm_timeseries_listkeys.erl | 48 +++++++++++++-------------
 2 files changed, 31 insertions(+), 42 deletions(-)

diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl
index 5362975463..7a35823f97 100644
--- a/src/riak_kv_wm_timeseries.erl
+++ b/src/riak_kv_wm_timeseries.erl
@@ -111,24 +111,13 @@ service_available(RD, #ctx{riak = RiakProps}=Ctx) ->

 is_authorized(RD, #ctx{table=Table}=Ctx) ->
     Call = api_call(wrq:path_tokens(RD), wrq:method(RD)),
-    case riak_api_web_security:is_authorized(RD) of
-        false ->
-            {"Basic realm=\"Riak\"", RD, Ctx};
-        {true, undefined} -> %% @todo: why is this returned during testing?
+    case riak_kv_wm_ts_util:authorize(Call, Table, RD) of
+        ok ->
             {true, RD, Ctx#ctx{api_call=Call}};
-        {true, SecContext} ->
-            case riak_core_security:check_permission(
-                   {riak_kv_ts_util:api_call_to_perm(Call), Table}, SecContext) of
-                {false, Error, _} ->
-                    {riak_kv_wm_ts_util:utf8_to_binary(Error), RD, Ctx};
-                _ ->
-                    {true, RD, Ctx#ctx{api_call=Call}}
-            end;
-        insecure ->
-            ErrorMsg = "Security is enabled and Riak does not" ++
-                " accept credentials over HTTP. Try HTTPS instead.",
-            Resp = riak_kv_wm_ts_util:set_text_resp_header(ErrorMsg, RD),
-            {{halt, 426}, Resp, Ctx}
+        {error, ErrorMsg} ->
+            {ErrorMsg, RD, Ctx};
+        {insecure, Halt, Resp} ->
+            {Halt, Resp, Ctx}
     end.

 -spec forbidden(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()).
@@ -448,4 +437,4 @@ api_call(_KeyInURL, 'DELETE') -> delete.

 %% @private
 result_to_json(ok) ->
-    mochijson2:encode([{success, true}]).
\ No newline at end of file
+    mochijson2:encode([{success, true}]).
diff --git a/src/riak_kv_wm_timeseries_listkeys.erl b/src/riak_kv_wm_timeseries_listkeys.erl
index 6460b531fa..52adf0e34f 100644
--- a/src/riak_kv_wm_timeseries_listkeys.erl
+++ b/src/riak_kv_wm_timeseries_listkeys.erl
@@ -50,11 +50,11 @@
 -include("riak_kv_wm_raw.hrl").
 -include_lib("webmachine/include/webmachine.hrl").

--record(ctx, {api_version,
-              riak,
+-record(ctx, {riak,
               security,
               client,
-              table :: undefined | binary()
+              table :: undefined | binary(),
+              mod :: module()
              }).
-define(CB_RV_SPEC, {boolean()|atom()|tuple(), #wm_reqdata{}, #ctx{}}). @@ -65,9 +65,7 @@ %% @doc Initialize this resource. This function extracts the %% 'prefix' and 'riak' properties from the dispatch args. init(Props) -> - {ok, #ctx{api_version = proplists:get_value(api_version, Props), - riak = proplists:get_value(riak, Props), - table = proplists:get_value(table, Props)}}. + {ok, #ctx{riak = proplists:get_value(riak, Props)}}. -spec service_available(#wm_reqdata{}, #ctx{}) -> {boolean(), #wm_reqdata{}, #ctx{}}. @@ -77,29 +75,29 @@ service_available(RD, Ctx = #ctx{riak = RiakProps}) -> case riak_kv_wm_utils:get_riak_client( RiakProps, riak_kv_wm_utils:get_client_id(RD)) of {ok, C} -> + Table = riak_kv_wm_ts_util:table_from_request(RD), + Mod = riak_ql_ddl:make_module_name(Table), {true, RD, - Ctx#ctx{api_version = wrq:path_info(api_version, RD), - client = C, - table = utf8_to_binary( - mochiweb_util:unquote( - wrq:path_info(table, RD))) - }}; + Ctx#ctx{client = C, + table = Table, + mod = Mod}}; {error, Reason} -> handle_error({riak_client_error, Reason}, RD, Ctx) end. -is_authorized(RD, Ctx) -> - case riak_api_web_security:is_authorized(RD) of - false -> - {"Basic realm=\"Riak\"", RD, Ctx}; - {true, SecContext} -> - {true, RD, Ctx#ctx{security = SecContext}}; - insecure -> - handle_error(insecure_connection, RD, Ctx) +is_authorized(RD, #ctx{table=Table}=Ctx) -> + case riak_kv_wm_ts_util:authorize(listkeys, Table, RD) of + ok -> + {true, RD, Ctx}; + {error, ErrorMsg} -> + {ErrorMsg, RD, Ctx}; + {insecure, Halt, Resp} -> + {Halt, Resp, Ctx} end. + -spec forbidden(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. forbidden(RD, Ctx) -> case riak_kv_wm_utils:is_forbidden(RD) of @@ -130,10 +128,12 @@ check_permissions(RD, Ctx = #ctx{security = Security, -spec malformed_request(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -malformed_request(RD, Ctx = #ctx{api_version = "v1"}) -> - {false, RD, Ctx}; -malformed_request(RD, Ctx = #ctx{api_version = UnsupportedVersion}) -> - handle_error({unsupported_version, UnsupportedVersion}, RD, Ctx). +malformed_request(RD, Ctx) -> + {false, RD, Ctx}. +%% malformed_request(RD, Ctx = #ctx{api_version = "v1"}) -> +%% {false, RD, Ctx}; +%% malformed_request(RD, Ctx = #ctx{api_version = UnsupportedVersion}) -> +%% handle_error({unsupported_version, UnsupportedVersion}, RD, Ctx). -spec allowed_methods(#wm_reqdata{}, #ctx{}) -> From 2d2ebb56d0eba850cbc4773026761fa2a656bf91 Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Thu, 10 Mar 2016 15:58:57 +0100 Subject: [PATCH 105/122] Initial commit of riak_kv_wm_ts_util --- src/riak_kv_wm_ts_util.erl | 80 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 src/riak_kv_wm_ts_util.erl diff --git a/src/riak_kv_wm_ts_util.erl b/src/riak_kv_wm_ts_util.erl new file mode 100644 index 0000000000..71a6c5de54 --- /dev/null +++ b/src/riak_kv_wm_ts_util.erl @@ -0,0 +1,80 @@ +%% ------------------------------------------------------------------- +%% +%% riak_kv_wm_ts_util: utility functions for riak_kv_wm_timeseries* resources. +%% +%% Copyright (c) 2016 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. 
You may obtain
+%% a copy of the License at
+%%
+%%   http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied.  See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+-module(riak_kv_wm_ts_util).
+
+
+-export([table_from_request/1]).
+-export([utf8_to_binary/1]).
+-export([flat_format/2]).
+-export([set_text_resp_header/2]).
+-export([set_error_message/3]).
+-export([set_json_response/2]).
+
+-export([authorize/3]).
+
+
+%% @private
+table_from_request(RD) ->
+    utf8_to_binary(
+      mochiweb_util:unquote(
+        wrq:path_info(table, RD))).
+
+%% move to util module.
+utf8_to_binary(S) ->
+    unicode:characters_to_binary(S, utf8, utf8).
+
+flat_format(Format, Args) ->
+    lists:flatten(io_lib:format(Format, Args)).
+
+set_text_resp_header(IoList, RD) ->
+    wrq:set_resp_header(
+      "Content-Type", "text/plain", wrq:append_to_response_body(IoList,RD)).
+
+set_error_message(Format, Args, RD) ->
+    set_text_resp_header(flat_format(Format, Args), RD).
+
+set_json_response(Json, RD) ->
+    wrq:set_resp_header("Content-Type", "application/json",
+                        wrq:append_to_response_body(Json, RD)).
+
+
+
+authorize(Call, Table, RD) ->
+    case riak_api_web_security:is_authorized(RD) of
+        false ->
+            {error, "Basic realm=\"Riak\""};
+        {true, undefined} -> %% @todo: why is this returned during testing?
+            ok;
+        {true, SecContext} ->
+            case riak_core_security:check_permission(
+                   {riak_kv_ts_util:api_call_to_perm(Call), Table}, SecContext) of
+                {false, Error, _} ->
+                    {error, utf8_to_binary(Error)};
+                _ ->
+                    ok
+            end;
+        insecure ->
+            ErrorMsg = "Security is enabled and Riak does not" ++
+                " accept credentials over HTTP. Try HTTPS instead.",
+            Resp = set_text_resp_header(ErrorMsg, RD),
+            {insecure, {halt, 426}, Resp}
+    end.
\ No newline at end of file

From e5a54145bbe63f10d305c2f101f2e391805b5b76 Mon Sep 17 00:00:00 2001
From: Torben Hoffmann
Date: Thu, 10 Mar 2016 16:11:36 +0100
Subject: [PATCH 106/122] riak_kv_wm_timeseries_listkeys refactorings

* removed malformed_request
* changed CB_RV_SPEC to cb_rv_spec(T) type
* simplified forbidden/2
* removed check_permissions/2 as it is no longer needed (is_authorized/2
  handles that job now)
* removed utf8_to_binary/1 as it is no longer used (riak_kv_wm_ts_util has it
  for those functions that need it).
---
 src/riak_kv_wm_timeseries_listkeys.erl | 54 ++++----------------------
 1 file changed, 8 insertions(+), 46 deletions(-)

diff --git a/src/riak_kv_wm_timeseries_listkeys.erl b/src/riak_kv_wm_timeseries_listkeys.erl
index 52adf0e34f..a16fc9d4e9 100644
--- a/src/riak_kv_wm_timeseries_listkeys.erl
+++ b/src/riak_kv_wm_timeseries_listkeys.erl
@@ -40,7 +40,6 @@
          allowed_methods/2,
          is_authorized/2,
          forbidden/2,
-         malformed_request/2,
          resource_exists/2,
          content_types_provided/2,
          encodings_provided/2,
          produce_doc_body/2
         ]).
@@ -57,7 +56,7 @@
               mod :: module()
              }).

--define(CB_RV_SPEC, {boolean()|atom()|tuple(), #wm_reqdata{}, #ctx{}}).
+-type cb_rv_spec(T) :: {T, #wm_reqdata{}, #ctx{}}.

 -define(DEFAULT_TIMEOUT, 60000).
@@ -98,53 +97,21 @@ is_authorized(RD, #ctx{table=Table}=Ctx) ->

--spec forbidden(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC.
+-spec forbidden(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()).
forbidden(RD, Ctx) -> - case riak_kv_wm_utils:is_forbidden(RD) of - true -> - {true, RD, Ctx}; - false -> - case check_permissions(RD, Ctx) of - {true, RD1, Ctx1} -> - {false, RD1, Ctx1}; - ErrorAlreadyReported -> - ErrorAlreadyReported - end - end. - --spec check_permissions(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -check_permissions(RD, Ctx = #ctx{security = undefined}) -> - {true, RD, Ctx}; -check_permissions(RD, Ctx = #ctx{security = Security, - table = Table}) -> - case riak_core_security:check_permission( - {riak_kv_ts_util:api_call_to_perm(listkeys), Table}, Security) of - {false, Error, _} -> - handle_error( - {not_permitted, utf8_to_binary(Error)}, RD, Ctx); - _ -> - {true, RD, Ctx} - end. + Result = riak_kv_wm_utils:is_forbidden(RD), + {Result, RD, Ctx}. --spec malformed_request(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -malformed_request(RD, Ctx) -> - {false, RD, Ctx}. -%% malformed_request(RD, Ctx = #ctx{api_version = "v1"}) -> -%% {false, RD, Ctx}; -%% malformed_request(RD, Ctx = #ctx{api_version = UnsupportedVersion}) -> -%% handle_error({unsupported_version, UnsupportedVersion}, RD, Ctx). --spec allowed_methods(#wm_reqdata{}, #ctx{}) -> - {[atom()], #wm_reqdata{}, #ctx{}}. +-spec allowed_methods(#wm_reqdata{}, #ctx{}) -> cb_rv_spec([atom()]). %% @doc Get the list of methods this resource supports. allowed_methods(RD, Ctx) -> {['GET'], RD, Ctx}. --spec resource_exists(#wm_reqdata{}, #ctx{}) -> - {boolean(), #wm_reqdata{}, #ctx{}}. +-spec resource_exists(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). resource_exists(RD, #ctx{table = Table} = Ctx) -> Mod = riak_ql_ddl:make_module_name(Table), case catch Mod:get_ddl() of @@ -156,16 +123,14 @@ resource_exists(RD, #ctx{table = Table} = Ctx) -> -spec encodings_provided(#wm_reqdata{}, #ctx{}) -> - {[{Encoding::string(), Producer::function()}], - #wm_reqdata{}, #ctx{}}. + cb_rv_spec([{Encoding::string(), Producer::function()}]). %% @doc List the encodings available for representing this resource. %% "identity" and "gzip" are available. encodings_provided(RD, Ctx) -> {riak_kv_wm_utils:default_encodings(), RD, Ctx}. -spec content_types_provided(#wm_reqdata{}, #ctx{}) -> - {[{ContentType::string(), Producer::atom()}], - #wm_reqdata{}, #ctx{}}. + cb_rv_spec([{ContentType::string(), Producer::atom()}]). %% @doc List the content types available for representing this resource. content_types_provided(RD, Ctx) -> {[{"application/json", produce_doc_body}], RD, Ctx}. @@ -235,6 +200,3 @@ handle_error(Error, RD, Ctx) -> flat_format(Format, Args) -> lists:flatten(io_lib:format(Format, Args)). - -utf8_to_binary(S) -> - unicode:characters_to_binary(S, utf8, utf8). From a98fdfa082b89b492388526cc2f71031c173007b Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Thu, 10 Mar 2016 17:12:03 +0100 Subject: [PATCH 107/122] Moved table_module_exists to util module. Simpler resource_exists for riak_kv_wm_timeseries_listkeys. --- src/riak_kv_wm_timeseries.erl | 16 +--------------- src/riak_kv_wm_timeseries_listkeys.erl | 10 ++-------- src/riak_kv_wm_ts_util.erl | 18 +++++++++++++++++- 3 files changed, 20 insertions(+), 24 deletions(-) diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl index 7a35823f97..27c3a2c27f 100644 --- a/src/riak_kv_wm_timeseries.erl +++ b/src/riak_kv_wm_timeseries.erl @@ -173,7 +173,7 @@ content_types_accepted(_, RD, Ctx) -> -spec resource_exists(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean() | halt()). 
resource_exists(RD, #ctx{mod=Mod} = Ctx) ->
-    case table_module_exists(Mod) of
+    case riak_kv_wm_ts_util:table_module_exists(Mod) of
         true ->
             resource_exists(wrq:path_tokens(RD), wrq:method(RD), RD, Ctx);
         false ->
@@ -292,20 +292,6 @@ to_json(RD, #ctx{api_call=get, object=Object}=Ctx) ->
         {{halt, 500}, Resp, Ctx}
     end.

-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%% helper functions
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%% @todo: this should be in riak_ql_ddl and should probably check deeper.
--spec table_module_exists(module()) -> boolean().
-table_module_exists(Mod) ->
-    try Mod:get_ddl() of
-        #ddl_v1{} ->
-            true
-    catch
-        _:_ ->
-            false
-    end.
-
 -spec extract_params([{string(), string()}], #ctx{}) -> #ctx{} .
 %% @doc right now we only allow a timeout parameter or nothing.
 extract_params([], Ctx) ->
diff --git a/src/riak_kv_wm_timeseries_listkeys.erl b/src/riak_kv_wm_timeseries_listkeys.erl
index a16fc9d4e9..37416e29c7 100644
--- a/src/riak_kv_wm_timeseries_listkeys.erl
+++ b/src/riak_kv_wm_timeseries_listkeys.erl
@@ -112,14 +112,8 @@ allowed_methods(RD, Ctx) ->

 -spec resource_exists(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()).
-resource_exists(RD, #ctx{table = Table} = Ctx) ->
-    Mod = riak_ql_ddl:make_module_name(Table),
-    case catch Mod:get_ddl() of
-        {_, {undef, _}} ->
-            handle_error({no_such_table, Table}, RD, Ctx);
-        _ ->
-            {true, RD, Ctx}
-    end.
+resource_exists(RD, #ctx{mod=Mod} = Ctx) ->
+    {riak_kv_wm_utils:table_module_exists(Mod), RD, Ctx}.

 -spec encodings_provided(#wm_reqdata{}, #ctx{}) ->
diff --git a/src/riak_kv_wm_ts_util.erl b/src/riak_kv_wm_ts_util.erl
index 71a6c5de54..6265502085 100644
--- a/src/riak_kv_wm_ts_util.erl
+++ b/src/riak_kv_wm_ts_util.erl
@@ -31,6 +31,8 @@

 -export([authorize/3]).

+-export([table_module_exists/1]).
+

 %% @private
 table_from_request(RD) ->
@@ -77,4 +79,18 @@ authorize(Call, Table, RD) ->
             " accept credentials over HTTP. Try HTTPS instead.",
         Resp = set_text_resp_header(ErrorMsg, RD),
         {insecure, {halt, 426}, Resp}
-    end.
\ No newline at end of file
+    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% helper functions
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% @todo: this should be in riak_ql_ddl and should probably check deeper.
+-spec table_module_exists(module()) -> boolean().
+table_module_exists(Mod) ->
+    try Mod:get_ddl() of
+        _DDL -> %#ddl_v1{} ->
+            true
+    catch
+        _:_ ->
+            false
+    end.
\ No newline at end of file

From b854b562319ea0be8c42d49eeabd31d1c6c0bd06 Mon Sep 17 00:00:00 2001
From: Torben Hoffmann
Date: Thu, 10 Mar 2016 17:17:45 +0100
Subject: [PATCH 108/122] timeseries_listkeys service_available simplified and
 functions removed.

* error_out, handle_error and flat_format removed as they are no longer used.
---
 src/riak_kv_wm_timeseries_listkeys.erl | 35 ++------------------------
 1 file changed, 2 insertions(+), 33 deletions(-)

diff --git a/src/riak_kv_wm_timeseries_listkeys.erl b/src/riak_kv_wm_timeseries_listkeys.erl
index 37416e29c7..7e6ef6e7b2 100644
--- a/src/riak_kv_wm_timeseries_listkeys.erl
+++ b/src/riak_kv_wm_timeseries_listkeys.erl
@@ -162,35 +163,3 @@ ts_keys_to_json(Keys) -> KeysTerm = [tuple_to_list(sext:decode(A)) || A <- Keys, A /= []], mochijson2:encode({struct, [{<<"keys">>, KeysTerm}]}). - - -error_out(Type, Fmt, Args, RD, Ctx) -> - {Type, - wrq:set_resp_header( - "Content-Type", "text/plain", wrq:append_to_response_body( - flat_format(Fmt, Args), RD)), - Ctx}. - --spec handle_error(atom()|tuple(), #wm_reqdata{}, #ctx{}) -> {tuple(), #wm_reqdata{}, #ctx{}}. -handle_error(Error, RD, Ctx) -> - case Error of - {riak_client_error, Reason} -> - error_out(false, - "Unable to connect to Riak: ~p", [Reason], RD, Ctx); - insecure_connection -> - error_out({halt, 426}, - "Security is enabled and Riak does not" - " accept credentials over HTTP. Try HTTPS instead.", [], RD, Ctx); - {unsupported_version, BadVersion} -> - error_out({halt, 412}, - "Unsupported API version ~s", [BadVersion], RD, Ctx); - {not_permitted, Table} -> - error_out({halt, 401}, - "Access to table ~ts not allowed", [Table], RD, Ctx); - {no_such_table, Table} -> - error_out({halt, 404}, - "Table \"~ts\" does not exist", [Table], RD, Ctx) - end. - -flat_format(Format, Args) -> - lists:flatten(io_lib:format(Format, Args)). From d81cadc6eadc14854503f5d77acd2166d8e77f64 Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Fri, 11 Mar 2016 12:17:25 +0100 Subject: [PATCH 109/122] moved local_key(Mod) to wm_ts_util --- src/riak_kv_wm_timeseries.erl | 25 +++---------------------- src/riak_kv_wm_ts_util.erl | 33 +++++++++++++++++++++++++++++---- 2 files changed, 32 insertions(+), 26 deletions(-) diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl index 27c3a2c27f..02a36f290a 100644 --- a/src/riak_kv_wm_timeseries.erl +++ b/src/riak_kv_wm_timeseries.erl @@ -104,8 +104,7 @@ service_available(RD, #ctx{riak = RiakProps}=Ctx) -> Mod = riak_ql_ddl:make_module_name(Table), {true, RD, Ctx#ctx{table=Table, mod=Mod}}; {error, Reason} -> - ErrorMsg = riak_kv_wm_ts_util:flat_format("Unable to connect to Riak: ~p", [Reason]), - Resp = riak_kv_wm_ts_util:set_text_resp_header(ErrorMsg, RD), + Resp = riak_kv_wm_ts_util:set_error_message("Unable to connect to Riak: ~p", [Reason], RD), {false, Resp, Ctx} end. @@ -276,7 +275,7 @@ delete_resource(RD, #ctx{table=Table, {{halt, 404}, RD, Ctx} catch _:Reason -> - lager:log(info, self(), "delete_resource failed: ~p", Reason), + lager:log(info, self(), "delete_resource failed: ~p", [Reason]), Resp = riak_kv_wm_ts_util:set_error_message("Internal error: ~p", [Reason], RD), {{halt, 500}, Resp, Ctx} end. @@ -320,15 +319,9 @@ validate_key(Path, Mod) -> -spec path_elements(module(), [string()]) -> [riak_pb_ts_codec:ldbvalue()]. path_elements(Mod, Path) -> - KeyTypes = local_key_fields_and_types(Mod), + KeyTypes = riak_kv_wm_ts_util:local_key_fields_and_types(Mod), match_path(Path, KeyTypes). -local_key_fields_and_types(Mod) -> - LK = local_key(Mod), - Types = [Mod:get_field_type([F]) || F <- LK ], - LKStr = [ binary_to_list(F) || F <- LK ], - lists:zip(LKStr, Types). - match_path([], []) -> []; match_path([F,V|Path], [{F, Type}|KeyTypes]) -> @@ -386,18 +379,6 @@ extract_field_value({Name, Type}, FVList) -> check_field_value(Type, Value) end. -local_key(Mod) -> - ddl_local_key(Mod:get_ddl()). - -%% this should be in the DDL helper module. --spec ddl_local_key(#ddl_v1{}) -> [binary()]. -ddl_local_key(#ddl_v1{local_key=LK}) -> - #key_v1{ast=Ast} = LK, - [ param_name(P) || P <- Ast]. - -param_name(#param_v1{name=[Name]}) -> - Name. 
- %% @todo: might be better if the DDL helper module had a %% valid_field_value(Field, Value) -> boolean() function. check_field_value(varchar, V) when is_binary(V) -> V; diff --git a/src/riak_kv_wm_ts_util.erl b/src/riak_kv_wm_ts_util.erl index 6265502085..c11615e026 100644 --- a/src/riak_kv_wm_ts_util.erl +++ b/src/riak_kv_wm_ts_util.erl @@ -33,6 +33,15 @@ -export([table_module_exists/1]). +-export([local_key/1]). + +-export([local_key_fields_and_types/1]). + +-include_lib("webmachine/include/webmachine.hrl"). +-include_lib("riak_ql/include/riak_ql_ddl.hrl"). +-include("riak_kv_wm_raw.hrl"). +-include("riak_kv_ts.hrl"). + %% @private table_from_request(RD) -> @@ -81,9 +90,6 @@ authorize(Call, Table, RD) -> {insecure, {halt, 426}, Resp} end. -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% helper functions -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% @todo: this should be in riak_ql_ddl and should probably check deeper. -spec table_module_exists(module()) -> boolean(). table_module_exists(Mod) -> @@ -93,4 +99,23 @@ table_module_exists(Mod) -> catch _:_ -> false - end. \ No newline at end of file + end. + + +local_key(Mod) -> + ddl_local_key(Mod:get_ddl()). + +%% this should be in the DDL helper module. +-spec ddl_local_key(#ddl_v1{}) -> [binary()]. +ddl_local_key(#ddl_v1{local_key=LK}) -> + #key_v1{ast=Ast} = LK, + [ param_name(P) || P <- Ast]. + +param_name(#param_v1{name=[Name]}) -> + Name. + +local_key_fields_and_types(Mod) -> + LK = local_key(Mod), + Types = [Mod:get_field_type([F]) || F <- LK ], + LKStr = [ binary_to_list(F) || F <- LK ], + lists:zip(LKStr, Types). \ No newline at end of file From a39347295e8e1ee2964da9d2c1ec3193a95329f4 Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Fri, 11 Mar 2016 12:18:09 +0100 Subject: [PATCH 110/122] timeseries_listkeys refactored and produces html --- src/riak_kv_wm_timeseries_listkeys.erl | 86 ++++++++++++++++---------- 1 file changed, 53 insertions(+), 33 deletions(-) diff --git a/src/riak_kv_wm_timeseries_listkeys.erl b/src/riak_kv_wm_timeseries_listkeys.erl index 7e6ef6e7b2..69e33f443e 100644 --- a/src/riak_kv_wm_timeseries_listkeys.erl +++ b/src/riak_kv_wm_timeseries_listkeys.erl @@ -42,9 +42,10 @@ forbidden/2, resource_exists/2, content_types_provided/2, - encodings_provided/2, - produce_doc_body/2 - ]). + encodings_provided/2]). + +%% webmachine body-producing functions +-export([produce_doc_body/2]). -include("riak_kv_wm_raw.hrl"). -include_lib("webmachine/include/webmachine.hrl"). @@ -85,7 +86,6 @@ service_available(RD, Ctx = #ctx{riak = RiakProps}) -> {false, Resp, Ctx} end. - is_authorized(RD, #ctx{table=Table}=Ctx) -> case riak_kv_wm_ts_util:authorize(listkeys, Table, RD) of ok -> @@ -96,26 +96,19 @@ is_authorized(RD, #ctx{table=Table}=Ctx) -> {Halt, Resp, Ctx} end. - - -spec forbidden(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). forbidden(RD, Ctx) -> Result = riak_kv_wm_utils:is_forbidden(RD), {Result, RD, Ctx}. - - - -spec allowed_methods(#wm_reqdata{}, #ctx{}) -> cb_rv_spec([atom()]). %% @doc Get the list of methods this resource supports. allowed_methods(RD, Ctx) -> {['GET'], RD, Ctx}. - -spec resource_exists(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). resource_exists(RD, #ctx{mod=Mod} = Ctx) -> - {riak_kv_wm_utils:table_module_exists(Mod), RD, Ctx}. - + {riak_kv_wm_ts_util:table_module_exists(Mod), RD, Ctx}. -spec encodings_provided(#wm_reqdata{}, #ctx{}) -> cb_rv_spec([{Encoding::string(), Producer::function()}]). 
@@ -128,38 +121,65 @@ encodings_provided(RD, Ctx) ->
                             cb_rv_spec([{ContentType::string(), Producer::atom()}]).
 %% @doc List the content types available for representing this resource.
 content_types_provided(RD, Ctx) ->
-    {[{"application/json", produce_doc_body}], RD, Ctx}.
+    {[{"text/html", produce_doc_body}], RD, Ctx}.
 
-
-produce_doc_body(RD, Ctx = #ctx{table = Table,
+produce_doc_body(RD, Ctx = #ctx{table = Table, mod=Mod,
                                 client = Client}) ->
-    F = fun() ->
-                {ok, ReqId} = riak_client:stream_list_keys(
-                                {Table, Table}, undefined, Client),
-                stream_keys(ReqId)
-        end,
-    {{stream, {<<>>, F}}, RD, Ctx}.
-
-stream_keys(ReqId) ->
+    {ok, ReqId} = riak_client:stream_list_keys(
+                    {Table, Table}, undefined, Client),
+    lager:log(info, self(), "in produce_doc_body ~p", [Table]),
+    {{halt, 200}, wrq:set_resp_body({stream, prepare_stream(ReqId, Table, Mod)}, RD), Ctx}.
+
+prepare_stream(ReqId, Table, Mod) ->
+    {<<"">>, fun() -> stream_keys(ReqId, Table, Mod) end}.
+
+stream_keys(ReqId, Table, Mod) ->
     receive
         %% skip empty shipments
         {ReqId, {keys, []}} ->
-            stream_keys(ReqId);
+            stream_keys(ReqId, Table, Mod);
         {ReqId, From, {keys, []}} ->
             _ = riak_kv_keys_fsm:ack_keys(From),
-            stream_keys(ReqId);
+            stream_keys(ReqId, Table, Mod);
         {ReqId, From, {keys, Keys}} ->
             _ = riak_kv_keys_fsm:ack_keys(From),
-            {ts_keys_to_json(Keys), fun() -> stream_keys(ReqId) end};
+            {ts_keys_to_html(Keys, Table, Mod), fun() -> stream_keys(ReqId, Table, Mod) end};
         {ReqId, {keys, Keys}} ->
-            {ts_keys_to_json(Keys), fun() -> stream_keys(ReqId) end};
+            {ts_keys_to_html(Keys, Table, Mod), fun() -> stream_keys(ReqId, Table, Mod) end};
         {ReqId, done} ->
-            {<<>>, done};
+            {<<"">>, done};
         {ReqId, {error, timeout}} ->
-            {mochijson2:encode({struct, [{error, timeout}]}), done}
+            {mochijson2:encode({struct, [{error, timeout}]}), done};
+        Weird ->
+            lager:log(info, self(), "stream_keys got totally Weird=~p", [Weird]),
+            stream_keys(ReqId, Table, Mod)
     end.
 
-ts_keys_to_json(Keys) ->
-    KeysTerm = [tuple_to_list(sext:decode(A))
-                || A <- Keys, A /= []],
-    mochijson2:encode({struct, [{<<"keys">>, KeysTerm}]}).
+ts_keys_to_html(EncodedKeys, Table, Mod) ->
+    BaseUrl = base_url(Table),
+    Keys = decode_keys(EncodedKeys),
+    KeyTypes = riak_kv_wm_ts_util:local_key_fields_and_types(Mod),
+    URLs = [io_lib:format("~s~s", [BaseUrl, key_to_string(Key, KeyTypes)])
+            || Key <- Keys],
+    Hrefs = [ io_lib:format("<a href=\"~s\">~s</a>", [URL, URL])
+              || URL <- URLs],
+    list_to_binary(lists:flatten(Hrefs)).
+
+decode_keys(Keys) ->
+    [tuple_to_list(sext:decode(A))
+     || A <- Keys, A /= []].
+
+key_to_string([], []) ->
+    "";
+key_to_string([Key|Keys], [{Field, Type}|KeyTypes]) ->
+    Field ++ "/" ++ value_to_url_string(Key, Type) ++ "/" ++ key_to_string(Keys, KeyTypes).
+
+value_to_url_string(V, varchar) ->
+    binary_to_list(V);
+value_to_url_string(V, timestamp) ->
+    erlang:integer_to_list(V).
+
+base_url(Table) ->
+    {ok, [{Server, Port}]} = application:get_env(riak_api, http),
+    io_lib:format("http://~s:~B/ts/v1/tables/~s/keys/",
+                  [Server, Port, Table]).
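An illustration, not part of the patch series: patch 110 turns list_keys into a
stream of HTML links, one per record, built by zipping each sext-decoded key
with the table's local key fields. A minimal sketch of how the helpers above
compose, assuming a table GeoCheckin with local key (family varchar, series
varchar, time timestamp) on a node serving HTTP at 127.0.0.1:8098; the table
name, host, port and key values are all assumptions made for the example:

    %% Hypothetical inputs, chosen only to trace the code above.
    KeyTypes = [{"family", varchar}, {"series", varchar}, {"time", timestamp}],
    Key      = [<<"fam1">>, <<"ser1">>, 1420113600000],
    %% key_to_string(Key, KeyTypes) evaluates to
    %%   "family/fam1/series/ser1/time/1420113600000/"
    %% and with BaseUrl = "http://127.0.0.1:8098/ts/v1/tables/GeoCheckin/keys/"
    %% the Hrefs comprehension emits one anchor per key:
    %%   <a href="http://127.0.0.1:8098/ts/v1/tables/GeoCheckin/keys/family/fam1/series/ser1/time/1420113600000/">...</a>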
From b57af6f535a8636d66f635ea8f618199cc7c2eb1 Mon Sep 17 00:00:00 2001
From: Torben Hoffmann
Date: Sun, 13 Mar 2016 20:32:31 +0100
Subject: [PATCH 111/122] Uniform dispatch routes for timeseries in riak_kv_web

---
 src/riak_kv_web.erl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/riak_kv_web.erl b/src/riak_kv_web.erl
index ae73a8563f..fc794f567e 100644
--- a/src/riak_kv_web.erl
+++ b/src/riak_kv_web.erl
@@ -129,7 +129,7 @@ raw_dispatch(Name) ->
     %% decide if we want to dispatch to separate resource modules or handle
     %% the different versions inside the same resource handler module.
     [{["ts", api_version, "tables", table, "list_keys"], riak_kv_wm_timeseries_listkeys, Props},
-     {["ts", "v1", "tables", table, "keys", '*'], riak_kv_wm_timeseries, Props},
+     {["ts", api_version, "tables", table, "keys", '*'], riak_kv_wm_timeseries, Props},
      {["ts", api_version, "query"], riak_kv_wm_timeseries_query, Props}
     ] || {_Prefix, Props} <- Props2]).

From 2fb7c9a9e8a19ed9d86e72912ade1fca7462b0fe Mon Sep 17 00:00:00 2001
From: Torben Hoffmann
Date: Sun, 13 Mar 2016 20:33:12 +0100
Subject: [PATCH 112/122] WIP. TS query now works for create table

---
 src/riak_kv_wm_timeseries_query.erl | 642 +++++++++++++---------------
 1 file changed, 294 insertions(+), 348 deletions(-)

diff --git a/src/riak_kv_wm_timeseries_query.erl b/src/riak_kv_wm_timeseries_query.erl
index 3ba77d937e..5074aa2e29 100644
--- a/src/riak_kv_wm_timeseries_query.erl
+++ b/src/riak_kv_wm_timeseries_query.erl
@@ -37,16 +37,17 @@
          init/1,
          service_available/2,
          is_authorized/2,
+         malformed_request/2,
          forbidden/2,
          allowed_methods/2,
+         post_is_create/2,
          process_post/2,
-         malformed_request/2,
          content_types_accepted/2,
-         resource_exists/2,
          content_types_provided/2,
-         encodings_provided/2,
-         produce_doc_body/2,
-         accept_doc_body/2
+         encodings_provided/2
+        ]).
+
+-export([produce_doc_body/2
         ]).
 
 -include_lib("webmachine/include/webmachine.hrl").
@@ -54,23 +55,25 @@
 -include("riak_kv_wm_raw.hrl").
 -include("riak_kv_ts.hrl").
 
--record(ctx, {api_version,
-              method :: atom(),
-              prefix,       %% string() - prefix for resource uris
-              timeout,      %% integer() - passed-in timeout value in ms
-              security,     %% security context
-              client,       %% riak_client() - the store client
-              riak,         %% local | {node(), atom()} - params for riak client
-              query :: undefined | string(),
-              compiled_query :: undefined | #ddl_v1{} | #riak_sql_describe_v1{} | ?SQL_SELECT{},
-              result :: undefined | ok | {Headers::[binary()], Rows::[ts_rec()]} |
-                        [{entry, proplists:proplist()}]
-             }).
+-record(ctx, {
+          table :: 'undefined' | string(),
+          mod :: 'undefined' | module(),
+          method :: atom(),
+          prefix,       %% string() - prefix for resource uris
+          timeout,      %% integer() - passed-in timeout value in ms
+          security,     %% security context
+          riak,         %% local | {node(), atom()} - params for riak client
+          sql_type,
+          compiled_query :: undefined | #ddl_v1{} | #riak_sql_describe_v1{} | #riak_select_v1{},
+          result :: undefined | ok | {Headers::[binary()], Rows::[ts_rec()]} |
+                    [{entry, proplists:proplist()}]
+         }).
 
 -define(DEFAULT_TIMEOUT, 60000).
 -define(TABLE_ACTIVATE_WAIT, 30). %% wait until table's bucket type is activated
 
--define(CB_RV_SPEC, {boolean()|atom()|tuple(), #wm_reqdata{}, #ctx{}}).
+-type cb_rv_spec(T) :: {T, #wm_reqdata{}, #ctx{}}.
+-type halt() :: {'halt', 200..599} | {'error', term()}.
 
 -type ts_rec() :: [riak_pb_ts_codec:ldbvalue()].
 
@@ -81,42 +84,62 @@ init(Props) ->
     {ok, #ctx{prefix = proplists:get_value(prefix, Props),
               riak = proplists:get_value(riak, Props)}}.
--spec service_available(#wm_reqdata{}, #ctx{}) -> - {boolean(), #wm_reqdata{}, #ctx{}}. +-spec service_available(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). %% @doc Determine whether or not a connection to Riak -%% can be established. This function also takes this -%% opportunity to extract the 'bucket' and 'key' path -%% bindings from the dispatch, as well as any vtag -%% query parameter. +%% can be established. service_available(RD, Ctx = #ctx{riak = RiakProps}) -> + checkpoint("service_available: RD=~p", [RD]), case riak_kv_wm_utils:get_riak_client( RiakProps, riak_kv_wm_utils:get_client_id(RD)) of - {ok, C} -> - {true, RD, - Ctx#ctx{api_version = wrq:path_info(api_version, RD), - method = wrq:method(RD), - client = C - }}; + {ok, _C} -> + {true, RD, Ctx}; {error, Reason} -> - handle_error({riak_client_error, Reason}, RD, Ctx) + Resp = riak_kv_wm_ts_util:set_error_message("Unable to connect to Riak: ~p", + [Reason], RD), + {false, Resp, Ctx} end. -is_authorized(RD, Ctx) -> - case riak_api_web_security:is_authorized(RD) of - false -> - {"Basic realm=\"Riak\"", RD, Ctx}; - {true, SecContext} -> - {true, RD, Ctx#ctx{security = SecContext}}; - insecure -> - %% XXX 301 may be more appropriate here, but since the http and - %% https port are different and configurable, it is hard to figure - %% out the redirect URL to serve. - handle_error(insecure_connection, RD, Ctx) +malformed_request(RD, Ctx) -> + try + {SqlType, SQL} = query_from_request(RD), + Table = table_from_sql(SQL), + Mod = riak_ql_ddl:make_module_name(Table), + {false, RD, Ctx#ctx{sql_type=SqlType, + compiled_query=SQL, + table=Table, + mod=Mod}} + catch + throw:{query, Reason} -> + lager:log(info, self(), "try in malformed_request backfired: ~p", [Reason]), + Response = riak_kv_wm_ts_util:set_error_message("bad query: ~p", [Reason], RD), + {true, Response, Ctx} + end. + +-spec is_authorized(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()|halt()). +is_authorized(RD, #ctx{sql_type=SqlType, table=Table}=Ctx) -> + checkpoint("is_authorized", RD), + Call = call_from_sql_type(SqlType), + lager:log(info, self(), "is_authorized type:~p", [SqlType]), + case riak_kv_wm_ts_util:authorize(Call, Table, RD) of + ok -> + {true, RD, Ctx}; + {error, ErrorMsg} -> + {ErrorMsg, RD, Ctx}; + {insecure, Halt, Resp} -> + {Halt, Resp, Ctx} end. --spec forbidden(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +%% method_to_intended_api_call('POST') -> +%% query_create_table; +%% method_to_intended_api_call('PUT') -> +%% query_select; +%% method_to_intended_api_call('GET') -> +%% query_describe. + + +-spec forbidden(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). forbidden(RD, Ctx) -> case riak_kv_wm_utils:is_forbidden(RD) of true -> @@ -126,305 +149,224 @@ forbidden(RD, Ctx) -> %% for now {false, RD, Ctx} end. -%% Because webmachine chooses to (not) call certain callbacks -%% depending on request method used, sometimes accept_doc_body is not -%% called at all, and we arrive at produce_doc_body empty-handed. -%% This is the case when curl is executed with -G and --data. --spec allowed_methods(#wm_reqdata{}, #ctx{}) -> - {[atom()], #wm_reqdata{}, #ctx{}}. +-spec allowed_methods(#wm_reqdata{}, #ctx{}) -> cb_rv_spec([atom()]). allowed_methods(RD, Ctx) -> {['GET', 'POST'], RD, Ctx}. --spec malformed_request(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. 
-malformed_request(RD, Ctx) -> - %% this is plugged because requests are validated against - %% effective query contained in the body (and hence, we need - %% accept_doc_body to parse and extract things out of JSON first) - {false, RD, Ctx}. - --spec preexec(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -%% * extract query from request body or, failing that, from -%% POST k=v items, try to compile it; -%% * check API version; -%% * validate query type against HTTP method; -%% * check permissions on the query type. -preexec(RD, Ctx) -> - case validate_request(RD, Ctx) of - {true, RD1, Ctx1} -> - case check_permissions(RD1, Ctx1) of - {true, RD2, Ctx2} -> - call_api_function(RD2, Ctx2); - FalseWithDetails -> - FalseWithDetails - end; - FalseWithDetails -> - FalseWithDetails - end. - --spec validate_request(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -validate_request(RD, Ctx) -> - case wrq:path_info(api_version, RD) of - "v1" -> - validate_request_v1(RD, Ctx); - BadVersion -> - handle_error({unsupported_version, BadVersion}, RD, Ctx) - end. - --spec validate_request_v1(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -validate_request_v1(RD, Ctx = #ctx{method = Method}) -> - Json = extract_json(RD), - case {Method, extract_query(Json), extract_cover_context(Json)} of - {Method, Query, CoverContext} - when (Method == 'GET' orelse Method == 'POST') - andalso is_list(Query) -> - case riak_ql_parser:ql_parse( - riak_ql_lexer:get_tokens(Query)) of - {error, Reason} -> - handle_error({query_parse_error, Reason}, RD, Ctx); - {ddl, DDL} -> - valid_params( - RD, Ctx#ctx{api_version = "v1", - query = Query, - compiled_query = DDL}); - {Type, Compiled} when Type == select; - Type == describe -> - {ok, SQL} = riak_kv_ts_util:build_sql_record( - Type, Compiled, CoverContext), - valid_params( - RD, Ctx#ctx{api_version = "v1", - query = Query, - compiled_query = SQL}) - end; - _Invalid -> - handle_error({malformed_request, Method}, RD, Ctx) - end. +query_from_request(RD) -> + QueryStr = query_string_from_request(RD), + lager:log(info, self(), "query_from_request: ~p", [QueryStr]), + compile_query(QueryStr). - --spec valid_params(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -valid_params(RD, Ctx) -> - %% no params currently for query - {true, RD, Ctx}. - -%% This is a special case for curl -G. `curl -G host --data $data` -%% will send the $data in URL instead of in the body, so we try to -%% look for it in req_qs. -extract_json(RD) -> - case proplists:get_value("json", RD#wm_reqdata.req_qs) of +query_string_from_request(RD) -> + case wrq:get_qs_value("query", RD) of undefined -> - %% if it was a PUT or POST, data is in body - binary_to_list(wrq:req_body(RD)); - BodyInPost -> - BodyInPost + throw({query, "no query key in query string"}); + Str -> + Str end. --spec extract_query(binary()) -> term(). -extract_query(Json) -> - try mochijson2:decode(Json) of - {struct, Decoded} when is_list(Decoded) -> - validate_ts_query( - proplists:get_value(<<"query">>, Decoded)) - catch - _:_ -> - undefined +compile_query(QueryStr) -> + case riak_ql_parser:ql_parse( + riak_ql_lexer:get_tokens(QueryStr)) of + {error, Reason} -> + ErrorMsg = lists:flatten(io_lib:format("parse error: ~p", [Reason])), + throw({query, ErrorMsg}); + ValidRes -> + ValidRes end. --spec extract_cover_context(binary()) -> term(). -extract_cover_context(Json) -> - try mochijson2:decode(Json) of - {struct, Decoded} when is_list(Decoded) -> - validate_ts_cover_context( - proplists:get_value(<<"coverage_context">>, Decoded)) - catch - _:_ -> - undefined - end. 
-validate_ts_query(Q) when is_binary(Q) -> - binary_to_list(Q); -validate_ts_query(_) -> - undefined. - -validate_ts_cover_context(C) when is_binary(C) -> - C; -validate_ts_cover_context(_) -> - undefined. - - --spec check_permissions(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -check_permissions(RD, Ctx = #ctx{security = undefined}) -> - {true, RD, Ctx}; -check_permissions(RD, Ctx = #ctx{security = Security, - compiled_query = CompiledQry}) -> - case riak_core_security:check_permission( - decode_query_permissions(CompiledQry), Security) of - {false, Error, _} -> - handle_error( - {not_permitted, utf8_to_binary(Error)}, RD, Ctx); - _ -> - {true, RD, Ctx} - end. +%% @todo: should really be in riak_ql somewhere +table_from_sql(#ddl_v1{table=Table}) -> Table; +table_from_sql(#riak_select_v1{'FROM'=Table}) -> Table; +table_from_sql(#riak_sql_describe_v1{'DESCRIBE'=Table}) -> Table. + +call_from_sql_type(ddl) -> query_create_table; +call_from_sql_type(select) -> query_select; +call_from_sql_type(describe) -> query_describe. -decode_query_permissions(#ddl_v1{table = NewBucketType}) -> - {riak_kv_ts_util:api_call_to_perm(query_create_table), NewBucketType}; -decode_query_permissions(?SQL_SELECT{'FROM' = Table}) -> - {riak_kv_ts_util:api_call_to_perm(query_select), Table}; -decode_query_permissions(#riak_sql_describe_v1{'DESCRIBE' = Table}) -> - {riak_kv_ts_util:api_call_to_perm(query_describe), Table}. -spec content_types_provided(#wm_reqdata{}, #ctx{}) -> - {[{ContentType::string(), Producer::atom()}], - #wm_reqdata{}, #ctx{}}. + cb_rv_spec([{ContentType::string(), Producer::atom()}]). content_types_provided(RD, Ctx) -> {[{"application/json", produce_doc_body}], RD, Ctx}. -spec encodings_provided(#wm_reqdata{}, #ctx{}) -> - {[{Encoding::string(), Producer::function()}], - #wm_reqdata{}, #ctx{}}. + cb_rv_spec([{Encoding::string(), Producer::function()}]). encodings_provided(RD, Ctx) -> {riak_kv_wm_utils:default_encodings(), RD, Ctx}. -spec content_types_accepted(#wm_reqdata{}, #ctx{}) -> - {[{ContentType::string(), Acceptor::atom()}], - #wm_reqdata{}, #ctx{}}. + cb_rv_spec([{ContentType::string(), Acceptor::atom()}]). content_types_accepted(RD, Ctx) -> - {[{"application/json", accept_doc_body}], RD, Ctx}. +% {[{"application/json", accept_doc_body}], RD, Ctx}. +%% @todo: if we end up without a body in the request this function should be deleted. + {[], RD, Ctx}. --spec resource_exists(#wm_reqdata{}, #ctx{}) -> - {boolean(), #wm_reqdata{}, #ctx{}}. -resource_exists(RD0, Ctx0) -> - case preexec(RD0, Ctx0) of - {true, RD, Ctx} -> - call_api_function(RD, Ctx); - FalseWithDetails -> - FalseWithDetails - end. - --spec process_post(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -%% @doc Pass through requests to allow POST to function -%% as PUT for clients that do not support PUT. -process_post(RD, Ctx) -> - accept_doc_body(RD, Ctx). - --spec accept_doc_body(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. -accept_doc_body(RD0, Ctx0) -> - case preexec(RD0, Ctx0) of - {true, RD, Ctx} -> - call_api_function(RD, Ctx); - FalseWithDetails -> - FalseWithDetails - end. +-spec post_is_create(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). +post_is_create(RD, Ctx) -> + {false, RD, Ctx}. --spec call_api_function(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. 
-call_api_function(RD, Ctx = #ctx{result = Result}) - when Result /= undefined -> - lager:debug("Function already executed", []), - {true, RD, Ctx}; -call_api_function(RD, Ctx = #ctx{method = Method, - compiled_query = CompiledQry}) -> - case CompiledQry of - SQL = ?SQL_SELECT{} when Method == 'GET' -> - %% inject coverage context - process_query(SQL, RD, Ctx); - Other when (is_record(Other, ddl_v1) andalso Method == 'POST') orelse - (is_record(Other, riak_sql_describe_v1) andalso Method == 'GET') -> - process_query(Other, RD, Ctx); - _Other -> - handle_error({inappropriate_sql_for_method, Method}, RD, Ctx) +-spec process_post(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). +process_post(RD, #ctx{sql_type=ddl, compiled_query=SQL}=Ctx) -> + case create_table(SQL) of + ok -> + Result = [{success, true}], %% represents ok + Json = to_json(Result), + {true, wrq:append_to_response_body(Json, RD), Ctx}; + {error, Reason} -> + Resp = riak_kv_wm_ts_util:set_error_message("query error: ~p", + [Reason], + RD), + {{halt, 500}, Resp, Ctx} end. - -process_query(DDL = #ddl_v1{table = Table}, RD, Ctx) -> +%% -spec accept_doc_body(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). +%% accept_doc_body(RD0, Ctx0) -> +%% {true, RD0, Ctx0}. + +%% -spec call_api_function(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). +%% call_api_function(RD, Ctx = #ctx{result = Result}) +%% when Result /= undefined -> +%% lager:debug("Function already executed", []), +%% {true, RD, Ctx}; +%% call_api_function(RD, Ctx = #ctx{method = Method, +%% compiled_query = CompiledQry}) -> +%% case CompiledQry of +%% SQL = #riak_select_v1{} when Method == 'GET' -> +%% %% inject coverage context +%% process_query(SQL, RD, Ctx); +%% Other when (is_record(Other, ddl_v1) andalso Method == 'POST') orelse +%% (is_record(Other, riak_sql_describe_v1) andalso Method == 'GET') -> +%% process_query(Other, RD, Ctx); +%% _Other -> +%% handle_error({inappropriate_sql_for_method, Method}, RD, Ctx) +%% end. + + +create_table(DDL = #ddl_v1{table = Table}) -> + %% would be better to use a function to get the table out. {ok, Props1} = riak_kv_ts_util:apply_timeseries_bucket_props(DDL, []), Props2 = [riak_kv_wm_utils:erlify_bucket_prop(P) || P <- Props1], - %% TODO: let's not bother collecting user properties from (say) - %% sidecar object in body JSON: when #ddl_v2 work is merged, we - %% will have a way to collect those bespoke table properties from - %% WITH clause. 
case riak_core_bucket_type:create(Table, Props2) of ok -> - wait_until_active(Table, RD, Ctx, ?TABLE_ACTIVATE_WAIT); + wait_until_active(Table, ?TABLE_ACTIVATE_WAIT); {error, Reason} -> - handle_error({table_create_fail, Table, Reason}, RD, Ctx) - end; - -process_query(SQL = ?SQL_SELECT{'FROM' = Table}, RD, Ctx0 = #ctx{}) -> - Mod = riak_ql_ddl:make_module_name(Table), - case catch Mod:get_ddl() of - {_, {undef, _}} -> - handle_error({no_such_table, Table}, RD, Ctx0); - DDL -> - case riak_kv_ts_api:query(SQL, DDL) of - {ok, Data} -> - {ColumnNames, _ColumnTypes, Rows} = Data, - Ctx = Ctx0#ctx{result = {ColumnNames, Rows}}, - prepare_data_in_body(RD, Ctx); - %% the following timeouts are known and distinguished: - {error, qry_worker_timeout} -> - %% the eleveldb process didn't send us any response after - %% 10 sec (hardcoded in riak_kv_qry), and probably died - handle_error(query_worker_timeout, RD, Ctx0); - {error, backend_timeout} -> - %% the eleveldb process did manage to send us a timeout - %% response - handle_error(backend_timeout, RD, Ctx0); - - {error, Reason} -> - handle_error({query_exec_error, Reason}, RD, Ctx0) - end - end; - -process_query(SQL = #riak_sql_describe_v1{'DESCRIBE' = Table}, RD, Ctx0 = #ctx{}) -> - Mod = riak_ql_ddl:make_module_name(Table), - case catch Mod:get_ddl() of - {_, {undef, _}} -> - handle_error({no_such_table, Table}, RD, Ctx0); - DDL -> - case riak_kv_ts_api:query(SQL, DDL) of - {ok, Data} -> - ColumnNames = [<<"Column">>, <<"Type">>, <<"Is Null">>, - <<"Primary Key">>, <<"Local Key">>], - Ctx = Ctx0#ctx{result = {ColumnNames, Data}}, - prepare_data_in_body(RD, Ctx); - {error, Reason} -> - handle_error({query_exec_error, Reason}, RD, Ctx0) - end + {error,{table_create_fail, Table, Reason}} end. - -wait_until_active(Table, RD, Ctx, 0) -> - handle_error({table_activate_fail, Table}, RD, Ctx); -wait_until_active(Table, RD, Ctx, Seconds) -> +%% process_query(DDL = #ddl_v1{table = Table}, RD, Ctx) -> +%% {ok, Props1} = riak_kv_ts_util:apply_timeseries_bucket_props(DDL, []), +%% Props2 = [riak_kv_wm_utils:erlify_bucket_prop(P) || P <- Props1], +%% %% TODO: let's not bother collecting user properties from (say) +%% %% sidecar object in body JSON: when #ddl_v2 work is merged, we +%% %% will have a way to collect those bespoke table properties from +%% %% WITH clause. 
+%% case riak_core_bucket_type:create(Table, Props2) of +%% ok -> +%% wait_until_active(Table, RD, Ctx, ?TABLE_ACTIVATE_WAIT); +%% {error, Reason} -> +%% handle_error({table_create_fail, Table, Reason}, RD, Ctx) +%% end; + +%% process_query(SQL = #riak_select_v1{'FROM' = Table}, RD, Ctx0 = #ctx{}) -> +%% Mod = riak_ql_ddl:make_module_name(Table), +%% case catch Mod:get_ddl() of +%% {_, {undef, _}} -> +%% handle_error({no_such_table, Table}, RD, Ctx0); +%% DDL -> +%% case riak_kv_ts_api:query(SQL, DDL) of +%% {ok, Data} -> +%% {ColumnNames, _ColumnTypes, Rows} = Data, +%% Ctx = Ctx0#ctx{result = {ColumnNames, Rows}}, +%% prepare_data_in_body(RD, Ctx); +%% %% the following timeouts are known and distinguished: +%% {error, qry_worker_timeout} -> +%% %% the eleveldb process didn't send us any response after +%% %% 10 sec (hardcoded in riak_kv_qry), and probably died +%% handle_error(query_worker_timeout, RD, Ctx0); +%% {error, backend_timeout} -> +%% %% the eleveldb process did manage to send us a timeout +%% %% response +%% handle_error(backend_timeout, RD, Ctx0); + +%% {error, Reason} -> +%% handle_error({query_exec_error, Reason}, RD, Ctx0) +%% end +%% end; + +%% process_query(SQL = #riak_sql_describe_v1{'DESCRIBE' = Table}, RD, Ctx0 = #ctx{}) -> +%% Mod = riak_ql_ddl:make_module_name(Table), +%% case catch Mod:get_ddl() of +%% {_, {undef, _}} -> +%% handle_error({no_such_table, Table}, RD, Ctx0); +%% DDL -> +%% case riak_kv_ts_api:query(SQL, DDL) of +%% {ok, Data} -> +%% ColumnNames = [<<"Column">>, <<"Type">>, <<"Is Null">>, +%% <<"Primary Key">>, <<"Local Key">>], +%% Ctx = Ctx0#ctx{result = {ColumnNames, Data}}, +%% prepare_data_in_body(RD, Ctx); +%% {error, Reason} -> +%% handle_error({query_exec_error, Reason}, RD, Ctx0) +%% end +%% end. + + +wait_until_active(Table, 0) -> + {error, {table_activate_fail, Table}}; +wait_until_active(Table, Seconds) -> case riak_core_bucket_type:activate(Table) of ok -> - prepare_data_in_body(RD, Ctx#ctx{result = {[],[]}}); - %% a way for CREATE TABLE queries to return 'ok' on success + ok; {error, not_ready} -> timer:sleep(1000), - wait_until_active(Table, RD, Ctx, Seconds - 1); + wait_until_active(Table, Seconds - 1); {error, undefined} -> %% this is inconceivable because create(Table) has %% just succeeded, so it's here mostly to pacify %% the dialyzer (and of course, for the odd chance %% of Erlang imps crashing nodes between create %% and activate calls) - handle_error({table_created_missing, Table}, RD, Ctx) + {error, {table_created_missing, Table}} end. -prepare_data_in_body(RD0, Ctx0) -> - {Json, RD1, Ctx1} = produce_doc_body(RD0, Ctx0), - {true, wrq:append_to_response_body(Json, RD1), Ctx1}. - --spec produce_doc_body(#wm_reqdata{}, #ctx{}) -> ?CB_RV_SPEC. +%% wait_until_active(Table, RD, Ctx, 0) -> +%% handle_error({table_activate_fail, Table}, RD, Ctx); +%% wait_until_active(Table, RD, Ctx, Seconds) -> +%% case riak_core_bucket_type:activate(Table) of +%% ok -> +%% prepare_data_in_body(RD, Ctx#ctx{result = {[],[]}}); +%% %% a way for CREATE TABLE queries to return 'ok' on success +%% {error, not_ready} -> +%% timer:sleep(1000), +%% wait_until_active(Table, RD, Ctx, Seconds - 1); +%% {error, undefined} -> +%% %% this is inconceivable because create(Table) has +%% %% just succeeded, so it's here mostly to pacify +%% %% the dialyzer (and of course, for the odd chance +%% %% of Erlang imps crashing nodes between create +%% %% and activate calls) +%% handle_error({table_created_missing, Table}, RD, Ctx) +%% end. 
+ +%% prepare_data_in_body(RD0, Ctx0) -> +%% {Json, RD1, Ctx1} = produce_doc_body(RD0, Ctx0), +%% {true, wrq:append_to_response_body(Json, RD1), Ctx1}. + + +-spec produce_doc_body(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(iolist()). %% @doc Extract the value of the document, and place it in the %% response body of the request. produce_doc_body(RD, Ctx = #ctx{result = {Columns, Rows}}) -> @@ -433,64 +375,68 @@ produce_doc_body(RD, Ctx = #ctx{result = {Columns, Rows}}) -> {<<"rows">>, Rows}]}), RD, Ctx}. - -error_out(Type, Fmt, Args, RD, Ctx) -> - {Type, - wrq:set_resp_header( - "Content-Type", "text/plain", wrq:append_to_response_body( - flat_format(Fmt, Args), RD)), - Ctx}. - --spec handle_error(atom()|tuple(), #wm_reqdata{}, #ctx{}) -> {tuple(), #wm_reqdata{}, #ctx{}}. -handle_error(Error, RD, Ctx) -> - case Error of - {riak_client_error, Reason} -> - error_out(false, - "Unable to connect to Riak: ~p", [Reason], RD, Ctx); - insecure_connection -> - error_out({halt, 426}, - "Security is enabled and Riak does not" - " accept credentials over HTTP. Try HTTPS instead.", [], RD, Ctx); - {unsupported_version, BadVersion} -> - error_out({halt, 412}, - "Unsupported API version ~s", [BadVersion], RD, Ctx); - {not_permitted, Table} -> - error_out({halt, 401}, - "Access to table ~ts not allowed", [Table], RD, Ctx); - {malformed_request, Method} -> - error_out({halt, 400}, - "Malformed ~s request", [Method], RD, Ctx); - {no_such_table, Table} -> - error_out({halt, 404}, - "Table \"~ts\" does not exist", [Table], RD, Ctx); - {query_parse_error, Detailed} -> - error_out({halt, 400}, - "Malformed query: ~ts", [Detailed], RD, Ctx); - {table_create_fail, Table, Reason} -> - error_out({halt, 500}, - "Failed to create table \"~ts\": ~p", [Table, Reason], RD, Ctx); - query_worker_timeout -> - error_out({halt, 503}, - "Query worker timeout", [], RD, Ctx); - backend_timeout -> - error_out({halt, 503}, - "Storage backend timeout", [], RD, Ctx); - {query_exec_error, Detailed} -> - error_out({halt, 400}, - "Query execution failed: ~ts", [Detailed], RD, Ctx); - {table_activate_fail, Table} -> - error_out({halt, 500}, - "Failed to activate bucket type for table \"~ts\"", [Table], RD, Ctx); - {table_created_missing, Table} -> - error_out({halt, 500}, - "Bucket type for table \"~ts\" disappeared suddenly before activation", [Table], RD, Ctx); - {inappropriate_sql_for_method, Method} -> - error_out({halt, 400}, - "Inappropriate method ~s for SQL query type", [Method], RD, Ctx) - end. - -flat_format(Format, Args) -> - lists:flatten(io_lib:format(Format, Args)). - -utf8_to_binary(S) -> - unicode:characters_to_binary(S, utf8, utf8). +to_json({Columns, Rows}) when is_list(Columns), is_list(Rows) -> + mochijson2:encode( + {struct, [{<<"columns">>, Columns}, + {<<"rows">>, Rows}]}); +to_json(Other) -> + mochijson2:encode(Other). + + + +%% -spec handle_error(atom()|tuple(), #wm_reqdata{}, #ctx{}) -> {tuple(), #wm_reqdata{}, #ctx{}}. +%% handle_error(Error, RD, Ctx) -> +%% case Error of +%% {riak_client_error, Reason} -> +%% error_out(false, +%% "Unable to connect to Riak: ~p", [Reason], RD, Ctx); +%% insecure_connection -> +%% error_out({halt, 426}, +%% "Security is enabled and Riak does not" +%% " accept credentials over HTTP. 
Try HTTPS instead.", [], RD, Ctx); +%% {unsupported_version, BadVersion} -> +%% error_out({halt, 412}, +%% "Unsupported API version ~s", [BadVersion], RD, Ctx); +%% {not_permitted, Table} -> +%% error_out({halt, 401}, +%% "Access to table ~ts not allowed", [Table], RD, Ctx); +%% {malformed_request, Method} -> +%% error_out({halt, 400}, +%% "Malformed ~s request", [Method], RD, Ctx); +%% {no_such_table, Table} -> +%% error_out({halt, 404}, +%% "Table \"~ts\" does not exist", [Table], RD, Ctx); +%% {query_parse_error, Detailed} -> +%% error_out({halt, 400}, +%% "Malformed query: ~ts", [Detailed], RD, Ctx); +%% {table_create_fail, Table, Reason} -> +%% error_out({halt, 500}, +%% "Failed to create table \"~ts\": ~p", [Table, Reason], RD, Ctx); +%% query_worker_timeout -> +%% error_out({halt, 503}, +%% "Query worker timeout", [], RD, Ctx); +%% backend_timeout -> +%% error_out({halt, 503}, +%% "Storage backend timeout", [], RD, Ctx); +%% {query_exec_error, Detailed} -> +%% error_out({halt, 400}, +%% "Query execution failed: ~ts", [Detailed], RD, Ctx); +%% {table_activate_fail, Table} -> +%% error_out({halt, 500}, +%% "Failed to activate bucket type for table \"~ts\"", [Table], RD, Ctx); +%% {table_created_missing, Table} -> +%% error_out({halt, 500}, +%% "Bucket type for table \"~ts\" disappeared suddenly before activation", [Table], RD, Ctx); +%% {inappropriate_sql_for_method, Method} -> +%% error_out({halt, 400}, +%% "Inappropriate method ~s for SQL query type", [Method], RD, Ctx) +%% end. + +%% flat_format(Format, Args) -> +%% lists:flatten(io_lib:format(Format, Args)). + +%% utf8_to_binary(S) -> +%% unicode:characters_to_binary(S, utf8, utf8). + +checkpoint(Format, Args) -> + lager:log(info, self(), Format, Args). From 00e15f9aede55378f763e6f246a590c6e4527e7c Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Sun, 13 Mar 2016 21:00:15 +0100 Subject: [PATCH 113/122] WIP. timeseries describe query now works. --- src/riak_kv_wm_timeseries_query.erl | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/src/riak_kv_wm_timeseries_query.erl b/src/riak_kv_wm_timeseries_query.erl index 5074aa2e29..6078fc3c29 100644 --- a/src/riak_kv_wm_timeseries_query.erl +++ b/src/riak_kv_wm_timeseries_query.erl @@ -103,6 +103,7 @@ service_available(RD, Ctx = #ctx{riak = RiakProps}) -> malformed_request(RD, Ctx) -> try {SqlType, SQL} = query_from_request(RD), + checkpoint("malformed_request SqlType=~p, SQL=~p", [SqlType, SQL]), Table = table_from_sql(SQL), Mod = riak_ql_ddl:make_module_name(Table), {false, RD, Ctx#ctx{sql_type=SqlType, @@ -176,8 +177,12 @@ compile_query(QueryStr) -> {error, Reason} -> ErrorMsg = lists:flatten(io_lib:format("parse error: ~p", [Reason])), throw({query, ErrorMsg}); - ValidRes -> - ValidRes + {ddl, _ } = Res -> + Res; + {Type, Compiled} when Type==select; Type==describe -> + {ok, SQL} = riak_kv_ts_util:build_sql_record( + Type, Compiled, undefined), + {Type, SQL} end. 
@@ -229,8 +234,24 @@ process_post(RD, #ctx{sql_type=ddl, compiled_query=SQL}=Ctx) ->
                                                         [Reason],
                                                         RD),
             {{halt, 500}, Resp, Ctx}
+    end;
+process_post(RD, #ctx{sql_type=describe,
+                      compiled_query=SQL,
+                      mod=Mod}=Ctx) ->
+    DDL = Mod:get_ddl(), %% might be faster to store this earlier on
+    case riak_kv_ts_api:query(SQL, DDL) of
+        {ok, Data} ->
+            ColumnNames = [<<"Column">>, <<"Type">>, <<"Is Null">>,
+                           <<"Primary Key">>, <<"Local Key">>],
+            Json = to_json({ColumnNames, Data}),
+            {true, wrq:append_to_response_body(Json, RD), Ctx};
+        {error, Reason} ->
+            Resp = riak_kv_wm_ts_util:set_error_message(
+                     "describe failed: ~p", [Reason], RD),
+            {{halt, 500}, Resp, Ctx}
     end.
 
+
 %% -spec accept_doc_body(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()).
 %% accept_doc_body(RD0, Ctx0) ->
 %%     {true, RD0, Ctx0}.

From 66b4249d646e02af19b5706f29f5d26ba5f0d62f Mon Sep 17 00:00:00 2001
From: Torben Hoffmann
Date: Sun, 13 Mar 2016 21:14:20 +0100
Subject: [PATCH 114/122] timeseries select query works.

---
 src/riak_kv_wm_timeseries_query.erl | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/src/riak_kv_wm_timeseries_query.erl b/src/riak_kv_wm_timeseries_query.erl
index 6078fc3c29..d85636215b 100644
--- a/src/riak_kv_wm_timeseries_query.erl
+++ b/src/riak_kv_wm_timeseries_query.erl
@@ -249,9 +249,37 @@ process_post(RD, #ctx{sql_type=describe,
             Resp = riak_kv_wm_ts_util:set_error_message(
                      "describe failed: ~p", [Reason], RD),
             {{halt, 500}, Resp, Ctx}
+    end;
+process_post(RD, #ctx{sql_type=select,
+                      compiled_query=SQL,
+                      mod=Mod}=Ctx) ->
+    DDL = Mod:get_ddl(), %% might be faster to store this earlier on
+    case riak_kv_ts_api:query(SQL, DDL) of
+        {ok, Data} ->
+            {ColumnNames, _ColumnTypes, Rows} = Data,
+            Json = to_json({ColumnNames, Rows}),
+            {true, wrq:append_to_response_body(Json, RD), Ctx};
+        %% the following timeouts are known and distinguished:
+        {error, qry_worker_timeout} ->
+            %% the eleveldb process didn't send us any response after
+            %% 10 sec (hardcoded in riak_kv_qry), and probably died
+            Resp = riak_kv_wm_ts_util:set_error_message(
+                     "qry_worker_timeout", [], RD),
+            {false, Resp, Ctx};
+        {error, backend_timeout} ->
+            %% the eleveldb process did manage to send us a timeout
+            %% response
+            Resp = riak_kv_wm_ts_util:set_error_message(
+                     "backend_timeout", [], RD),
+            {false, Resp, Ctx};
+        {error, Reason} ->
+            Resp = riak_kv_wm_ts_util:set_error_message(
+                     "select query execution error: ~p", [Reason], RD),
+            {false, Resp, Ctx}
     end.
 

From f80f02b32b0f2cab564300a5436def9c329cc744 Mon Sep 17 00:00:00 2001
From: Torben Hoffmann
Date: Sun, 13 Mar 2016 21:26:50 +0100
Subject: [PATCH 115/122] riak_kv_wm_timeseries_query: remove dead code

---
 src/riak_kv_wm_timeseries_query.erl | 177 ----------------------------
 1 file changed, 177 deletions(-)

diff --git a/src/riak_kv_wm_timeseries_query.erl b/src/riak_kv_wm_timeseries_query.erl
index d85636215b..b21a6fefe7 100644
--- a/src/riak_kv_wm_timeseries_query.erl
+++ b/src/riak_kv_wm_timeseries_query.erl
@@ -131,15 +131,6 @@ is_authorized(RD, #ctx{sql_type=SqlType, table=Table}=Ctx) ->
         {Halt, Resp, Ctx}
     end.
 
-
-%% method_to_intended_api_call('POST') ->
-%%     query_create_table;
-%% method_to_intended_api_call('PUT') ->
-%%     query_select;
-%% method_to_intended_api_call('GET') ->
-%%     query_describe.
-
-
 -spec forbidden(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()).
forbidden(RD, Ctx) -> case riak_kv_wm_utils:is_forbidden(RD) of @@ -156,8 +147,6 @@ forbidden(RD, Ctx) -> allowed_methods(RD, Ctx) -> {['GET', 'POST'], RD, Ctx}. - - query_from_request(RD) -> QueryStr = query_string_from_request(RD), lager:log(info, self(), "query_from_request: ~p", [QueryStr]), @@ -185,8 +174,6 @@ compile_query(QueryStr) -> {Type, SQL} end. - - %% @todo: should really be in riak_ql somewhere table_from_sql(#ddl_v1{table=Table}) -> Table; table_from_sql(#riak_select_v1{'FROM'=Table}) -> Table; @@ -278,31 +265,6 @@ process_post(RD, #ctx{sql_type=select, {false, Resp, Ctx} end. - - -%% -spec accept_doc_body(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). -%% accept_doc_body(RD0, Ctx0) -> -%% {true, RD0, Ctx0}. - -%% -spec call_api_function(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). -%% call_api_function(RD, Ctx = #ctx{result = Result}) -%% when Result /= undefined -> -%% lager:debug("Function already executed", []), -%% {true, RD, Ctx}; -%% call_api_function(RD, Ctx = #ctx{method = Method, -%% compiled_query = CompiledQry}) -> -%% case CompiledQry of -%% SQL = #riak_select_v1{} when Method == 'GET' -> -%% %% inject coverage context -%% process_query(SQL, RD, Ctx); -%% Other when (is_record(Other, ddl_v1) andalso Method == 'POST') orelse -%% (is_record(Other, riak_sql_describe_v1) andalso Method == 'GET') -> -%% process_query(Other, RD, Ctx); -%% _Other -> -%% handle_error({inappropriate_sql_for_method, Method}, RD, Ctx) -%% end. - - create_table(DDL = #ddl_v1{table = Table}) -> %% would be better to use a function to get the table out. {ok, Props1} = riak_kv_ts_util:apply_timeseries_bucket_props(DDL, []), @@ -314,64 +276,6 @@ create_table(DDL = #ddl_v1{table = Table}) -> {error,{table_create_fail, Table, Reason}} end. -%% process_query(DDL = #ddl_v1{table = Table}, RD, Ctx) -> -%% {ok, Props1} = riak_kv_ts_util:apply_timeseries_bucket_props(DDL, []), -%% Props2 = [riak_kv_wm_utils:erlify_bucket_prop(P) || P <- Props1], -%% %% TODO: let's not bother collecting user properties from (say) -%% %% sidecar object in body JSON: when #ddl_v2 work is merged, we -%% %% will have a way to collect those bespoke table properties from -%% %% WITH clause. 
-%% case riak_core_bucket_type:create(Table, Props2) of -%% ok -> -%% wait_until_active(Table, RD, Ctx, ?TABLE_ACTIVATE_WAIT); -%% {error, Reason} -> -%% handle_error({table_create_fail, Table, Reason}, RD, Ctx) -%% end; - -%% process_query(SQL = #riak_select_v1{'FROM' = Table}, RD, Ctx0 = #ctx{}) -> -%% Mod = riak_ql_ddl:make_module_name(Table), -%% case catch Mod:get_ddl() of -%% {_, {undef, _}} -> -%% handle_error({no_such_table, Table}, RD, Ctx0); -%% DDL -> -%% case riak_kv_ts_api:query(SQL, DDL) of -%% {ok, Data} -> -%% {ColumnNames, _ColumnTypes, Rows} = Data, -%% Ctx = Ctx0#ctx{result = {ColumnNames, Rows}}, -%% prepare_data_in_body(RD, Ctx); -%% %% the following timeouts are known and distinguished: -%% {error, qry_worker_timeout} -> -%% %% the eleveldb process didn't send us any response after -%% %% 10 sec (hardcoded in riak_kv_qry), and probably died -%% handle_error(query_worker_timeout, RD, Ctx0); -%% {error, backend_timeout} -> -%% %% the eleveldb process did manage to send us a timeout -%% %% response -%% handle_error(backend_timeout, RD, Ctx0); - -%% {error, Reason} -> -%% handle_error({query_exec_error, Reason}, RD, Ctx0) -%% end -%% end; - -%% process_query(SQL = #riak_sql_describe_v1{'DESCRIBE' = Table}, RD, Ctx0 = #ctx{}) -> -%% Mod = riak_ql_ddl:make_module_name(Table), -%% case catch Mod:get_ddl() of -%% {_, {undef, _}} -> -%% handle_error({no_such_table, Table}, RD, Ctx0); -%% DDL -> -%% case riak_kv_ts_api:query(SQL, DDL) of -%% {ok, Data} -> -%% ColumnNames = [<<"Column">>, <<"Type">>, <<"Is Null">>, -%% <<"Primary Key">>, <<"Local Key">>], -%% Ctx = Ctx0#ctx{result = {ColumnNames, Data}}, -%% prepare_data_in_body(RD, Ctx); -%% {error, Reason} -> -%% handle_error({query_exec_error, Reason}, RD, Ctx0) -%% end -%% end. - - wait_until_active(Table, 0) -> {error, {table_activate_fail, Table}}; wait_until_active(Table, Seconds) -> @@ -390,31 +294,6 @@ wait_until_active(Table, Seconds) -> {error, {table_created_missing, Table}} end. - -%% wait_until_active(Table, RD, Ctx, 0) -> -%% handle_error({table_activate_fail, Table}, RD, Ctx); -%% wait_until_active(Table, RD, Ctx, Seconds) -> -%% case riak_core_bucket_type:activate(Table) of -%% ok -> -%% prepare_data_in_body(RD, Ctx#ctx{result = {[],[]}}); -%% %% a way for CREATE TABLE queries to return 'ok' on success -%% {error, not_ready} -> -%% timer:sleep(1000), -%% wait_until_active(Table, RD, Ctx, Seconds - 1); -%% {error, undefined} -> -%% %% this is inconceivable because create(Table) has -%% %% just succeeded, so it's here mostly to pacify -%% %% the dialyzer (and of course, for the odd chance -%% %% of Erlang imps crashing nodes between create -%% %% and activate calls) -%% handle_error({table_created_missing, Table}, RD, Ctx) -%% end. - -%% prepare_data_in_body(RD0, Ctx0) -> -%% {Json, RD1, Ctx1} = produce_doc_body(RD0, Ctx0), -%% {true, wrq:append_to_response_body(Json, RD1), Ctx1}. - - -spec produce_doc_body(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(iolist()). %% @doc Extract the value of the document, and place it in the %% response body of the request. @@ -431,61 +310,5 @@ to_json({Columns, Rows}) when is_list(Columns), is_list(Rows) -> to_json(Other) -> mochijson2:encode(Other). - - -%% -spec handle_error(atom()|tuple(), #wm_reqdata{}, #ctx{}) -> {tuple(), #wm_reqdata{}, #ctx{}}. 
-%% handle_error(Error, RD, Ctx) ->
-%%     case Error of
-%%         {riak_client_error, Reason} ->
-%%             error_out(false,
-%%                       "Unable to connect to Riak: ~p", [Reason], RD, Ctx);
-%%         insecure_connection ->
-%%             error_out({halt, 426},
-%%                       "Security is enabled and Riak does not"
-%%                       " accept credentials over HTTP. Try HTTPS instead.", [], RD, Ctx);
-%%         {unsupported_version, BadVersion} ->
-%%             error_out({halt, 412},
-%%                       "Unsupported API version ~s", [BadVersion], RD, Ctx);
-%%         {not_permitted, Table} ->
-%%             error_out({halt, 401},
-%%                       "Access to table ~ts not allowed", [Table], RD, Ctx);
-%%         {malformed_request, Method} ->
-%%             error_out({halt, 400},
-%%                       "Malformed ~s request", [Method], RD, Ctx);
-%%         {no_such_table, Table} ->
-%%             error_out({halt, 404},
-%%                       "Table \"~ts\" does not exist", [Table], RD, Ctx);
-%%         {query_parse_error, Detailed} ->
-%%             error_out({halt, 400},
-%%                       "Malformed query: ~ts", [Detailed], RD, Ctx);
-%%         {table_create_fail, Table, Reason} ->
-%%             error_out({halt, 500},
-%%                       "Failed to create table \"~ts\": ~p", [Table, Reason], RD, Ctx);
-%%         query_worker_timeout ->
-%%             error_out({halt, 503},
-%%                       "Query worker timeout", [], RD, Ctx);
-%%         backend_timeout ->
-%%             error_out({halt, 503},
-%%                       "Storage backend timeout", [], RD, Ctx);
-%%         {query_exec_error, Detailed} ->
-%%             error_out({halt, 400},
-%%                       "Query execution failed: ~ts", [Detailed], RD, Ctx);
-%%         {table_activate_fail, Table} ->
-%%             error_out({halt, 500},
-%%                       "Failed to activate bucket type for table \"~ts\"", [Table], RD, Ctx);
-%%         {table_created_missing, Table} ->
-%%             error_out({halt, 500},
-%%                       "Bucket type for table \"~ts\" disappeared suddenly before activation", [Table], RD, Ctx);
-%%         {inappropriate_sql_for_method, Method} ->
-%%             error_out({halt, 400},
-%%                       "Inappropriate method ~s for SQL query type", [Method], RD, Ctx)
-%%     end.
-
-%% flat_format(Format, Args) ->
-%%     lists:flatten(io_lib:format(Format, Args)).
-
-%% utf8_to_binary(S) ->
-%%     unicode:characters_to_binary(S, utf8, utf8).
-
 checkpoint(Format, Args) ->
     lager:log(info, self(), Format, Args).

From 70d67a509130fd0d41ecb9afb7f5786e0db55666 Mon Sep 17 00:00:00 2001
From: Torben Hoffmann
Date: Mon, 14 Mar 2016 12:54:37 +0100
Subject: [PATCH 116/122] Trying to humour dialyzer, but it is not yielding fully.

---
 src/riak_kv_wm_timeseries_listkeys.erl | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/src/riak_kv_wm_timeseries_listkeys.erl b/src/riak_kv_wm_timeseries_listkeys.erl
index 69e33f443e..2122143284 100644
--- a/src/riak_kv_wm_timeseries_listkeys.erl
+++ b/src/riak_kv_wm_timeseries_listkeys.erl
@@ -159,12 +159,23 @@ ts_keys_to_html(EncodedKeys, Table, Mod) ->
     BaseUrl = base_url(Table),
     Keys = decode_keys(EncodedKeys),
     KeyTypes = riak_kv_wm_ts_util:local_key_fields_and_types(Mod),
-    URLs = [io_lib:format("~s~s", [BaseUrl, key_to_string(Key, KeyTypes)])
+    URLs = [format_url(BaseUrl, KeyTypes, Key)
            || Key <- Keys],
-    Hrefs = [ io_lib:format("<a href=\"~s\">~s</a>", [URL, URL])
-              || URL <- URLs],
+    %% Dialyzer does not like the list comprehension; to avoid the
+    %% dialyzer error you have to write it like this:
+    %% URLs = lists:map(fun(Key) ->
+    %%                          format_url(BaseUrl, KeyTypes, Key)
+    %%                  end,
+    %%                  Keys),
+    Hrefs = [format_href(URL) || URL <- URLs],
     list_to_binary(lists:flatten(Hrefs)).
 
+format_href(URL) ->
+    io_lib:format("<a href=\"~s\">~s</a>", [URL, URL]).
+
+format_url(BaseUrl, KeyTypes, Key) ->
+    io_lib:format("~s~s", [BaseUrl, key_to_string(Key, KeyTypes)]).
+
 decode_keys(Keys) ->
     [tuple_to_list(sext:decode(A))
     || A <- Keys, A /= []].
@@ -181,5 +192,5 @@ value_to_url_string(V, timestamp) -> base_url(Table) -> {ok, [{Server, Port}]} = application:get_env(riak_api, http), - io_lib:format("http://~s:~B/ts/v1/tables/~s/keys/", - [Server, Port, Table]). + lists:flatten(io_lib:format("http://~s:~B/ts/v1/tables/~s/keys/", + [Server, Port, Table])). From 6736095f9b554ad5c14849a46b6abd0e8fb005a2 Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Mon, 14 Mar 2016 12:55:27 +0100 Subject: [PATCH 117/122] riak_kv_wm_timeseries_query reorganised to have the callbacks in the right order. And all helper functions at the end of the module. --- src/riak_kv_wm_timeseries_query.erl | 93 +++++++++++++++-------------- 1 file changed, 47 insertions(+), 46 deletions(-) diff --git a/src/riak_kv_wm_timeseries_query.erl b/src/riak_kv_wm_timeseries_query.erl index b21a6fefe7..0c4c657bdd 100644 --- a/src/riak_kv_wm_timeseries_query.erl +++ b/src/riak_kv_wm_timeseries_query.erl @@ -56,7 +56,7 @@ -include("riak_kv_ts.hrl"). -record(ctx, { - table :: 'undefined' | string(), + table :: 'undefined' | binary(), mod :: 'undefined' | module(), method :: atom(), prefix, %% string() - prefix for resource uris @@ -99,7 +99,11 @@ service_available(RD, Ctx = #ctx{riak = RiakProps}) -> {false, Resp, Ctx} end. +-spec allowed_methods(#wm_reqdata{}, #ctx{}) -> cb_rv_spec([atom()]). +allowed_methods(RD, Ctx) -> + {['GET', 'POST'], RD, Ctx}. +-spec malformed_request(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). malformed_request(RD, Ctx) -> try {SqlType, SQL} = query_from_request(RD), @@ -117,16 +121,16 @@ malformed_request(RD, Ctx) -> {true, Response, Ctx} end. --spec is_authorized(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()|halt()). +-spec is_authorized(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()|string()|halt()). is_authorized(RD, #ctx{sql_type=SqlType, table=Table}=Ctx) -> - checkpoint("is_authorized", RD), Call = call_from_sql_type(SqlType), lager:log(info, self(), "is_authorized type:~p", [SqlType]), case riak_kv_wm_ts_util:authorize(Call, Table, RD) of ok -> {true, RD, Ctx}; {error, ErrorMsg} -> - {ErrorMsg, RD, Ctx}; + ErrorStr = lists:flatten(io_lib:format("~p", [ErrorMsg])), + {ErrorStr, RD, Ctx}; {insecure, Halt, Resp} -> {Halt, Resp, Ctx} end. @@ -143,45 +147,6 @@ forbidden(RD, Ctx) -> end. --spec allowed_methods(#wm_reqdata{}, #ctx{}) -> cb_rv_spec([atom()]). -allowed_methods(RD, Ctx) -> - {['GET', 'POST'], RD, Ctx}. - -query_from_request(RD) -> - QueryStr = query_string_from_request(RD), - lager:log(info, self(), "query_from_request: ~p", [QueryStr]), - compile_query(QueryStr). - -query_string_from_request(RD) -> - case wrq:get_qs_value("query", RD) of - undefined -> - throw({query, "no query key in query string"}); - Str -> - Str - end. - -compile_query(QueryStr) -> - case riak_ql_parser:ql_parse( - riak_ql_lexer:get_tokens(QueryStr)) of - {error, Reason} -> - ErrorMsg = lists:flatten(io_lib:format("parse error: ~p", [Reason])), - throw({query, ErrorMsg}); - {ddl, _ } = Res -> - Res; - {Type, Compiled} when Type==select; Type==describe -> - {ok, SQL} = riak_kv_ts_util:build_sql_record( - Type, Compiled, undefined), - {Type, SQL} - end. - -%% @todo: should really be in riak_ql somewhere -table_from_sql(#ddl_v1{table=Table}) -> Table; -table_from_sql(#riak_select_v1{'FROM'=Table}) -> Table; -table_from_sql(#riak_sql_describe_v1{'DESCRIBE'=Table}) -> Table. - -call_from_sql_type(ddl) -> query_create_table; -call_from_sql_type(select) -> query_select; -call_from_sql_type(describe) -> query_describe. 
@@ -200,7 +165,6 @@ encodings_provided(RD, Ctx) ->
 -spec content_types_accepted(#wm_reqdata{}, #ctx{}) ->
                                 cb_rv_spec([{ContentType::string(), Acceptor::atom()}]).
 content_types_accepted(RD, Ctx) ->
-% {[{"application/json", accept_doc_body}], RD, Ctx}.
 %% @todo: if we end up without a body in the request this function should be deleted.
     {[], RD, Ctx}.
 
@@ -265,6 +229,45 @@ process_post(RD, #ctx{sql_type=select,
             {false, Resp, Ctx}
     end.
 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Helper functions
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+query_from_request(RD) ->
+    QueryStr = query_string_from_request(RD),
+    lager:log(info, self(), "query_from_request: ~p", [QueryStr]),
+    compile_query(QueryStr).
+
+query_string_from_request(RD) ->
+    case wrq:get_qs_value("query", RD) of
+        undefined ->
+            throw({query, "no query key in query string"});
+        Str ->
+            Str
+    end.
+
+compile_query(QueryStr) ->
+    case riak_ql_parser:ql_parse(
+           riak_ql_lexer:get_tokens(QueryStr)) of
+        {error, Reason} ->
+            ErrorMsg = lists:flatten(io_lib:format("parse error: ~p", [Reason])),
+            throw({query, ErrorMsg});
+        {ddl, _ } = Res ->
+            Res;
+        {Type, Compiled} when Type==select; Type==describe ->
+            {ok, SQL} = riak_kv_ts_util:build_sql_record(
+                          Type, Compiled, undefined),
+            {Type, SQL}
+    end.
+
+%% @todo: should really be in riak_ql somewhere
+table_from_sql(#ddl_v1{table=Table}) -> Table;
+table_from_sql(#riak_select_v1{'FROM'=Table}) -> Table;
+table_from_sql(#riak_sql_describe_v1{'DESCRIBE'=Table}) -> Table.
+
+call_from_sql_type(ddl) -> query_create_table;
+call_from_sql_type(select) -> query_select;
+call_from_sql_type(describe) -> query_describe.
+
 create_table(DDL = #ddl_v1{table = Table}) ->
     %% would be better to use a function to get the table out.
     {ok, Props1} = riak_kv_ts_util:apply_timeseries_bucket_props(DDL, []),
@@ -295,8 +298,6 @@ wait_until_active(Table, Seconds) ->
     end.
 
 -spec produce_doc_body(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(iolist()).
-%% @doc Extract the value of the document, and place it in the
-%% response body of the request.
 produce_doc_body(RD, Ctx = #ctx{result = {Columns, Rows}}) ->
     {mochijson2:encode(
        {struct, [{<<"columns">>, Columns},
From ab13f9619c3840eb2a1e005df40ab8f3ca272e42 Mon Sep 17 00:00:00 2001
From: Torben Hoffmann
Date: Mon, 14 Mar 2016 13:17:16 +0100
Subject: [PATCH 118/122] Comments at the top of the files aligned with what
 the code does.

---
 src/riak_kv_wm_timeseries.erl          | 3 ++-
 src/riak_kv_wm_timeseries_listkeys.erl | 5 ++---
 src/riak_kv_wm_timeseries_query.erl    | 3 +--
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl
index 02a36f290a..f08ea38161 100644
--- a/src/riak_kv_wm_timeseries.erl
+++ b/src/riak_kv_wm_timeseries.erl
@@ -37,7 +37,8 @@
 %% Request body is expected to be a JSON containing a struct or structs for the
 %% POST. GET and DELETE have no body.
 %%
-%% Response is a JSON containing full records.
+%% Response is a JSON containing full records or {"success": true} for POST and
+%% DELETE.
 %%
 
-module(riak_kv_wm_timeseries).
diff --git a/src/riak_kv_wm_timeseries_listkeys.erl b/src/riak_kv_wm_timeseries_listkeys.erl
index 2122143284..f172d1ce05 100644
--- a/src/riak_kv_wm_timeseries_listkeys.erl
+++ b/src/riak_kv_wm_timeseries_listkeys.erl
@@ -24,11 +24,10 @@
 %% @doc Resource for Riak TS operations over HTTP.
%% %% ``` -%% GET /ts/v1/table/Table/keys list_keys +%% GET /ts/v1/table/Table/list_keys %% ''' %% -%% Request body is expected to be a JSON containing key and/or value(s). -%% Response is a JSON containing data rows with column headers. +%% Response is HTML URLs for the entries in the table. %% -module(riak_kv_wm_timeseries_listkeys). diff --git a/src/riak_kv_wm_timeseries_query.erl b/src/riak_kv_wm_timeseries_query.erl index 0c4c657bdd..6f2584966b 100644 --- a/src/riak_kv_wm_timeseries_query.erl +++ b/src/riak_kv_wm_timeseries_query.erl @@ -23,10 +23,9 @@ %% @doc Resource for Riak TS operations over HTTP. %% %% ``` -%% GET/POST /ts/v1/query execute SQL query +%% POST /ts/v1/query?query="query string" execute SQL query %% ''' %% -%% Request body is expected to be a JSON containing key and/or value(s). %% Response is a JSON containing data rows with column headers. %% From a3416663d08eb981960eb2e6d1b95c7fd8d2e48f Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Wed, 30 Mar 2016 11:07:29 +0200 Subject: [PATCH 119/122] ts_query: Better handling of wrong queries. --- src/riak_kv_wm_timeseries.erl | 6 ++---- src/riak_kv_wm_timeseries_query.erl | 6 +++++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl index 5ae1d02d0d..77860425cc 100644 --- a/src/riak_kv_wm_timeseries.erl +++ b/src/riak_kv_wm_timeseries.erl @@ -105,7 +105,7 @@ service_available(RD, #ctx{riak = RiakProps}=Ctx) -> Mod = riak_ql_ddl:make_module_name(Table), {true, RD, Ctx#ctx{table=Table, mod=Mod}}; {error, Reason} -> - Resp = riak_kv_wm_ts_util:set_error_message("Unable to connect to Riak: ~p", [Reason], RD), + Resp = riak_kv_wm_ts_util:set_error_message("Node not ready: ~p", [Reason], RD), {false, Resp, Ctx} end. @@ -281,7 +281,6 @@ delete_resource(RD, #ctx{table=Table, {{halt, 404}, Resp, Ctx} catch _:Reason -> - lager:log(info, self(), "delete_resource failed: ~p", [Reason]), Resp = riak_kv_wm_ts_util:set_error_message("Internal error: ~p", [Reason], RD), {{halt, 500}, Resp, Ctx} end. @@ -364,8 +363,7 @@ extract_data(RD, Mod) -> DDLFieldTypes = ddl_fields_and_types(Mod), extract_records(Json, DDLFieldTypes) catch - Error:Reason -> - lager:log(info, self(), "extract_data: ~p:~p", [Error, Reason]), + _Error:Reason -> throw({data_problem, Reason}) end. diff --git a/src/riak_kv_wm_timeseries_query.erl b/src/riak_kv_wm_timeseries_query.erl index 8d7cba8fd6..eb8729c287 100644 --- a/src/riak_kv_wm_timeseries_query.erl +++ b/src/riak_kv_wm_timeseries_query.erl @@ -264,7 +264,7 @@ query_string_from_request(RD) -> end. compile_query(QueryStr) -> - case riak_ql_parser:ql_parse( + try riak_ql_parser:ql_parse( riak_ql_lexer:get_tokens(QueryStr)) of {error, Reason} -> ErrorMsg = lists:flatten(io_lib:format("parse error: ~p", [Reason])), @@ -277,6 +277,10 @@ compile_query(QueryStr) -> {Type, SQL}; {UnsupportedType, _ } -> throw({unsupported_sql_type, UnsupportedType}) + catch + E:T -> + ErrorMsg = io_lib:format("query error: ~p:~p", [E, T]), + throw({query, ErrorMsg}) end. %% @todo: should really be in riak_ql somewhere From 19f62df9b04bb054501fdcc5c68c989721d65425 Mon Sep 17 00:00:00 2001 From: Torben Hoffmann Date: Wed, 30 Mar 2016 11:11:50 +0200 Subject: [PATCH 120/122] wm_timeseries: fixed URL in comments. 
---
 src/riak_kv_wm_timeseries.erl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/riak_kv_wm_timeseries.erl b/src/riak_kv_wm_timeseries.erl
index 77860425cc..4c9c2f502c 100644
--- a/src/riak_kv_wm_timeseries.erl
+++ b/src/riak_kv_wm_timeseries.erl
@@ -24,13 +24,13 @@
 %%
 %% This resource is responsible for everything under
 %% ```
-%% ts/v1/table/Table/keys
+%% ts/v1/tables/Table/keys
 %% ```
 %% Specific operations supported:
 %% ```
-%% GET    /ts/v1/table/Table/keys/K1/V1/...   single-key get
-%% DELETE /ts/v1/table/Table/keys/K1/V1/...   single-key delete
-%% POST   /ts/v1/table/Table/keys             singe-key or batch put depending
+%% GET    /ts/v1/tables/Table/keys/K1/V1/...  single-key get
+%% DELETE /ts/v1/tables/Table/keys/K1/V1/...  single-key delete
+%% POST   /ts/v1/tables/Table/keys            single-key or batch put depending
 %%                                            on the body
 %% '''
 %%

From b4816592e9bfe5c46ecdc65974c0d0a35648c150 Mon Sep 17 00:00:00 2001
From: Torben Hoffmann
Date: Thu, 31 Mar 2016 14:54:19 +0200
Subject: [PATCH 121/122] ts listkeys: modified the return type to text/plain,
 returning URLs separated by newlines.

---
 src/riak_kv_wm_timeseries_listkeys.erl | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/riak_kv_wm_timeseries_listkeys.erl b/src/riak_kv_wm_timeseries_listkeys.erl
index 38484c90aa..41b1a27598 100644
--- a/src/riak_kv_wm_timeseries_listkeys.erl
+++ b/src/riak_kv_wm_timeseries_listkeys.erl
@@ -128,7 +128,7 @@ encodings_provided(RD, Ctx) ->
                            cb_rv_spec([{ContentType::string(), Producer::atom()}]).
 %% @doc List the content types available for representing this resource.
 content_types_provided(RD, Ctx) ->
-    {[{"application/json", produce_doc_body}], RD, Ctx}.
+    {[{"text/plain", produce_doc_body}], RD, Ctx}.

 produce_doc_body(RD, Ctx = #ctx{table = Table, mod=Mod,
                                 client = Client}) ->
@@ -137,7 +137,7 @@ produce_doc_body(RD, Ctx = #ctx{table = Table, mod=Mod,
     {{halt, 200}, wrq:set_resp_body({stream, prepare_stream(ReqId, Table, Mod)}, RD), Ctx}.

 prepare_stream(ReqId, Table, Mod) ->
-    {<<"">>, fun() -> stream_keys(ReqId, Table, Mod) end}.
+    {<<"">>, fun() -> stream_keys(ReqId, Table, Mod) end}.

 stream_keys(ReqId, Table, Mod) ->
     receive
@@ -149,11 +149,11 @@ stream_keys(ReqId, Table, Mod) ->
             stream_keys(ReqId, Table, Mod);
         {ReqId, From, {keys, Keys}} ->
             _ = riak_kv_keys_fsm:ack_keys(From),
-            {ts_keys_to_json(Keys, Table, Mod), fun() -> stream_keys(ReqId, Table, Mod) end};
+            {ts_keys_to_body(Keys, Table, Mod), fun() -> stream_keys(ReqId, Table, Mod) end};
         {ReqId, {keys, Keys}} ->
-            {ts_keys_to_json(Keys, Table, Mod), fun() -> stream_keys(ReqId, Table, Mod) end};
+            {ts_keys_to_body(Keys, Table, Mod), fun() -> stream_keys(ReqId, Table, Mod) end};
         {ReqId, done} ->
-            {<<"">>, done};
+            {<<"">>, done};
         {ReqId, {error, timeout}} ->
             {mochijson2:encode({struct, [{error, timeout}]}), done};
         _Weird ->
             stream_keys(ReqId, Table, Mod)
     end.

-ts_keys_to_json(EncodedKeys, Table, Mod) ->
+ts_keys_to_body(EncodedKeys, Table, Mod) ->
     BaseUrl = base_url(Table),
     Keys = decode_keys(EncodedKeys),
     KeyTypes = riak_kv_wm_ts_util:local_key_fields_and_types(Mod),
@@ -173,13 +173,12 @@ ts_keys_to_json(EncodedKeys, Table, Mod) ->
 %%                      format_url(BaseUrl, KeyTypes, Key)
 %%                  end,
 %%                  Keys),
-    JsonList = [ mochijson2:encode([{url, URL}]) || URL <- URLs],
-    list_to_binary(lists:flatten(JsonList)).
+    list_to_binary(lists:flatten(URLs)).
format_url(BaseUrl, KeyTypes, Key) -> list_to_binary( - io_lib:format("~s~s", [BaseUrl, key_to_string(Key, KeyTypes)])). + io_lib:format("~s~s~n", [BaseUrl, key_to_string(Key, KeyTypes)])). decode_keys(Keys) -> [tuple_to_list(sext:decode(A)) From eef65508bd6ded0623577cc058c6dac59a4531f9 Mon Sep 17 00:00:00 2001 From: Andrei Zavada Date: Mon, 4 Apr 2016 04:25:18 +0300 Subject: [PATCH 122/122] amend create_table code path to take into account WITH-props --- src/riak_kv_wm_timeseries_query.erl | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/riak_kv_wm_timeseries_query.erl b/src/riak_kv_wm_timeseries_query.erl index eb8729c287..ada64d0528 100644 --- a/src/riak_kv_wm_timeseries_query.erl +++ b/src/riak_kv_wm_timeseries_query.erl @@ -63,6 +63,7 @@ security, %% security context sql_type, compiled_query :: undefined | #ddl_v1{} | #riak_sql_describe_v1{} | #riak_select_v1{}, + with_props :: undefined | proplists:proplist(), result :: undefined | ok | {Headers::[binary()], Rows::[ts_rec()]} | [{entry, proplists:proplist()}] }). @@ -99,11 +100,12 @@ allowed_methods(RD, Ctx) -> -spec malformed_request(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). malformed_request(RD, Ctx) -> try - {SqlType, SQL} = query_from_request(RD), + {SqlType, SQL, WithProps} = query_from_request(RD), Table = table_from_sql(SQL), Mod = riak_ql_ddl:make_module_name(Table), {false, RD, Ctx#ctx{sql_type=SqlType, compiled_query=SQL, + with_props=WithProps, table=Table, mod=Mod}} catch @@ -193,8 +195,8 @@ post_is_create(RD, Ctx) -> {false, RD, Ctx}. -spec process_post(#wm_reqdata{}, #ctx{}) -> cb_rv_spec(boolean()). -process_post(RD, #ctx{sql_type=ddl, compiled_query=SQL}=Ctx) -> - case create_table(SQL) of +process_post(RD, #ctx{sql_type = ddl, compiled_query = SQL, with_props = WithProps} = Ctx) -> + case create_table(SQL, WithProps) of ok -> Result = [{success, true}], %% represents ok Json = to_json(Result), @@ -269,12 +271,14 @@ compile_query(QueryStr) -> {error, Reason} -> ErrorMsg = lists:flatten(io_lib:format("parse error: ~p", [Reason])), throw({query, ErrorMsg}); - {ddl, _ } = Res -> + {ddl, _DDL, _Props} = Res -> Res; - {Type, Compiled} when Type==select; Type==describe -> - {ok, SQL} = riak_kv_ts_util:build_sql_record( + {Type, Compiled} when Type == select; + Type == describe; + Type == insert -> + {ok, SQL} = riak_kv_ts_util:build_sql_record( Type, Compiled, undefined), - {Type, SQL}; + {Type, SQL, undefined}; {UnsupportedType, _ } -> throw({unsupported_sql_type, UnsupportedType}) catch @@ -292,9 +296,9 @@ call_from_sql_type(ddl) -> query_create_table; call_from_sql_type(select) -> query_select; call_from_sql_type(describe) -> query_describe. -create_table(DDL = #ddl_v1{table = Table}) -> +create_table(DDL = #ddl_v1{table = Table}, Props) -> %% would be better to use a function to get the table out. - {ok, Props1} = riak_kv_ts_util:apply_timeseries_bucket_props(DDL, []), + {ok, Props1} = riak_kv_ts_util:apply_timeseries_bucket_props(DDL, Props), Props2 = [riak_kv_wm_utils:erlify_bucket_prop(P) || P <- Props1], case riak_core_bucket_type:create(Table, Props2) of ok ->