diff --git a/src/include/migraphx/op/multinomial.hpp b/src/include/migraphx/op/multinomial.hpp index 4c4aff1bd4a..e48c6d31928 100644 --- a/src/include/migraphx/op/multinomial.hpp +++ b/src/include/migraphx/op/multinomial.hpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -21,11 +21,52 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ + +/** + * * Multinomial or categorical distribution. Performs a sampling of random input + * and returns, for each sample, the index of + * the category, or bucket, it falls in. This does not require the standard multinomial + * distribution but instead takes a probability distribution, i.e. cumulative + * distribution function (CDF) as its first input. + * + * Inputs: args[0] - a tensor of probabilities for each category. Values are + * cumulative distribution function + * totals as provided by operation prefix_scan_sum. Values are + * cumulative probabilities (i.e. start with any set of numbers > 0 + * and then apply prefix_scan_sum). Values do not need to be + * normalized to sum to 1; this is done in runtime computation. + * + * This input has Rank 2. Dimension 0 is batch #, so that there can be + * a different CDF for each iteration in the batch. The size of dimension + * 1 is the number of categories. + * + * args[1] - a tensor of random numbers. The last dimension is the sample + * size, i.e. the number of + * random samples in each iteration of the batch. Nominally + * has two dimensions where the first dimension is batch size, but + * any reshaping such that the total + * number of elements is (batch_size * sample_size) is legal. 
+ * + * Values as created by a std::mt19937 like this: + * + * size_t sample_size = 100000; + * float seed = 0.0f; + * std::mt19937 gen(seed); + * std::uniform_real_distribution<> dis(0.0, 1.0); + * std::vector rand_samples(sample_size); + * std::generate(rand_samples.begin(), rand_samples.end(), [&]() { return + * dis(gen); }); + * + * Output: A 2D tensor holding the category index selected for each random input value. Dimensions are (Input 1[first], Input + 2[last]). + * +*/ #ifndef MIGRAPHX_GUARD_OPERATORS_MULTINOMIAL_HPP #define MIGRAPHX_GUARD_OPERATORS_MULTINOMIAL_HPP -#include #include +#include +#include #include #include #include @@ -47,22 +88,35 @@ struct multinomial std::string name() const { return "multinomial"; } shape compute_shape(std::vector inputs) const { - check_shapes{inputs, *this}.has(2).only_dims(2); - size_t sample_size = inputs.back().lens().back(); + check_shapes{inputs, *this, true}.has(2).only_dims(2); - if(not contains({shape::int32_type, shape::int64_type}, dtype)) - MIGRAPHX_THROW( - "Multinomial: Invalid output type. Valid types are int32_type and int64_type."); + if(inputs.back().ndim() < 1) + MIGRAPHX_THROW("Multinomial: Second input shape (sample) has no dimensions"); + if(dtype == shape::bool_type) + MIGRAPHX_THROW("Multinomial: boolean output type invalid."); - return {dtype, {inputs.front().lens().front(), sample_size}}; + // Output takes one dimension from each of the two input shapes. 
If they are both fixed, + // return a static shape + if((not inputs.front().dynamic()) or (inputs.front().dyn_dims().front().is_fixed())) + { + if((not inputs.back().dynamic()) or (inputs.back().dyn_dims().back().is_fixed())) + { + size_t batch = {inputs.front().max_lens().front()}; + size_t sample_size{inputs.back().max_lens().back()}; + return {dtype, {batch, sample_size}}; + } + } + return {dtype, + {inputs.front().to_dynamic().dyn_dims().front(), + inputs.back().to_dynamic().dyn_dims().back()}}; } - argument compute(const shape& output_shape, std::vector args) const + argument compute(const dyn_output& dyn_out, std::vector args) const { - argument result{output_shape}; - size_t batch_size = output_shape.lens().front(); + argument result{dyn_out.computed_shape}; + size_t batch_size = dyn_out.computed_shape.lens().front(); size_t class_size = args[0].get_shape().lens().back(); - size_t sample_size = output_shape.lens().back(); + size_t sample_size = dyn_out.computed_shape.lens().back(); visit_all(args[0], args[1])([&](auto cdf, auto dist) { result.visit([&](auto output) { @@ -70,13 +124,16 @@ struct multinomial auto idx = args[1].get_shape().multi(i); auto cdf_begin = cdf.begin() + (idx[0] * class_size); auto cdf_end = cdf_begin + class_size; + + // std::upper_bound returns an iterator to the bucket the value belongs in, + // when normalized by the probability distribution dist auto sample_iter = std::upper_bound(cdf_begin, cdf_end, dist[i] * *(std::prev(cdf_end))); + // convert iterator to an integer index output[i] = std::distance(cdf_begin, sample_iter); }); }); }); - return result; } }; diff --git a/src/include/migraphx/op/prefix_scan_op.hpp b/src/include/migraphx/op/prefix_scan_op.hpp index 6c509bc626a..c82eabd540b 100644 --- a/src/include/migraphx/op/prefix_scan_op.hpp +++ b/src/include/migraphx/op/prefix_scan_op.hpp @@ -22,6 +22,12 @@ * THE SOFTWARE. */ +/** + * Parent struct for prefix scan ops. 
A prefix scan is a mathematical entity useful + * in parallelizing various computations. Given a list of numbers, a prefix scan + * op returns an equal size list of running totals of the values. Other operations + * besides addition can be supported by child ops. + */ #ifndef MIGRAPHX_GUARD_OPERATORS_SCAN_OP_HPP #define MIGRAPHX_GUARD_OPERATORS_SCAN_OP_HPP diff --git a/src/include/migraphx/op/random_uniform.hpp b/src/include/migraphx/op/random_uniform.hpp index 8420b583203..f873ae9d313 100644 --- a/src/include/migraphx/op/random_uniform.hpp +++ b/src/include/migraphx/op/random_uniform.hpp @@ -65,11 +65,10 @@ struct random_uniform return inputs.at(1); } - argument compute(const shape&, std::vector args) const + argument compute(const dyn_output& dyn_out, std::vector args) const { // Output goes into the passed buffer, not the shape output. - auto result = args[1]; - + argument result{dyn_out.computed_shape}; uint64_t local_seed = args[0].at(0); std::mt19937 gen(local_seed); diff --git a/src/onnx/parse_clip.cpp b/src/onnx/parse_clip.cpp index b537b525308..193dd29be80 100644 --- a/src/onnx/parse_clip.cpp +++ b/src/onnx/parse_clip.cpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal diff --git a/src/onnx/parse_multinomial.cpp b/src/onnx/parse_multinomial.cpp index 3cf584f41cd..4ad128c7db6 100644 --- a/src/onnx/parse_multinomial.cpp +++ b/src/onnx/parse_multinomial.cpp @@ -1,7 +1,7 @@ /* * The MIT License (MIT) * - * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -41,6 +41,9 @@ struct parse_multinomial : op_parser const onnx_parser::node_info& info, std::vector args) const { + if(args.empty()) + MIGRAPHX_THROW("PARSE_MULTINOMIAL: no arguments given"); + int dtype = 6; if(contains(info.attributes, "dtype")) dtype = info.attributes.at("dtype").i(); @@ -49,35 +52,90 @@ struct parse_multinomial : op_parser size_t sample_size = 1; if(contains(info.attributes, "sample_size")) sample_size = info.attributes.at("sample_size").i(); + else + MIGRAPHX_THROW("PARSE_MULTINOMIAL: sample_size not given"); + + // Use logarithmic math to scale probabilities while avoiding division by very + // small numbers. Scaling by the maximum makes very tiny ranges more + // tractable; any constant factor gives an equivalent distribution, since the multinomial op + // normalizes at runtime. // Subtract the per-batch maximum log-probability, making the per-batch max 0 auto maxes = info.add_instruction(migraphx::make_op("reduce_max", {{"axes", {1}}}), args[0]); - auto mb_maxes = info.add_instruction( - migraphx::make_op("multibroadcast", {{"out_lens", args[0]->get_shape().lens()}}), - maxes); - auto cdf = info.add_instruction(migraphx::make_op("sub"), args[0], mb_maxes); + auto cdf = info.add_common_op("sub", args[0], maxes); // Take the element-wise exponent to get probabilities in the range (0, 1] cdf = info.add_instruction(migraphx::make_op("exp"), cdf); - // Compute the cumulative density function + // Compute the cumulative distribution function cdf = info.add_instruction( migraphx::make_op("prefix_scan_sum", {{"axis", 1}, {"exclusive", false}}), cdf); - // Pre-compute random distribution - std::mt19937 gen(std::chrono::high_resolution_clock::now().time_since_epoch().count()); + instruction_ref seed_input; if(contains(info.attributes, "seed")) - gen.seed(info.attributes.at("seed").f()); + { + float 
seed = info.attributes.at("seed").f(); + migraphx::shape s{migraphx::shape::float_type, {1}}; + std::vector data = {seed}; + seed_input = info.add_literal(migraphx::literal(s, data)); + } + else + { + seed_input = info.add_instruction(migraphx::make_op("random_seed")); + } + instruction_ref randoms; + + shape s0 = args[0]->get_shape(); + + if(s0.dynamic()) + { + // Dynamic batch_size will be taken from args[0]. The input argument to this should + // have a second dimension of sample_size. + std::vector dyn_dim_set; + dyn_dim_set.emplace_back(s0.dyn_dims().front()); + dyn_dim_set.emplace_back(shape::dynamic_dimension{sample_size, sample_size}); + + // read the input dimensions + auto dim_of = + info.add_instruction(migraphx::make_op("dimensions_of", {{"end", 2}}), args[0]); + + // The next two operations insert the value sample_size into the second array position + + // make an argument of (1, 0) + shape s(shape::int64_type, {2}); + std::vector data1{1, 0}; + auto l1 = info.add_literal(s, data1); + auto batch_arg = info.add_instruction(migraphx::make_op("mul"), dim_of, l1); + std::vector data2(2, 0); + // make an argument of (0, sample_size) + data2[1] = sample_size; + auto l2 = info.add_literal(s, data2); + auto alloc_shape = info.add_instruction(migraphx::make_op("add"), batch_arg, l2); + // alloc_shape should contain the input-based shape dimensions as its values at runtime, + // and its own shape is {2} + + // compile_shape is the shape used when compiling the Allocate op, and may be dynamic + migraphx::shape compile_shape = + migraphx::shape(s0.type(), {s0.dyn_dims().front(), {sample_size, sample_size}}); - std::uniform_real_distribution<> dis(0.0, 1.0); - size_t batch_size = args[0]->get_shape().lens().front(); - migraphx::shape dist_shape{migraphx::shape::float_type, {batch_size, sample_size}}; + // Allocate on-device storage for the random values + auto alloc = info.add_instruction( + migraphx::make_op("allocate", {{"shape", to_value(compile_shape)}}), 
alloc_shape); + randoms = info.add_instruction(migraphx::make_op("random_uniform"), seed_input, alloc); + } + else + { + // use literal. The array populated by random_uniform may have any shape, as long as its + // number of elements is batch_size * sample_size. + size_t batch_size = s0.lens().front(); + auto rand_dummy = info.add_literal( + migraphx::literal{migraphx::shape::float_type, {batch_size * sample_size}}); - std::vector random_dist(batch_size * sample_size); - std::generate(random_dist.begin(), random_dist.end(), [&]() { return dis(gen); }); - auto dist_lit = info.add_literal(migraphx::literal{dist_shape, random_dist}); + randoms = + info.add_instruction(migraphx::make_op("random_uniform"), seed_input, rand_dummy); + } return info.add_instruction( - migraphx::make_op("multinomial", {{"dtype", output_type}}), cdf, dist_lit); + migraphx::make_op("multinomial", {{"dtype", output_type}}), cdf, randoms); } }; diff --git a/test/onnx/gen_onnx.py b/test/onnx/gen_onnx.py index eeb1229bffd..16a696e92e6 100644 --- a/test/onnx/gen_onnx.py +++ b/test/onnx/gen_onnx.py @@ -4883,9 +4883,9 @@ def mod_test_fmod_different_dtypes(): @onnx_test() def multinomial_test(): - sample_size = 10 - seed = 0.0 - input = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 10]) + sample_size = 13 + seed = 0. 
+ input = helper.make_tensor_value_info("input", TensorProto.FLOAT, [3, 10]) output = helper.make_tensor_value_info("output", TensorProto.INT32, [1, 10]) @@ -4898,6 +4898,44 @@ def multinomial_test(): return ([node], [input], [output]) +@onnx_test() +def multinomial_dyn_test(): + sample_size = 100000 + seed = 1.3 + categories = 5 + input = helper.make_tensor_value_info("input", TensorProto.FLOAT, + [None, categories]) + output = helper.make_tensor_value_info("output", TensorProto.FLOAT, + [None, categories]) + + node = onnx.helper.make_node( + 'Multinomial', + inputs=['input'], + sample_size=sample_size, + dtype=1, # shape::float_type + seed=seed, + outputs=['output']) + + return ([node], [input], [output]) + + +@onnx_test() +def multinomial_autoseed_dyn_test(): + # If seed attribute is not given, device should auto generate one at runtime + sample_size = 12 + input = helper.make_tensor_value_info("input", TensorProto.FLOAT, + [None, 10]) + output = helper.make_tensor_value_info("output", TensorProto.INT32, + [None, 10]) + + node = onnx.helper.make_node('Multinomial', + inputs=['input'], + sample_size=sample_size, + outputs=['output']) + + return ([node], [input], [output]) + + @onnx_test() def multinomial_generated_seed_test(): sample_size = 10 diff --git a/test/onnx/multinomial_autoseed_dyn_test.onnx b/test/onnx/multinomial_autoseed_dyn_test.onnx new file mode 100644 index 00000000000..411ecf664e8 Binary files /dev/null and b/test/onnx/multinomial_autoseed_dyn_test.onnx differ diff --git a/test/onnx/multinomial_dyn_test.onnx b/test/onnx/multinomial_dyn_test.onnx new file mode 100644 index 00000000000..71540f6eba9 Binary files /dev/null and b/test/onnx/multinomial_dyn_test.onnx differ diff --git a/test/onnx/multinomial_int64_test.onnx b/test/onnx/multinomial_int64_test.onnx index f4c4114a109..87c847fb721 100644 Binary files a/test/onnx/multinomial_int64_test.onnx and b/test/onnx/multinomial_int64_test.onnx differ diff --git a/test/onnx/multinomial_test.onnx 
b/test/onnx/multinomial_test.onnx index e2a40ac7f0b..414b81f6d04 100644 Binary files a/test/onnx/multinomial_test.onnx and b/test/onnx/multinomial_test.onnx differ diff --git a/test/onnx/onnx_test.cpp b/test/onnx/onnx_test.cpp index ee6c29c0a34..06efbe4b703 100644 --- a/test/onnx/onnx_test.cpp +++ b/test/onnx/onnx_test.cpp @@ -4679,32 +4679,140 @@ TEST_CASE(multinomial_test) { migraphx::program p; auto* mm = p.get_main_module(); - size_t sample_size = 10; - float seed = 0.0f; + size_t sample_size = 13; + size_t batch_size = 3; + size_t categories = 10; + float seed = 0; - auto input = mm->add_parameter("input", migraphx::shape{migraphx::shape::float_type, {1, 10}}); - auto maxes = mm->add_instruction(migraphx::make_op("reduce_max", {{"axes", {1}}}), input); - auto mb_maxes = - mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {1, 10}}}), maxes); + auto input = mm->add_parameter( + "input", migraphx::shape{migraphx::shape::float_type, {batch_size, categories}}); + auto maxes = mm->add_instruction(migraphx::make_op("reduce_max", {{"axes", {1}}}), input); + auto mb_maxes = mm->add_instruction( + migraphx::make_op("multibroadcast", {{"out_lens", {batch_size, 10}}}), maxes); auto cdf = mm->add_instruction(migraphx::make_op("sub"), input, mb_maxes); cdf = mm->add_instruction(migraphx::make_op("exp"), cdf); cdf = mm->add_instruction( migraphx::make_op("prefix_scan_sum", {{"axis", 1}, {"exclusive", false}}), cdf); - std::mt19937 gen(seed); - std::uniform_real_distribution<> dis(0.0, 1.0); - std::vector rand_samples(sample_size); - std::generate(rand_samples.begin(), rand_samples.end(), [&]() { return dis(gen); }); - migraphx::shape rs{migraphx::shape::float_type, {1, sample_size}}; - auto rs_lit = mm->add_literal(migraphx::literal{rs, rand_samples}); - - mm->add_instruction(migraphx::make_op("multinomial"), cdf, rs_lit); + migraphx::shape s{migraphx::shape::float_type, {1}}; + std::vector seed_data = {seed}; + auto seed_input = 
mm->add_literal(migraphx::literal(s, seed_data)); + auto rand_dummy = + mm->add_literal(migraphx::literal{migraphx::shape::float_type, {batch_size * sample_size}}); + auto randoms = mm->add_instruction(migraphx::make_op("random_uniform"), seed_input, rand_dummy); + mm->add_instruction(migraphx::make_op("multinomial"), cdf, randoms); auto prog = optimize_onnx("multinomial_test.onnx"); EXPECT(p == prog); } +TEST_CASE(multinomial_dyn_test) +{ + // compile-time random seed + migraphx::program p; + auto* mm = p.get_main_module(); + size_t sample_size = 100000; + size_t categories = 5; + float seed = 1.3f; + + auto input = mm->add_parameter( + "input", + migraphx::shape{migraphx::shape::float_type, {{1, categories}, {categories, categories}}}); + + auto maxes = mm->add_instruction(migraphx::make_op("reduce_max", {{"axes", {1}}}), input); + + auto cdf = add_common_op(*mm, migraphx::make_op("sub"), {input, maxes}); + cdf = mm->add_instruction(migraphx::make_op("exp"), cdf); + cdf = mm->add_instruction( + migraphx::make_op("prefix_scan_sum", {{"axis", 1}, {"exclusive", false}}), cdf); + + migraphx::shape s{migraphx::shape::float_type, {1}}; + std::vector seed_data = {seed}; + auto seed_input = mm->add_literal(migraphx::literal(s, seed_data)); + + // dynamic input only: must calculate alloc_shape as (batch_size, sample_size) + // read the runtime input dimensions + auto dim_of = mm->add_instruction(migraphx::make_op("dimensions_of", {{"end", 2}}), input); + // make an argument of (1, 0) + migraphx::shape lit_shape(migraphx::shape::int64_type, {2}); + std::vector data1{1, 0}; + auto l1 = mm->add_literal(lit_shape, data1); + auto batch_arg = mm->add_instruction(migraphx::make_op("mul"), dim_of, l1); + std::vector data2(2, 0); + // make an argument of (0, sample_size) + data2[1] = sample_size; + auto l2 = mm->add_literal(lit_shape, data2); + auto alloc_shape = mm->add_instruction(migraphx::make_op("add"), batch_arg, l2); + migraphx::shape compile_shape = + 
migraphx::shape(migraphx::shape::float_type, + {input->get_shape().dyn_dims().front(), {sample_size, sample_size}}); + + auto alloc = mm->add_instruction( + migraphx::make_op("allocate", {{"shape", to_value(compile_shape)}}), alloc_shape); + + auto randoms = mm->add_instruction(migraphx::make_op("random_uniform"), seed_input, alloc); + auto ret = mm->add_instruction( + migraphx::make_op("multinomial", {{"dtype", migraphx::shape::float_type}}), cdf, randoms); + mm->add_return({ret}); + + migraphx::onnx_options options; + options.default_dyn_dim_value = {1, categories}; + options.print_program_on_error = true; + auto prog = migraphx::parse_onnx("multinomial_dyn_test.onnx", options); + EXPECT(p == prog); +} + +TEST_CASE(multinomial_autoseed_dyn_test) +{ + // runtime random seed + migraphx::program p; + auto* mm = p.get_main_module(); + size_t sample_size = 12; + size_t categories = 10; + + auto input = mm->add_parameter( + "input", migraphx::shape{migraphx::shape::float_type, {{1, 10}, {10, 10}}}); + + auto maxes = mm->add_instruction(migraphx::make_op("reduce_max", {{"axes", {1}}}), input); + + auto cdf = add_common_op(*mm, migraphx::make_op("sub"), {input, maxes}); + cdf = mm->add_instruction(migraphx::make_op("exp"), cdf); + cdf = mm->add_instruction( + migraphx::make_op("prefix_scan_sum", {{"axis", 1}, {"exclusive", false}}), cdf); + auto seed_input = mm->add_instruction(migraphx::make_op("random_seed")); + + // dynamic input only: must calculate alloc_shape as (batch_size, sample_size) + // read the runtime input dimensions + auto dim_of = mm->add_instruction(migraphx::make_op("dimensions_of", {{"end", 2}}), input); + // make an argument of (1, 0) + migraphx::shape lit_shape(migraphx::shape::int64_type, {2}); + std::vector data1{1, 0}; + auto l1 = mm->add_literal(lit_shape, data1); + auto batch_arg = mm->add_instruction(migraphx::make_op("mul"), dim_of, l1); + std::vector data2(2, 0); + // make an argument of (0, sample_size) + data2[1] = sample_size; + auto l2 = 
mm->add_literal(lit_shape, data2); + auto alloc_shape = mm->add_instruction(migraphx::make_op("add"), batch_arg, l2); + migraphx::shape compile_shape = + migraphx::shape(migraphx::shape::float_type, + {input->get_shape().dyn_dims().front(), {sample_size, sample_size}}); + + auto alloc = mm->add_instruction( + migraphx::make_op("allocate", {{"shape", to_value(compile_shape)}}), alloc_shape); + + auto randoms = mm->add_instruction(migraphx::make_op("random_uniform"), seed_input, alloc); + auto ret = mm->add_instruction(migraphx::make_op("multinomial"), cdf, randoms); + mm->add_return({ret}); + + migraphx::onnx_options options; + options.default_dyn_dim_value = {1, categories}; + options.print_program_on_error = true; + auto prog = migraphx::parse_onnx("multinomial_autoseed_dyn_test.onnx", options); + EXPECT(p == prog); +} + TEST_CASE(multinomial_dtype_error_test) { EXPECT(test::throws([&] { migraphx::parse_onnx("multinomial_dtype_error_test.onnx"); })); @@ -4712,10 +4820,11 @@ TEST_CASE(multinomial_dtype_error_test) TEST_CASE(multinomial_generated_seed_test) { + // multinomial op. 
no longer generates its own randoms auto p1 = optimize_onnx("multinomial_generated_seed_test.onnx"); auto p2 = optimize_onnx("multinomial_generated_seed_test.onnx"); - EXPECT(p1 != p2); + EXPECT(p1 == p2); } TEST_CASE(multinomial_int64_test) @@ -4723,27 +4832,27 @@ TEST_CASE(multinomial_int64_test) migraphx::program p; auto* mm = p.get_main_module(); size_t sample_size = 10; - float seed = 1.0f; + float seed = 1.0; + uint32_t batch_size = 1; migraphx::shape::type_t dtype = migraphx::shape::type_t::int64_type; auto input = mm->add_parameter("input", migraphx::shape{migraphx::shape::float_type, {1, 10}}); auto maxes = mm->add_instruction(migraphx::make_op("reduce_max", {{"axes", {1}}}), input); - auto mb_maxes = - mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {1, 10}}}), maxes); - auto cdf = mm->add_instruction(migraphx::make_op("sub"), input, mb_maxes); + + auto cdf = add_common_op(*mm, migraphx::make_op("sub"), {input, maxes}); cdf = mm->add_instruction(migraphx::make_op("exp"), cdf); cdf = mm->add_instruction( migraphx::make_op("prefix_scan_sum", {{"axis", 1}, {"exclusive", false}}), cdf); - std::mt19937 gen(seed); - std::uniform_real_distribution<> dis(0.0, 1.0); - std::vector rand_samples(sample_size); - std::generate(rand_samples.begin(), rand_samples.end(), [&]() { return dis(gen); }); - migraphx::shape rs{migraphx::shape::float_type, {1, sample_size}}; - auto rs_lit = mm->add_literal(migraphx::literal{rs, rand_samples}); - - mm->add_instruction(migraphx::make_op("multinomial", {{"dtype", dtype}}), cdf, rs_lit); + migraphx::shape s{migraphx::shape::float_type, {1}}; + std::vector data = {seed}; + auto seed_input = mm->add_literal(migraphx::literal(s, data)); + // static size + auto rand_dummy = + mm->add_literal(migraphx::literal{migraphx::shape::float_type, {batch_size * sample_size}}); + auto randoms = mm->add_instruction(migraphx::make_op("random_uniform"), seed_input, rand_dummy); + 
mm->add_instruction(migraphx::make_op("multinomial", {{"dtype", dtype}}), cdf, randoms); auto prog = optimize_onnx("multinomial_int64_test.onnx"); EXPECT(p == prog); diff --git a/test/onnx/verify_onnx.cpp b/test/onnx/verify_onnx.cpp index fd642ac2e53..c866c44c40b 100644 --- a/test/onnx/verify_onnx.cpp +++ b/test/onnx/verify_onnx.cpp @@ -1434,6 +1434,77 @@ TEST_CASE(mod_test_fmod_different_types) EXPECT(migraphx::verify::verify_rms_range(result_vector, gold)); } +TEST_CASE(multinomial_dyn_test) +{ + migraphx::onnx_options options; + options.default_dyn_dim_value = {1, 4}; + auto p = migraphx::parse_onnx("multinomial_dyn_test.onnx", options); + const size_t batch_size(2); + const size_t categories(5); + const size_t sample_size(100000); + p.compile(migraphx::make_target("ref")); + + // Distribution function (2 distributions of 5 categories each) + std::vector dist{15, 25, 15, 25, 20, 20, 20, 10, 25, 25}; + EXPECT(dist.size() == categories * batch_size); + std::vector data(categories * batch_size); + + std::transform(dist.begin(), dist.end(), data.begin(), [&](auto d) { return log(d); }); + // Shape of the probability distribution, which also defines the number of categories + migraphx::shape s{migraphx::shape::float_type, {batch_size, categories}}; + + migraphx::parameter_map pp; + pp["input"] = migraphx::argument(s, data.data()); + + auto result = p.eval(pp).back(); + + std::vector result_vec(batch_size * sample_size); + result.visit([&](auto output) { result_vec.assign(output.begin(), output.end()); }); + + // Make a categorical histogram of output + // for first result in batch + std::vector res_dist(categories, 0); + size_t r = 0; + for(r = 0; r < result_vec.size() / 2; r++) + res_dist[result_vec[r]]++; + + // normalizing factors for original and measured distributions + auto dist_sum = std::accumulate(dist.begin(), dist.begin() + 5, 0); + auto res_dist_sum = std::accumulate(res_dist.begin(), res_dist.end(), 0); + + // Values approximate the distribution in dist 
+ std::vector norm(5); + std::vector res_norm(5); + + std::transform(dist.begin(), dist.begin() + 5, norm.begin(), [&](auto n) { + return static_cast(n) / dist_sum; + }); + std::transform(res_dist.begin(), res_dist.end(), res_norm.begin(), [&](auto n) { + return static_cast(n) / res_dist_sum; + }); + + EXPECT(migraphx::verify::verify_range_with_tolerance( + norm, migraphx::verify::expected{res_norm}, migraphx::verify::tolerance{0.01})); + + // Make a categorical histogram of output + // for second result in batch + std::fill(res_dist.begin(), res_dist.end(), 0); + for(; r < result_vec.size(); r++) + res_dist[result_vec[r]]++; + + dist_sum = std::accumulate(dist.begin() + 5, dist.end(), 0); + res_dist_sum = std::accumulate(res_dist.begin(), res_dist.end(), 0); + std::transform(dist.begin() + 5, dist.end(), norm.begin(), [&](auto n) { + return static_cast(n) / dist_sum; + }); + std::transform(res_dist.begin(), res_dist.end(), res_norm.begin(), [&](auto n) { + return static_cast(n) / res_dist_sum; + }); + + EXPECT(migraphx::verify::verify_range_with_tolerance( + res_norm, migraphx::verify::expected{norm}, migraphx::verify::tolerance{0.01})); +} + TEST_CASE(nonzero_test) { migraphx::program p = migraphx::parse_onnx("nonzero_dynamic_test.onnx"); diff --git a/test/op_shape_test.cpp b/test/op_shape_test.cpp index 68dc3db2d14..709cf2a7ab8 100644 --- a/test/op_shape_test.cpp +++ b/test/op_shape_test.cpp @@ -1957,12 +1957,42 @@ TEST_CASE(multibroadcast_3in_dyn_dyn) expect_shape(expected_shape, migraphx::make_op("multibroadcast"), c_shape, a_shape, b_shape); } -TEST_CASE(multinomial) +TEST_CASE(multinomial_bool_type) { - migraphx::shape s{migraphx::shape::float_type, {2, 5}}; + migraphx::shape s1{migraphx::shape::float_type, {1, 2}}; + migraphx::shape s2{migraphx::shape::float_type, {3, 4}}; int dtype = 0; - throws_shape(migraphx::make_op("multinomial", {{"dtype", dtype}}), s, s); + throws_shape(migraphx::make_op("multinomial", {{"dtype", dtype}}), s1, s2); +} + 
+TEST_CASE(multinomial) +{ + migraphx::shape s1{migraphx::shape::float_type, {1, 2}}; + migraphx::shape s2{migraphx::shape::float_type, {3, 4}}; + migraphx::shape s3{migraphx::shape::float_type, {1, 4}}; + int dtype = 2; + + expect_shape(s3, migraphx::make_op("multinomial", {{"dtype", dtype}}), s1, s2); +} + +TEST_CASE(multinomial_0size_input) +{ + migraphx::shape s1{migraphx::shape::float_type, {1, 2}}; + migraphx::shape s2{migraphx::shape::float_type, {}}; + int dtype = 2; + + throws_shape(migraphx::make_op("multinomial", {{"dtype", dtype}}), s1, s2); +} + +TEST_CASE(multinomial_dyn) +{ + migraphx::shape s1{migraphx::shape::int32_type, {{2, 3}, {5, 6}}}; + migraphx::shape s2{migraphx::shape::int32_type, {{7, 8}, {9, 10}}}; + migraphx::shape s3{migraphx::shape::int32_type, {{2, 3}, {9, 10}}}; + + expect_shape( + s3, migraphx::make_op("multinomial", {{"dtype", migraphx::shape::int32_type}}), s1, s2); } TEST_CASE(nms_shape) diff --git a/test/ref/multinomial.cpp b/test/ref/multinomial.cpp index b280a1ac17e..96ab12e388a 100644 --- a/test/ref/multinomial.cpp +++ b/test/ref/multinomial.cpp @@ -24,9 +24,10 @@ #include #include #include -#include +#include #include #include +#include #include #include @@ -48,27 +49,37 @@ TEST_CASE(multinomial_test) migraphx::shape s{migraphx::shape::float_type, {1, 5}}; std::vector dist{15, 25, 15, 25, 20}; std::vector data(5); - std::transform(dist.begin(), dist.end(), data.begin(), [&](auto d) { return std::log(d); }); - auto input = mm->add_literal(migraphx::literal(s, data)); + std::vector sum(5); + // convert to float + std::transform(dist.begin(), dist.end(), data.begin(), [&](auto d) { return d; }); + // take cumulative sum + std::partial_sum(data.begin(), data.end(), sum.begin(), std::plus()); + // scale probabilities arbitrarily + float odd_scale = 10000.; + std::transform(sum.begin(), sum.end(), data.begin(), [&](auto d) { return d * odd_scale; }); - auto maxes = mm->add_instruction(migraphx::make_op("reduce_max", {{"axes", 
{1}}}), input); - auto mb_maxes = - mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {1, 5}}}), maxes); - auto cdf = mm->add_instruction(migraphx::make_op("sub"), input, mb_maxes); - cdf = mm->add_instruction(migraphx::make_op("exp"), cdf); - cdf = mm->add_instruction( - migraphx::make_op("prefix_scan_sum", {{"axis", 1}, {"exclusive", false}}), cdf); + auto input = mm->add_literal(migraphx::literal(s, data)); - mm->add_instruction(migraphx::make_op("multinomial"), cdf, rs_lit); + mm->add_instruction(migraphx::make_op("multinomial"), input, rs_lit); p.compile(migraphx::make_target("ref")); auto result = p.eval({}).back(); + // result_vec contains an index, or category label, for each random input value std::vector result_vec(sample_size); result.visit([&](auto output) { result_vec.assign(output.begin(), output.end()); }); + // res_dist is a count, or histogram, of the number of samples in each category. This is the + // sampled distribution. std::vector res_dist(5, 0); for(const auto& r : result_vec) res_dist[r]++; - auto dist_sum = std::accumulate(dist.begin(), dist.end(), 0); + + // To check the result, normalize the original probability distribution dist + // and the sampling result res_dist; they should be close + + // Total the unnormalized probabilities + auto dist_sum = std::accumulate(dist.begin(), dist.end(), 0); + + // Total the number of values returned auto res_dist_sum = std::accumulate(res_dist.begin(), res_dist.end(), 0); std::vector norm(5); std::vector res_norm(5); @@ -78,6 +89,204 @@ TEST_CASE(multinomial_test) std::transform(res_dist.begin(), res_dist.end(), res_norm.begin(), [&](auto n) { return static_cast(n) / res_dist_sum; }); + + EXPECT(migraphx::verify::verify_range_with_tolerance( + res_norm, migraphx::verify::expected{norm}, migraphx::verify::tolerance{0.01})); +} + +TEST_CASE(multinomial_dyn_test) +{ + // Invokes random_uniform and multinomial ops together, to verify the interface + // Dynamic Batch dimension input of 
2 means there are 2 different probability + // distribution functions contained in Input_2 + migraphx::program p; + auto* mm = p.get_main_module(); + + size_t sample_size = 100000; + size_t batch_size = 2; + + // Shape of the random data + migraphx::shape rs{migraphx::shape::float_type, {{1, 2}, {2, sample_size + 1}}}; + auto input = mm->add_parameter("Input_1", rs); + + // Runtime randomization seed + // To seed the random_uniform, we can provide a value by literal or input, + // or ask the system to auto-seed with random_seed op. + migraphx::shape seed_shape{migraphx::shape::uint32_type, + {migraphx::shape::dynamic_dimension{0, 1}}}; + auto seed_input = mm->add_parameter("Seed", seed_shape); + + // Shape of the probability distribution, which also defines the number of categories + migraphx::shape s{migraphx::shape::float_type, {{2, 2}, {5, 6}}}; + + // Unnormalized distributions for batch size 2: + // 15, 25, 15, 25, 20 + // 20, 20, 10, 25, 25 + std::vector dist{15, 25, 15, 25, 20, 20, 20, 10, 25, 25}; + // Hard-coded non-normalized, accumulated distribution follows: + std::vector data{.15f, .40f, .55f, .80f, 1.0f, 20.f, 40.f, 50.f, 75.f, 100.f}; + + auto input2 = mm->add_parameter("Input_2", s); + + auto randoms = mm->add_instruction(migraphx::make_op("random_uniform"), seed_input, input); + mm->add_instruction(migraphx::make_op("multinomial"), input2, randoms); + + p.compile(migraphx::make_target("ref")); + + // Create a dummy input in the shape we want for the random data + std::vector dummy(sample_size, 0); + migraphx::shape input_fixed_shape1{migraphx::shape::float_type, {batch_size, sample_size}}; + migraphx::shape input_fixed_shape2{migraphx::shape::float_type, {batch_size, 5}}; + migraphx::parameter_map params0; + params0["Input_1"] = migraphx::argument(input_fixed_shape1, dummy.data()); + + migraphx::shape seed_fixed_shape{migraphx::shape::uint32_type, {1}}; + std::vector seed_data = {4}; + params0["Seed"] = migraphx::argument(seed_fixed_shape, 
seed_data.data()); + + params0["Input_2"] = migraphx::argument(input_fixed_shape2, data.data()); + auto result = p.eval(params0).back(); + + std::vector result_vec(input_fixed_shape2.elements()); + result.visit([&](auto output) { result_vec.assign(output.begin(), output.end()); }); + + // Make a categorical histogram of output + std::vector res_dist(5, 0); + size_t r = 0; + for(r = 0; r < result_vec.size() / 2; r++) + res_dist[result_vec[r]]++; + + // histogram for second set of batch + std::vector res_dist2(5, 0); + for(; r < result_vec.size(); r++) + res_dist2[result_vec[r]]++; + + // Rescale or normalize both the input probability distribution and the output + // histogram, and compare. Should be close but not identical. + auto dist_sum = std::accumulate(dist.begin(), dist.begin() + 5, 0); + auto res_dist_sum = std::accumulate(res_dist.begin(), res_dist.end(), 0); + std::vector norm(5); + std::vector res_norm(5); + + std::transform(dist.begin(), dist.begin() + 5, norm.begin(), [&](auto n) { + return static_cast(n) / dist_sum; + }); + std::transform(res_dist.begin(), res_dist.end(), res_norm.begin(), [&](auto n) { + return static_cast(n) / res_dist_sum; + }); + + EXPECT(migraphx::verify::verify_range_with_tolerance( + res_norm, migraphx::verify::expected{norm}, migraphx::verify::tolerance{0.01})); + + // Do the same rescaling for the 2nd in batch, which has a different probability distribution + dist_sum = std::accumulate(dist.begin() + 5, dist.end(), 0); + res_dist_sum = std::accumulate(res_dist2.begin(), res_dist2.end(), 0); + std::transform(dist.begin() + 5, dist.end(), norm.begin(), [&](auto n) { + return static_cast(n) / dist_sum; + }); + std::transform(res_dist2.begin(), res_dist2.end(), res_norm.begin(), [&](auto n) { + return static_cast(n) / res_dist_sum; + }); + + EXPECT(migraphx::verify::verify_range_with_tolerance( + res_norm, migraphx::verify::expected{norm}, migraphx::verify::tolerance{0.01})); +} + +TEST_CASE(multinomial_float_dyn_test) +{ + // int 
data type for random_uniform op and float data type for multinomial. + + migraphx::program p; + auto* mm = p.get_main_module(); + + size_t sample_size = 100000; + size_t batch_size = 2; + + // Shape of the random data + migraphx::shape rs{migraphx::shape::int32_type, {{1, 2}, {2, sample_size + 1}}}; + auto input = mm->add_parameter("Input_1", rs); + + // Runtime randomization seed + // To seed the random_uniform, we can provide a value by literal or input, + // or ask the system to auto-seed with random_seed op. + migraphx::shape seed_shape{migraphx::shape::uint32_type, + {migraphx::shape::dynamic_dimension{0, 1}}}; + auto seed_input = mm->add_parameter("Seed", seed_shape); + + // Shape of the probability distribution, which also defines the number of categories + migraphx::shape s{migraphx::shape::float_type, {{2, 2}, {5, 6}}}; + + // Unnormalized distributions for batch size 2: + // 15, 25, 15, 15, 20 + // 20, 20, 10, 25, 25 + std::vector dist{15, 25, 15, 25, 20, 20, 20, 10, 25, 25}; + // Hard-coded normalized, accumulated distribution follows: + std::vector data{.15f, .40f, .55f, .80f, 1.0f, .20f, .40f, .50f, .75f, 1.0f}; + + auto input2 = mm->add_parameter("Input_2", s); + + auto randoms = mm->add_instruction(migraphx::make_op("random_uniform"), seed_input, input); + mm->add_instruction(migraphx::make_op("multinomial", {{"dtype", migraphx::shape::float_type}}), + input2, + randoms); + + p.compile(migraphx::make_target("ref")); + + // Create a dummy input in the shape we want for the random data + std::vector dummy(sample_size, 0); + migraphx::shape input_fixed_shape1{migraphx::shape::float_type, {batch_size, sample_size}}; + migraphx::shape input_fixed_shape2{migraphx::shape::float_type, {batch_size, 5}}; + migraphx::parameter_map params0; + params0["Input_1"] = migraphx::argument(input_fixed_shape1, dummy.data()); + + migraphx::shape seed_fixed_shape{migraphx::shape::uint32_type, {1}}; + std::vector seed_data = {4}; + params0["Seed"] = 
migraphx::argument(seed_fixed_shape, seed_data.data()); + + params0["Input_2"] = migraphx::argument(input_fixed_shape2, data.data()); + auto result = p.eval(params0).back(); + + std::vector result_vec(input_fixed_shape2.elements()); + result.visit([&](auto output) { result_vec.assign(output.begin(), output.end()); }); + + // Make a categorical histogram of output + std::vector res_dist(5, 0); + size_t r = 0; + for(r = 0; r < result_vec.size() / 2; r++) + res_dist[result_vec[r]]++; + + // histogram for second set of batch + std::vector res_dist2(5, 0); + for(; r < result_vec.size(); r++) + res_dist2[result_vec[r]]++; + + // Rescale or normalize both the input probability distribution and the output + // histogram, and compare. Should be close but not identical. + auto dist_sum = std::accumulate(dist.begin(), dist.begin() + 5, 0); + auto res_dist_sum = std::accumulate(res_dist.begin(), res_dist.end(), 0); + std::vector norm(5); + std::vector res_norm(5); + + std::transform(dist.begin(), dist.begin() + 5, norm.begin(), [&](auto n) { + return static_cast(n) / dist_sum; + }); + std::transform(res_dist.begin(), res_dist.end(), res_norm.begin(), [&](auto n) { + return static_cast(n) / res_dist_sum; + }); + + EXPECT(migraphx::verify::verify_range_with_tolerance( + res_norm, migraphx::verify::expected{norm}, migraphx::verify::tolerance{0.01})); + + // Do the same rescaling for the 2nd in batch, which has a different probability distribution + dist_sum = std::accumulate(dist.begin() + 5, dist.end(), 0); + res_dist_sum = std::accumulate(res_dist2.begin(), res_dist2.end(), 0); + std::transform(dist.begin() + 5, dist.end(), norm.begin(), [&](auto n) { + return static_cast(n) / dist_sum; + }); + std::transform(res_dist2.begin(), res_dist2.end(), res_norm.begin(), [&](auto n) { + return static_cast(n) / res_dist_sum; + }); + EXPECT(migraphx::verify::verify_range_with_tolerance( res_norm, migraphx::verify::expected{norm}, migraphx::verify::tolerance{0.01})); }