Skip to content

Commit

Permalink
be ut
Browse files Browse the repository at this point in the history
  • Loading branch information
Mryange committed Dec 25, 2024
1 parent c593088 commit 8861fd6
Show file tree
Hide file tree
Showing 6 changed files with 467 additions and 23 deletions.
6 changes: 0 additions & 6 deletions be/src/vec/functions/function_cast.h
Original file line number Diff line number Diff line change
Expand Up @@ -472,12 +472,6 @@ struct ConvertImpl {
}
return Status::OK();
}

private:
// Fills 16 bytes at `buf` from `ipv4`: bytes [0, 8) receive the address OR-ed with
// 0x0000FFFF00000000 and bytes [8, 16) are set to zero. Both writes go through
// unaligned_store.
// NOTE(review): presumably this produces the IPv4-mapped IPv6 form (::ffff:a.b.c.d)
// in the in-memory layout used for IPv6 values here; confirm against the IPv6 type.
static void map_ipv4_to_ipv6(IPv4 ipv4, UInt8* buf) {
    const UInt64 first_half = 0x0000FFFF00000000ULL | static_cast<UInt64>(ipv4);
    UInt8* const second_half_pos = buf + 8;
    unaligned_store<UInt64>(buf, first_half);
    unaligned_store<UInt64>(second_half_pos, 0);
}
};

/** If types are identical, just take reference to column.
Expand Down
6 changes: 0 additions & 6 deletions be/src/vec/functions/function_ip.h
Original file line number Diff line number Diff line change
Expand Up @@ -1228,12 +1228,6 @@ class FunctionIPv4ToIPv6 : public IFunction {
block.replace_by_position(result, std::move(col_res));
return Status::OK();
}

private:
// Expands `ipv4` into the 16 bytes starting at `buf`.
// The first 8-byte word is the address combined with the constant
// 0x0000FFFF00000000; the second 8-byte word is zero. Stores are issued via
// unaligned_store.
// NOTE(review): the constant looks like the ::ffff:0:0/96 IPv4-mapped prefix;
// verify the byte order expected by consumers of `buf`.
static void map_ipv4_to_ipv6(IPv4 ipv4, UInt8* buf) {
    constexpr UInt64 prefix_bits = 0x0000FFFF00000000ULL;
    const UInt64 address_bits = static_cast<UInt64>(ipv4);
    unaligned_store<UInt64>(buf, prefix_bits | address_bits);
    unaligned_store<UInt64>(buf + 8, 0);
}
};

class FunctionCutIPv6 : public IFunction {
Expand Down
11 changes: 0 additions & 11 deletions be/src/vec/functions/ip_address_dictionary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,6 @@

namespace doris::vectorized {

// Writes a 16-byte representation of `ipv4` to `buf` using two 8-byte
// unaligned_store calls: first (0x0000FFFF00000000 | ipv4), then 0.
// NOTE(review): presumably the IPv4-mapped IPv6 encoding in this project's
// storage layout for IPv6; confirm.
void map_ipv4_to_ipv6(IPv4 ipv4, UInt8* buf) {
    const UInt64 leading_word = 0x0000FFFF00000000ULL | static_cast<UInt64>(ipv4);
    unaligned_store<UInt64>(buf, leading_word);
    // The remaining 8 bytes are always cleared.
    unaligned_store<UInt64>(buf + 8, 0);
}

// Returns an IPv6 value whose storage has been filled by map_ipv4_to_ipv6(ipv4, ...).
// The local is not initialised first because the helper writes 16 bytes into it.
// NOTE(review): assumes sizeof(IPv6) is at least 16; confirm against the type's definition.
IPv6 ipv4_to_ipv6(IPv4 ipv4) {
    IPv6 mapped;
    auto* raw_bytes = reinterpret_cast<UInt8*>(&mapped);
    map_ipv4_to_ipv6(ipv4, raw_bytes);
    return mapped;
}

ColumnPtr IPAddressDictionary::getColumn(const std::string& attribute_name,
const DataTypePtr& attribute_type,
const ColumnPtr& key_column,
Expand Down
13 changes: 13 additions & 0 deletions be/src/vec/runtime/ipv4_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,19 @@ class IPv4Value {
IPv4 _value;
};

namespace vectorized {
// Fills the 16 bytes at `buf` from `ipv4`: the first 8 bytes hold
// (0x0000FFFF00000000 | ipv4) and the following 8 bytes hold zero, both written
// with unaligned_store.
// NOTE(review): presumably the IPv4-mapped IPv6 form (::ffff:a.b.c.d) in the
// layout used for IPv6 values in this code base; confirm.
inline void map_ipv4_to_ipv6(IPv4 ipv4, UInt8* buf) {
    const UInt64 first_word = 0x0000FFFF00000000ULL | static_cast<UInt64>(ipv4);
    unaligned_store<UInt64>(buf, first_word);
    unaligned_store<UInt64>(buf + 8, 0);
}

// Convenience wrapper: builds an IPv6 value by letting map_ipv4_to_ipv6 write
// directly into the bytes of a local IPv6 object, then returns it.
// NOTE(review): assumes sizeof(IPv6) is at least 16; confirm.
inline IPv6 ipv4_to_ipv6(IPv4 ipv4) {
    IPv6 mapped;
    auto* raw_bytes = reinterpret_cast<UInt8*>(&mapped);
    map_ipv4_to_ipv6(ipv4, raw_bytes);
    return mapped;
}
} // namespace vectorized

} // namespace doris

#include "common/compile_check_end.h"
241 changes: 241 additions & 0 deletions be/test/vec/function/function_ip_dict_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "function_ip_dict_test.h"

#include <cstdint>
#include <iostream>
#include <memory>
#include <random>
#include <sstream>
#include <string>
#include <type_traits>
#include <unordered_set>
#include <vector>

#include "function_test_util.h"
#include "vec/columns/column_string.h"
#include "vec/columns/columns_number.h"
#include "vec/common/assert_cast.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type_ipv4.h"
#include "vec/data_types/data_type_number.h"
#include "vec/functions/ip_address_dictionary.h"

namespace doris::vectorized {

template <typename IPType, bool output>
void test_for_ip_type(std::vector<std::string> ips, std::vector<std::string> ip_string) {
static_assert(std::is_same_v<IPType, DataTypeIPv4> || std::is_same_v<IPType, DataTypeIPv6>,
"IPType must be either DataTypeIPv4 or DataTypeIPv6");
std::cout << "input data size\t" << ips.size() << "\t" << ip_string.size() << "\n";
auto input_key_column = DataTypeString::ColumnType::create();
auto intput_key_data = std::make_shared<DataTypeString>();

auto value_column = DataTypeInt64::ColumnType::create();
auto value_type = std::make_shared<DataTypeInt64>();

for (int i = 0; i < ips.size(); i++) {
input_key_column->insert_value(ips[i]);
value_column->insert_value(i);
}

auto mock_ip_dict = create_mock_ip_trie_dict_from_column(
"mock ip dict", ColumnWithTypeAndName {input_key_column->clone(), intput_key_data, ""},
ColumnsWithTypeAndName {
ColumnWithTypeAndName {value_column->clone(), value_type, "row"},
});
auto ip_dict = create_ip_trie_dict_from_column(
"ip dict", ColumnWithTypeAndName {input_key_column->clone(), intput_key_data, ""},
ColumnsWithTypeAndName {
ColumnWithTypeAndName {value_column->clone(), value_type, "row"},
});

std::string attribute_name = "row";
DataTypePtr attribute_type = value_type;

{
auto key_type = std::make_shared<IPType>();
auto ipv_column = IPType::ColumnType::create();
for (const auto& ip : ip_string) {
if constexpr (std::is_same_v<IPType, DataTypeIPv4>) {
IPv4 ipv4;
EXPECT_TRUE(IPv4Value::from_string(ipv4, ip));
ipv_column->insert_value(ipv4);
} else {
IPv6 ipv6;
EXPECT_TRUE(IPv6Value::from_string(ipv6, ip));
ipv_column->insert_value(ipv6);
}
}

ColumnPtr key_column = ipv_column->clone();
auto mock_result =
mock_ip_dict->getColumn(attribute_name, attribute_type, key_column, key_type);
auto result = ip_dict->getColumn(attribute_name, attribute_type, key_column, key_type);

const auto* real_mock_result = assert_cast<const ColumnInt64*>(mock_result.get());
const auto* real_result = assert_cast<const ColumnInt64*>(result.get());
for (int i = 0; i < ip_string.size(); i++) {
if constexpr (output) {
std::cout << ip_string[i] << "\t" << ips[real_mock_result->get_element(i)] << "\t"
<< ips[real_result->get_element(i)] << "\n";
}
EXPECT_EQ(ips[real_mock_result->get_element(i)], ips[real_result->get_element(i)]);
}
}
}

// Fixed-data check for IPv4 lookups. The key list mixes IPv4 and IPv6 CIDRs, contains
// nested prefixes and some repeated entries; every address below is looked up through
// test_for_ip_type with printing enabled.
TEST(IpDictTest, TestIpv4) {
    const std::vector<std::string> dict_cidrs = {
            "192.168.0.0/16",     "192.168.1.0/24",
            "192.168.1.128/25",   "1:288:2080::/41",
            "10.0.0.0/8",         "10.1.0.0/16",
            "172.16.0.0/12",      "172.16.1.0/24",
            "172.16.1.128/25",    "203.0.113.0/24",
            "198.51.100.0/24",    "2001:db8::/32",
            "2001:db8:abcd::/48", "fc00::/7",
            "fe80::/10",          "192.0.2.0/24",
            "198.18.0.0/15",      "198.51.100.0/24",
            "203.0.113.0/24",     "2400:cb00::/32",
            "192.168.2.0/24",     "192.168.3.0/24",
            "10.2.0.0/16",        "172.17.0.0/16",
            "172.18.0.0/16",      "203.0.114.0/24",
            "198.51.101.0/24",    "2001:db8:abcd:1::/64"};

    const std::vector<std::string> lookup_addresses = {
            "192.168.1.1",   "10.1.1.1",    "172.16.1.1",
            "198.51.100.1",  "203.0.113.1", "192.0.2.1",
            "198.18.1.1",    "203.0.113.2", "198.51.100.2",
            "192.168.0.1",   "10.0.0.1",    "172.16.0.1",
            "192.168.1.129", "10.1.0.1",    "192.168.2.1",
            "192.168.3.1",   "10.2.0.1",    "172.17.0.1",
            "172.18.0.1",    "203.0.114.1", "198.51.101.1"};

    test_for_ip_type<DataTypeIPv4, true>(dict_cidrs, lookup_addresses);
}

// Fixed-data check for IPv6 lookups. Keys range from short prefixes (/7, /10) up to
// full /128 host entries; each address below is looked up through test_for_ip_type
// with printing enabled.
TEST(IpDictTest, TestIpv6) {
    const std::vector<std::string> dict_cidrs = {
            "2001:db8::/32",
            "2001:db8:abcd::/48",
            "fc00::/7",
            "fe80::/10",
            "2400:cb00::/32",
            "2001:0db8:85a3::/64",
            "2001:0db8:85a3:0000:0000:8a2e:0370:7334/128",
            "2001:0db8:0000:0042:0000:8a2e:0370:7334/128",
            "2001:0db8:0000:0042:0000:8a2e:0370:7335/128",
            "2001:0db8:0000:0042:0000:8a2e:0370:7336/128",
            "2001:db8:abcd:1::/64",
            "2001:db8:abcd:2::/64",
            "fc00:1::/64",
            "fe80:1::/64",
    };

    const std::vector<std::string> lookup_addresses = {
            "2001:db8::1",
            "2001:db8:abcd::1",
            "fc00::1",
            "fe80::1",
            "2400:cb00::1",
            "2001:0db8:85a3::1",
            "2001:0db8:85a3:0000:0000:8a2e:0370:7334",
            "2001:0db8:0000:0042:0000:8a2e:0370:7334",
            "2001:0db8:0000:0042:0000:8a2e:0370:7335",
            "2001:0db8:0000:0042:0000:8a2e:0370:7336",
            "2001:db8:abcd:1::1",
            "2001:db8:abcd:2::1",
            "fc00:1::1",
            "fe80:1::1",
    };

    test_for_ip_type<DataTypeIPv6, true>(dict_cidrs, lookup_addresses);
}

// Returns a dotted-quad string built from four octets, each drawn uniformly from
// [0, 255]. A new std::mt19937 seeded from std::random_device is created on every
// call, so results are not reproducible across runs.
std::string generate_random_ipv4() {
    std::random_device seed_source;
    std::mt19937 engine(seed_source());
    std::uniform_int_distribution<> octet(0, 255);
    std::stringstream text;
    for (int part = 0; part < 4; ++part) {
        if (part > 0) {
            text << '.';
        }
        text << octet(engine);
    }
    return text.str();
}

// Returns eight colon-separated groups, each a value drawn uniformly from [0, 65535]
// and printed in hexadecimal without zero padding (no "::" compression is applied).
// A new std::mt19937 seeded from std::random_device is created on every call.
std::string generate_random_ipv6() {
    std::random_device seed_source;
    std::mt19937 engine(seed_source());
    std::uniform_int_distribution<> group(0, 65535);
    std::stringstream text;
    text << std::hex; // sticky: applies to every group written below
    for (int idx = 0; idx < 8; ++idx) {
        if (idx > 0) {
            text << ':';
        }
        text << group(engine);
    }
    return text.str();
}

// Produces a random IPv4 CIDR string ("a.b.c.d/len") that is not yet contained in
// `existing_cidrs`, records it in that set and returns it.
//
// The prefix length is one of 8, 16, 24 or 32, and all address bits beyond the prefix
// are cleared. Candidates are redrawn until an unused one is found, so the call does not
// return if every possible CIDR is already in `existing_cidrs`.
//
// The address is assembled and masked in std::uint32_t: doing it in `int` would shift
// into the sign bit and right-shift negative values, which is implementation-defined.
std::string generate_random_cidr_ipv4(std::unordered_set<std::string>& existing_cidrs) {
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<> dis_prefix(1, 4); // an integer between 1 and 4
    std::uniform_int_distribution<> dis(0, 255);
    std::string cidr;
    do {
        const int prefix_length = dis_prefix(gen) * 8; // a multiple of 8
        // Shift count is at most 24 here, so the shift is always well defined.
        const std::uint32_t mask = ~((std::uint32_t {1} << (32 - prefix_length)) - 1);
        std::uint32_t ip = 0;
        // Draw the four octets one after another, most significant first.
        for (int octet = 0; octet < 4; ++octet) {
            ip = (ip << 8) | static_cast<std::uint32_t>(dis(gen));
        }
        ip &= mask;
        std::stringstream ss;
        ss << ((ip >> 24) & 0xFF) << '.' << ((ip >> 16) & 0xFF) << '.' << ((ip >> 8) & 0xFF) << '.'
           << (ip & 0xFF);
        cidr = ss.str() + "/" + std::to_string(prefix_length);
    } while (existing_cidrs.find(cidr) != existing_cidrs.end());
    existing_cidrs.insert(cidr);
    return cidr;
}

// Produces a random IPv6 CIDR string that is not yet contained in `existing_cidrs`,
// records it in that set and returns it.
//
// The prefix length is a multiple of 16 in [16, 128]. The text always has eight
// colon-separated groups: groups covered by the prefix are random values from
// [0, 65535] printed in hex, the remaining groups are the literal "0". Candidates are
// redrawn until one is successfully inserted into the set.
std::string generate_random_cidr_ipv6(std::unordered_set<std::string>& existing_cidrs) {
    std::random_device seed_source;
    std::mt19937 engine(seed_source());
    std::uniform_int_distribution<> group_count(1, 8); // an integer between 1 and 8
    std::uniform_int_distribution<> group_value(0, 65535);
    for (;;) {
        const int random_groups = group_count(engine);
        const int prefix_length = random_groups * 16; // a multiple of 16
        std::stringstream text;
        for (int idx = 0; idx < 8; ++idx) {
            if (idx > 0) {
                text << ':';
            }
            // idx < random_groups is the same condition as idx * 16 < prefix_length.
            if (idx < random_groups) {
                text << std::hex << group_value(engine);
            } else {
                text << "0";
            }
        }
        std::string candidate = text.str() + "/" + std::to_string(prefix_length);
        // insert() reports whether the candidate was new; retry on a duplicate.
        if (existing_cidrs.insert(candidate).second) {
            return candidate;
        }
    }
}

// Randomised IPv4 check: 1000 distinct random CIDR keys and 1000 random addresses are
// generated and passed to test_for_ip_type with printing disabled.
TEST(IpDictTest, RandomIpv4) {
    constexpr int kSampleCount = 1000;
    std::unordered_set<std::string> used_cidrs;
    std::vector<std::string> dict_cidrs;
    std::vector<std::string> lookup_addresses;
    int remaining = kSampleCount;
    while (remaining-- > 0) {
        dict_cidrs.push_back(generate_random_cidr_ipv4(used_cidrs));
        lookup_addresses.push_back(generate_random_ipv4());
    }
    test_for_ip_type<DataTypeIPv4, false>(dict_cidrs, lookup_addresses);
}

// Randomised IPv6 check: 1000 distinct random CIDR keys and 1000 random addresses are
// generated and passed to test_for_ip_type with printing disabled.
TEST(IpDictTest, RandomIpv6) {
    constexpr int kSampleCount = 1000;
    std::unordered_set<std::string> used_cidrs;
    std::vector<std::string> dict_cidrs;
    std::vector<std::string> lookup_addresses;
    int remaining = kSampleCount;
    while (remaining-- > 0) {
        dict_cidrs.push_back(generate_random_cidr_ipv6(used_cidrs));
        lookup_addresses.push_back(generate_random_ipv6());
    }
    test_for_ip_type<DataTypeIPv6, false>(dict_cidrs, lookup_addresses);
}

} // namespace doris::vectorized
Loading

0 comments on commit 8861fd6

Please sign in to comment.