Skip to content

Commit

Permalink
Merge branch 'apache:master' into sss
Browse files Browse the repository at this point in the history
  • Loading branch information
Tech-Circle-48 authored Jan 11, 2025
2 parents 2fd583b + 9fc6b8d commit ae3af22
Show file tree
Hide file tree
Showing 39 changed files with 239 additions and 68 deletions.
5 changes: 3 additions & 2 deletions be/src/pipeline/exec/set_probe_sink_operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <memory>

#include "pipeline/exec/operator.h"
#include "pipeline/pipeline_task.h"
#include "vec/common/hash_table/hash_table_set_probe.h"

namespace doris {
Expand Down Expand Up @@ -70,7 +71,7 @@ Status SetProbeSinkOperatorX<is_intersect>::sink(RuntimeState* state, vectorized
SCOPED_TIMER(local_state.exec_time_counter());
COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)in_block->rows());

uint32_t probe_rows = cast_set<uint32_t>(in_block->rows());
const auto probe_rows = cast_set<uint32_t>(in_block->rows());
if (probe_rows > 0) {
{
SCOPED_TIMER(local_state._extract_probe_data_timer);
Expand All @@ -93,7 +94,7 @@ Status SetProbeSinkOperatorX<is_intersect>::sink(RuntimeState* state, vectorized
local_state._shared_state->hash_table_variants->method_variant));
}

if (eos) {
if (eos && !state->get_task()->wake_up_early()) {
_finalize_probe(local_state);
}
return Status::OK();
Expand Down
15 changes: 8 additions & 7 deletions be/src/util/mem_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ class MemInfo {
size_t value_size = sizeof(T);
if (jemallctl(name.c_str(), &value, &value_size, nullptr, 0) == 0) {
return value;
} else {
LOG(WARNING) << fmt::format("Failed, jemallctl get {}", name);
}
#endif
return 0;
Expand All @@ -123,9 +125,6 @@ class MemInfo {
if (err) {
LOG(WARNING) << fmt::format("Failed, jemallctl value for {} set to {} (old {})",
name, value, old_value);
} else {
LOG(INFO) << fmt::format("Successfully, jemallctl value for {} set to {} (old {})",
name, value, old_value);
}
} catch (...) {
LOG(WARNING) << fmt::format("Exception, jemallctl value for {} set to {} (old {})",
Expand All @@ -140,8 +139,6 @@ class MemInfo {
int err = jemallctl(name.c_str(), nullptr, nullptr, nullptr, 0);
if (err) {
LOG(WARNING) << fmt::format("Failed, jemallctl action {}", name);
} else {
LOG(INFO) << fmt::format("Successfully, jemallctl action {}", name);
}
} catch (...) {
LOG(WARNING) << fmt::format("Exception, jemallctl action {}", name);
Expand Down Expand Up @@ -181,8 +178,12 @@ class MemInfo {
// Each time this interface is set, all currently unused dirty pages are considered
// to have fully decayed, which causes immediate purging of all unused dirty pages unless
// the decay time is set to -1
set_jemallctl_value<ssize_t>(fmt::format("arena.{}.dirty_decay_ms", MALLCTL_ARENAS_ALL),
dirty_decay_ms);
//
// NOTE: Using "arena.MALLCTL_ARENAS_ALL.dirty_decay_ms" to modify all arenas will fail or even crash,
// which may be a bug.
for (unsigned i = 0; i < get_jemallctl_value<unsigned>("arenas.narenas"); i++) {
set_jemallctl_value<ssize_t>(fmt::format("arena.{}.dirty_decay_ms", i), dirty_decay_ms);
}
#endif
}

Expand Down
1 change: 1 addition & 0 deletions be/src/vec/exec/format/table/transactional_hive_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ Status TransactionalHiveReader::init_row_filters(const TFileRangeDesc& range,
++num_delete_files;
}
if (num_delete_rows > 0) {
orc_reader->set_push_down_agg_type(TPushAggOp::NONE);
orc_reader->set_delete_rows(&_delete_rows);
COUNTER_UPDATE(_transactional_orc_profile.num_delete_files, num_delete_files);
COUNTER_UPDATE(_transactional_orc_profile.num_delete_rows, num_delete_rows);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,26 @@ cp /etc/trino/conf/presto-server.keytab /keytabs/other-presto-server.keytab
cp /keytabs/update-location.sh /etc/hadoop-init.d/update-location.sh
/usr/local/hadoop-run.sh &

sleep 30
# Check that Hadoop is healthy here
echo "Waiting for hadoop to be healthy"

for i in {1..10}; do
if /usr/local/health.sh; then
echo "Hadoop is healthy"
break
fi
echo "Hadoop is not healthy yet. Retrying in 20 seconds..."
sleep 20
done

if [ $i -eq 10 ]; then
echo "Hadoop did not become healthy after 10 attempts. Exiting."
exit 1
fi

echo "Init kerberos test data"
kinit -kt /etc/hive/conf/hive.keytab hive/[email protected]
hive -f /usr/local/sql/create_kerberos_hive_table.sql

sleep 20
touch /mnt/SUCCESS

tail -f /dev/null
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,26 @@ mkdir -p /etc/hadoop-init.d/
cp /etc/trino/conf/* /keytabs/
/usr/local/hadoop-run.sh &

sleep 30
# Check that Hadoop is healthy here
echo "Waiting for hadoop to be healthy"

for i in {1..10}; do
if /usr/local/health.sh; then
echo "Hadoop is healthy"
break
fi
echo "Hadoop is not healthy yet. Retrying in 20 seconds..."
sleep 20
done

if [ $i -eq 10 ]; then
echo "Hadoop did not become healthy after 10 attempts. Exiting."
exit 1
fi

echo "Init kerberos test data"
kinit -kt /etc/hive/conf/hive.keytab hive/[email protected]
hive -f /usr/local/sql/create_kerberos_hive_table.sql

sleep 20
touch /mnt/SUCCESS

tail -f /dev/null
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ fi
FAILED=$(supervisorctl status | grep -v RUNNING || true)

if [ "$FAILED" == "" ]; then
echo "All services are running"
exit 0
else
echo "Some of the services are failing: ${FAILED}"
Expand Down
1 change: 1 addition & 0 deletions docker/thirdparties/docker-compose/kerberos/health-checks/health.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,4 @@ if test -d "${HEALTH_D}"; then
"${health_script}" &>> /var/log/container-health.log || exit 1
done
fi
exit 0
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Health check for HiveServer2: authenticate via Kerberos, then run a
# trivial query over JDBC. The script's exit status is beeline's, so a
# failing query marks the service unhealthy.
KEYTAB="/etc/hive/conf/hive.keytab"
PRINCIPAL="hive/[email protected]"

# Obtain a Kerberos ticket for the Hive service principal.
kinit -kt "${KEYTAB}" "${PRINCIPAL}"

# Probe HiveServer2; a successful "show databases" proves the server is
# up and accepting Kerberos-authenticated connections.
beeline -u "jdbc:hive2://localhost:10000/default;principal=${PRINCIPAL}" -e "show databases;"
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Health check for the second kerberized Hive instance: get a ticket,
# then probe HiveServer2 with a trivial query; the script exits with
# beeline's status, so a failed query marks the container unhealthy.
# NOTE(review): this uses the hive/[email protected] principal,
# identical to the first cluster's check. In a two-realm setup the
# second cluster often uses a different realm (e.g. OTHERREALM.COM) —
# confirm this principal is intentional for hadoop-master-2.
kinit -kt /etc/hive/conf/hive.keytab hive/[email protected]
beeline -u "jdbc:hive2://localhost:10000/default;principal=hive/[email protected]" -e "show databases;"
16 changes: 12 additions & 4 deletions docker/thirdparties/docker-compose/kerberos/kerberos.yaml.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,17 @@ services:
- ./sql:/usr/local/sql
- ./common/hadoop/apply-config-overrides.sh:/etc/hadoop-init.d/00-apply-config-overrides.sh
- ./common/hadoop/hadoop-run.sh:/usr/local/hadoop-run.sh
- ./health-checks/health.sh:/usr/local/health.sh
- ./health-checks/hadoop-health-check.sh:/etc/health.d/hadoop-health-check.sh
- ./health-checks/hive-health-check.sh:/etc/health.d/hive-health-check.sh
- ./entrypoint-hive-master.sh:/usr/local/entrypoint-hive-master.sh
restart: on-failure
hostname: hadoop-master
entrypoint: /usr/local/entrypoint-hive-master.sh
healthcheck:
test: ./health-checks/health.sh
test: ["CMD", "ls", "/mnt/SUCCESS"]
interval: 20s
timeout: 60s
retries: 120
ports:
- "5806:5006"
- "8820:8020"
Expand All @@ -46,17 +50,21 @@ services:
image: doristhirdpartydocker/trinodb:hdp3.1-hive-kerberized-2_96
container_name: doris--kerberos2
hostname: hadoop-master-2
restart: on-failure
volumes:
- ./two-kerberos-hives:/keytabs
- ./sql:/usr/local/sql
- ./common/hadoop/apply-config-overrides.sh:/etc/hadoop-init.d/00-apply-config-overrides.sh
- ./common/hadoop/hadoop-run.sh:/usr/local/hadoop-run.sh
- ./health-checks/health.sh:/usr/local/health.sh
- ./health-checks/hadoop-health-check.sh:/etc/health.d/hadoop-health-check.sh
- ./health-checks/hive-health-check-2.sh:/etc/health.d/hive-health-check-2.sh
- ./entrypoint-hive-master-2.sh:/usr/local/entrypoint-hive-master-2.sh
entrypoint: /usr/local/entrypoint-hive-master-2.sh
healthcheck:
test: ./health-checks/health.sh
test: ["CMD", "ls", "/mnt/SUCCESS"]
interval: 20s
timeout: 60s
retries: 120
ports:
- "15806:5006"
- "18820:8020"
Expand Down
14 changes: 5 additions & 9 deletions docker/thirdparties/run-thirdparties-docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -708,6 +708,11 @@ if [[ "${RUN_MINIO}" -eq 1 ]]; then
pids["minio"]=$!
fi

if [[ "${RUN_KERBEROS}" -eq 1 ]]; then
start_kerberos > start_kerberos.log 2>&1 &
pids["kerberos"]=$!
fi

echo "waiting all dockers starting done"

for compose in "${!pids[@]}"; do
Expand All @@ -727,15 +732,6 @@ for compose in "${!pids[@]}"; do
fi
done

if [[ "${RUN_KERBEROS}" -eq 1 ]]; then
echo "Starting Kerberos after all other components..."
start_kerberos > start_kerberos.log 2>&1
if [ $? -ne 0 ]; then
echo "Kerberos startup failed"
cat start_kerberos.log
exit 1
fi
fi
echo "docker started"
docker ps -a --format "{{.ID}} | {{.Image}} | {{.Status}}"
echo "all dockers started successfully"
Original file line number Diff line number Diff line change
Expand Up @@ -289,12 +289,12 @@ private void getFileSplitByPartitions(HiveMetaStoreCache cache, List<HivePartiti
* we don't need to split the file because for parquet/orc format, only metadata is read.
* If we split the file, we will read metadata of a file multiple times, which is not efficient.
*
* - Hive Transactional Table may need merge on read, so do not apply this optimization.
* - Hive Full Acid Transactional Table may need merge on read, so do not apply this optimization.
* - If the file format is not parquet/orc, eg, text, we need to split the file to increase the parallelism.
*/
boolean needSplit = true;
if (getPushDownAggNoGroupingOp() == TPushAggOp.COUNT
&& hiveTransaction != null) {
&& !(hmsTable.isHiveTransactionalTable() && hmsTable.isFullAcidTable())) {
int totalFileNum = 0;
for (FileCacheValue fileCacheValue : fileCaches) {
if (fileCacheValue.getFiles() != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ protected Object schema(
Optional aggregationType = Optional.ofNullable(column.getAggregationType());
baseInfo.put("aggregation_type", aggregationType.isPresent()
? column.getAggregationType().toSql() : "");
baseInfo.put("is_nullable", column.isAllowNull() ? "Yes" : "No");
propList.add(baseInfo);
}
resultMap.put("status", 200);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1243,8 +1243,11 @@ public boolean isExpired(long currentTimeMs) {
expireTime = Config.streaming_label_keep_max_second;
}

LOG.info("state {}, expireTime {}, currentTimeMs {}, finishTimestamp {}",
state, expireTime, currentTimeMs, getFinishTimestamp());
if (LOG.isDebugEnabled()) {
LOG.debug("Job ID: {}, DB ID: {}, Label: {}, State: {}, Expire Time: {}, Current Time: {}, "
+ "Finish Timestamp: {}", id, dbId, label, state, expireTime, currentTimeMs,
getFinishTimestamp());
}
return (currentTimeMs - getFinishTimestamp()) / 1000 > expireTime;
}

Expand Down
Loading

0 comments on commit ae3af22

Please sign in to comment.