Skip to content

Commit

Permalink
add fe.
Browse files Browse the repository at this point in the history
  • Loading branch information
hubgeter committed Oct 21, 2024
1 parent b993ce0 commit 09dc709
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 6 deletions.
2 changes: 1 addition & 1 deletion be/src/io/fs/buffered_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ struct PrefetchRange {
return {start_offset, other.end_offset};
}

//Range needs to be sorted.
// Ranges need to be sorted.
static std::vector<PrefetchRange> mergeAdjacentSeqRanges(
const std::vector<PrefetchRange>& seq_ranges, int64_t max_merge_distance_bytes,
int64_t max_read_size_bytes) {
Expand Down
14 changes: 9 additions & 5 deletions be/src/vec/exec/format/orc/vorc_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -868,10 +868,15 @@ Status OrcReader::set_fill_columns(

int64_t range_end_offset = _range_start_offset + _range_size;

// TODO: the three parameters below
int orc_tiny_stripe_threshold = 8L * 1024L * 1024L;
int orc_once_max_read_size = 8L * 1024L * 1024L;
int orc_max_merge_distance = 1L * 1024L * 1024L;
int64_t orc_tiny_stripe_threshold = 8L * 1024L * 1024L;
int64_t orc_once_max_read_size = 8L * 1024L * 1024L;
int64_t orc_max_merge_distance = 1L * 1024L * 1024L;

if (_state != nullptr) {
orc_tiny_stripe_threshold = _state->query_options().orc_tiny_stripe_threshold;
orc_once_max_read_size = _state->query_options().orc_once_max_read_size;
orc_max_merge_distance = _state->query_options().orc_max_merge_distance;
}

bool all_tiny_stripes = true;
std::vector<io::PrefetchRange> tiny_stripe_ranges;
Expand All @@ -896,7 +901,6 @@ Status OrcReader::set_fill_columns(
std::vector<io::PrefetchRange> prefetch_merge_ranges =
io::PrefetchRange::mergeAdjacentSeqRanges(
tiny_stripe_ranges, orc_max_merge_distance, orc_once_max_read_size);

auto range_finder =
std::make_shared<io::LinearProbeRangeFinder>(std::move(prefetch_merge_ranges));

Expand Down
77 changes: 77 additions & 0 deletions fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,12 @@ public class SessionVariable implements Serializable, Writable {

public static final String ENABLE_ORC_LAZY_MAT = "enable_orc_lazy_materialization";

// Session variable name: byte-size threshold below which an ORC stripe is treated as a tiny stripe.
public static final String ORC_TINY_STRIPE_THRESHOLD = "orc_tiny_stripe_threshold";

// Session variable name: maximum byte size of one merged IO request when reading tiny stripes.
public static final String ORC_ONCE_MAX_READ_SIZE = "orc_once_max_read_size";

// Session variable name: maximum gap between two tiny stripes that may still be merged into one IO.
public static final String ORC_MAX_MERGE_DISTANCE = "orc_max_merge_distance";

public static final String ENABLE_PARQUET_FILTER_BY_MIN_MAX = "enable_parquet_filter_by_min_max";

public static final String ENABLE_ORC_FILTER_BY_MIN_MAX = "enable_orc_filter_by_min_max";
Expand Down Expand Up @@ -1677,6 +1683,46 @@ public void setEnableLeftZigZag(boolean enableLeftZigZag) {
public boolean enableOrcLazyMat = true;


/**
 * Session variable {@code orc_tiny_stripe_threshold}: an ORC stripe whose byte size is below
 * this value is treated as a tiny stripe. Defaults to 8 MiB. Values are assigned through
 * {@code setOrcTinyStripeThreshold}, as named by the {@code setter} attribute.
 */
@VariableMgr.VarAttr(
        name = ORC_TINY_STRIPE_THRESHOLD,
        description = {"在orc文件中如果一个stripe的字节大小小于`orc_tiny_stripe_threshold`,"
                + "我们认为该stripe为 tiny stripe。对于多个连续的tiny stripe我们会进行读取优化,即一次性读多个tiny stripe."
                + "如果你不想使用该优化,可以将该值设置为0。默认为 8M。",
                // Each English fragment ends with a space so the joined sentences do not run together.
                "In an orc file, if the byte size of a stripe is less than `orc_tiny_stripe_threshold`, "
                        + "we consider the stripe to be a tiny stripe. For multiple consecutive tiny stripes, "
                        + "we will perform read optimization, that is, read multiple tiny stripes at a time. "
                        + "If you do not want to use this optimization, you can set this value to 0. "
                        + "The default is 8M."},
        needForward = true,
        setter = "setOrcTinyStripeThreshold")
public long orcTinyStripeThreshold = 8L * 1024L * 1024L;


/**
 * Session variable {@code orc_once_max_read_size}: upper bound, in bytes, of a single IO
 * request when several tiny stripes are merged into one read. Defaults to 8 MiB. Values are
 * assigned through {@code setOrcOnceMaxReadSize}, as named by the {@code setter} attribute.
 */
@VariableMgr.VarAttr(
        name = ORC_ONCE_MAX_READ_SIZE,
        description = {"在使用tiny stripe读取优化的时候,会对多个tiny stripe合并成一次IO,"
                + "该参数用来控制每次IO请求的最大字节大小。你不应该将值设置的小于`orc_tiny_stripe_threshold`。默认为 8M。",
                // Each English fragment ends with a space so the joined sentences do not run together.
                "When using tiny stripe read optimization, multiple tiny stripes will be merged into one IO. "
                        + "This parameter is used to control the maximum byte size of each IO request. "
                        + "You should not set the value less than `orc_tiny_stripe_threshold`. "
                        + "The default is 8M."},
        needForward = true,
        setter = "setOrcOnceMaxReadSize")
public long orcOnceMaxReadSize = 8L * 1024L * 1024L;


/**
 * Session variable {@code orc_max_merge_distance}: two tiny stripes further apart than this
 * many bytes are not merged into one IO. Defaults to 1 MiB. Values are assigned through
 * {@code setOrcMaxMergeDistance}, as named by the {@code setter} attribute.
 */
@VariableMgr.VarAttr(
        name = ORC_MAX_MERGE_DISTANCE,
        description = {"在使用tiny stripe读取优化的时候,由于tiny stripe并不一定连续。"
                + "当两个tiny stripe之间距离大于该参数时,我们不会将其合并成一次IO。默认为 1M。",
                // Each English fragment ends with a space so the joined clauses do not run together.
                "When using tiny stripe read optimization, since tiny stripes are not necessarily continuous, "
                        + "when the distance between two tiny stripes is greater than this parameter, "
                        + "we will not merge them into one IO. The default value is 1M."},
        needForward = true,
        setter = "setOrcMaxMergeDistance")
public long orcMaxMergeDistance = 1024L * 1024L;


@VariableMgr.VarAttr(
name = ENABLE_PARQUET_FILTER_BY_MIN_MAX,
description = {"控制 parquet reader 是否启用 min-max 值过滤。默认为 true。",
Expand Down Expand Up @@ -2743,6 +2789,32 @@ public void setFragmentInstanceNum(String value) throws Exception {
this.parallelExecInstanceNum = val;
}

/**
 * Setter for the {@code orc_tiny_stripe_threshold} session variable.
 *
 * @param value textual value; parsed as a long and required to be at least 0
 * @throws Exception if {@code checkFieldLongValue} rejects the value
 */
public void setOrcTinyStripeThreshold(String value) throws Exception {
    this.orcTinyStripeThreshold = checkFieldLongValue(ORC_TINY_STRIPE_THRESHOLD, 0, value);
}

/**
 * Setter for the {@code orc_once_max_read_size} session variable.
 *
 * @param value textual value; parsed as a long and required to be at least 0
 * @throws Exception if {@code checkFieldLongValue} rejects the value
 */
public void setOrcOnceMaxReadSize(String value) throws Exception {
    this.orcOnceMaxReadSize = checkFieldLongValue(ORC_ONCE_MAX_READ_SIZE, 0, value);
}

/**
 * Setter for the {@code orc_max_merge_distance} session variable.
 *
 * @param value textual value; parsed as a long and required to be at least 0
 * @throws Exception if {@code checkFieldLongValue} rejects the value
 */
public void setOrcMaxMergeDistance(String value) throws Exception {
    this.orcMaxMergeDistance = checkFieldLongValue(ORC_MAX_MERGE_DISTANCE, 0, value);
}

/**
 * Parses a textual setting as a long and enforces an inclusive lower bound.
 *
 * @param variableName name of the variable, used only in the error message
 * @param minValue smallest accepted value (inclusive)
 * @param value text to parse with {@link Long#parseLong(String)}
 * @return the parsed value when it is at least {@code minValue}
 * @throws Exception if the parsed value is below {@code minValue}; a
 *         {@link NumberFormatException} from parsing propagates unchanged
 */
private long checkFieldLongValue(String variableName, long minValue, String value) throws Exception {
    final long parsed = Long.parseLong(value);
    if (parsed >= minValue) {
        return parsed;
    }
    // String concatenation renders the long exactly as String.valueOf would.
    throw new Exception(variableName + " value should greater than or equal " + minValue
            + ", you set value is: " + value);
}


private int checkFieldValue(String variableName, int minValue, String value) throws Exception {
int val = Integer.valueOf(value);
if (val < minValue) {
Expand Down Expand Up @@ -3810,6 +3882,11 @@ public TQueryOptions toThrift() {
tResult.setAdaptivePipelineTaskSerialReadOnLimit(adaptivePipelineTaskSerialReadOnLimit);
tResult.setInListValueCountThreshold(inListValueCountThreshold);
tResult.setEnablePhraseQuerySequentialOpt(enablePhraseQuerySequentialOpt);

tResult.setOrcTinyStripeThreshold(orcTinyStripeThreshold);
tResult.setOrcMaxMergeDistance(orcMaxMergeDistance);
tResult.setOrcOnceMaxReadSize(orcOnceMaxReadSize);

return tResult;
}

Expand Down
3 changes: 3 additions & 0 deletions gensrc/thrift/PaloInternalService.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,9 @@ struct TQueryOptions {
135: optional bool enable_parallel_outfile = false;

136: optional bool enable_phrase_query_sequential_opt = true;
137: optional i64 orc_tiny_stripe_threshold = 8388608;
138: optional i64 orc_once_max_read_size = 8388608;
139: optional i64 orc_max_merge_distance = 1048576;

// For cloud, to control if the content would be written into file cache
// In write path, to control if the content would be written into file cache.
Expand Down

0 comments on commit 09dc709

Please sign in to comment.