Skip to content

Commit

Permalink
non-overlapping intervals
Browse files Browse the repository at this point in the history
  • Loading branch information
ashleynh committed Dec 19, 2024
1 parent 4e1ae4a commit 1be536e
Show file tree
Hide file tree
Showing 6 changed files with 160 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/passes/Outlining.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,8 @@ struct Outlining : public Pass {
DBG(printHashString(stringify.hashString, stringify.exprs));
// Remove substrings that are substrings of longer repeat substrings.
substrings = StringifyProcessor::dedupe(substrings);
// Remove substrings with overlapping indices
substrings = StringifyProcessor::removeOverlaps(substrings);
// Remove substrings with branch and return instructions until an analysis
// is performed to see if the intended destination of the branch is included
// in the substring to be outlined.
Expand Down
40 changes: 40 additions & 0 deletions src/passes/hash-stringify-walker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/

#include "stringify-walker.h"
#include "support/intervals.h"

namespace wasm {

Expand Down Expand Up @@ -147,6 +148,45 @@ std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::dedupe(
return result;
}

// Returns the substrings that do not lose an overlap against another
// substring. Every occurrence of every substring becomes an Interval running
// from its start index to start index + Length, weighted by
// Length * number of occurrences. A substring is dropped entirely as soon as
// any one of its occurrences is reported by IntervalProcessor::getOverlaps.
// The relative order of the surviving substrings is preserved.
std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::removeOverlaps(
  const std::vector<SuffixTree::RepeatedSubstring>& substrings) {
  std::vector<Interval> intervals;
  // Maps each occurrence interval back to the index of its substring.
  std::unordered_map<Interval, unsigned> intervalMap;

  // Construct intervals
  for (Index i = 0; i < substrings.size(); i++) {
    // Bind by reference: copying would duplicate the StartIndices container.
    const auto& substring = substrings[i];
    for (auto startIdx : substring.StartIndices) {
      Interval interval(startIdx,
                        startIdx + substring.Length,
                        substring.Length * substring.StartIndices.size());
      intervals.push_back(interval);
      intervalMap[interval] = i;
    }
  }

  // Nothing can overlap when there are no occurrences at all; do not hand an
  // empty vector to the overlap search.
  if (intervals.empty()) {
    return substrings;
  }

  // Get the overlapping intervals
  std::set<Interval> overlaps = IntervalProcessor::getOverlaps(intervals);
  std::set<unsigned> doNotInclude;
  for (const auto& interval : overlaps) {
    // Use find() rather than operator[] so that an unknown interval can never
    // silently insert a default entry and exclude substring 0.
    auto it = intervalMap.find(interval);
    if (it != intervalMap.end()) {
      doNotInclude.insert(it->second);
    }
  }

  // Only include non-overlapping substrings
  std::vector<SuffixTree::RepeatedSubstring> result;
  for (Index i = 0; i < substrings.size(); i++) {
    if (doNotInclude.count(i) == 0) {
      result.push_back(substrings[i]);
    }
  }

  return result;
}

std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::filter(
const std::vector<SuffixTree::RepeatedSubstring>& substrings,
const std::vector<Expression*>& exprs,
Expand Down
2 changes: 2 additions & 0 deletions src/passes/stringify-walker.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "ir/module-utils.h"
#include "ir/stack-utils.h"
#include "ir/utils.h"
#include "support/intervals.h"
#include "support/suffix_tree.h"
#include "wasm-ir-builder.h"
#include "wasm-traversal.h"
Expand Down Expand Up @@ -264,6 +265,7 @@ using Substrings = std::vector<SuffixTree::RepeatedSubstring>;
struct StringifyProcessor {
static Substrings repeatSubstrings(std::vector<uint32_t>& hashString);
static Substrings dedupe(const Substrings& substrings);
static Substrings removeOverlaps(const Substrings& substrings);
// Filter is the general purpose function backing subsequent filter functions.
// It can be used directly, but generally prefer a wrapper function
// to encapsulate your condition and make it available for tests.
Expand Down
1 change: 1 addition & 0 deletions src/support/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ set(support_SOURCES
debug.cpp
dfa_minimization.cpp
file.cpp
intervals.cpp
istring.cpp
json.cpp
name.cpp
Expand Down
51 changes: 51 additions & 0 deletions src/support/intervals.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright 2024 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <assert.h>

#include "intervals.h"
#include "support/index.h"
#include <algorithm>

using namespace wasm;

std::set<Interval>
IntervalProcessor::getOverlaps(std::vector<Interval>& intervals) {
std::sort(intervals.begin(), intervals.end(), [](Interval a, Interval b) {
return a.start < b.end;
});

std::set<Interval> overlaps;
auto& firstInterval = intervals[0];
// Look for overlapping intervals
for (Index i = 1; i < intervals.size(); i++) {
auto& nextInterval = intervals[i];
if (firstInterval.end < nextInterval.start) {
firstInterval = nextInterval;
continue;
}

// Keep the interval with the higher score
if (nextInterval.weight > firstInterval.weight) {
overlaps.insert(firstInterval);
firstInterval = nextInterval;
} else {
overlaps.insert(nextInterval);
}
}

return overlaps;
}
64 changes: 64 additions & 0 deletions src/support/intervals.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* Copyright 2024 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// Helpers for handling a generic range of values

#ifndef wasm_support_intervals_h
#define wasm_support_intervals_h

#include <set>
#include <vector>

namespace wasm {

// A range described by its start and end positions, plus a weight. The weight
// determines the value of the interval when comparing against another
// interval, higher is better.
struct Interval {
  unsigned start;
  unsigned end;
  unsigned weight;
  Interval(unsigned start, unsigned end, unsigned weight)
    : start(start), end(end), weight(weight) {}

  // Lexicographic order on (start, end, weight). Ordered containers such as
  // std::set need a strict weak ordering in which two intervals are
  // equivalent only when operator== holds; otherwise distinct intervals are
  // treated as duplicates and silently dropped on insertion.
  bool operator<(const Interval& other) const {
    if (start != other.start) {
      return start < other.start;
    }
    if (end != other.end) {
      return end < other.end;
    }
    return weight < other.weight;
  }

  // Two intervals are equal when all three fields match.
  bool operator==(const Interval& other) const {
    return start == other.start && end == other.end && weight == other.weight;
  }
};

struct IntervalProcessor {
// Given a vector of intervals, returns the set of intervals that lose an
// overlap: when two intervals overlap, the one with the higher weight is kept
// and the other one is placed in the returned set. The vector is taken by
// non-const reference and may be reordered, because the implementation sorts
// it in place.
static std::set<Interval> getOverlaps(std::vector<Interval>&);
};

} // namespace wasm

namespace std {

// Hash support so that wasm::Interval can be used as a key in unordered
// containers. The three fields are folded together order-dependently; a plain
// sum would give the same hash to intervals whose fields are permutations of
// each other (e.g. {1, 3, w} and {3, 1, w}), causing avoidable collisions.
template<> struct hash<wasm::Interval> {
  size_t operator()(const wasm::Interval& i) const {
    // Mixing step in the style of boost::hash_combine.
    auto combine = [](size_t seed, unsigned value) {
      return seed ^ (std::hash<unsigned>{}(value) + 0x9e3779b9 + (seed << 6) +
                     (seed >> 2));
    };
    return combine(combine(std::hash<unsigned>{}(i.start), i.end), i.weight);
  }
};

} // namespace std

#endif // wasm_support_intervals_h

0 comments on commit 1be536e

Please sign in to comment.