Skip to content

Commit 75c7788

Browse files
committed
Add PODArray<> and make BitState use it.
Change-Id: I6dd275f90133065a9254f27027889a333fd8ec8b Reviewed-on: https://code-review.googlesource.com/32370 Reviewed-by: Paul Wankadia <[email protected]>
1 parent 06af5d3 commit 75c7788

File tree

4 files changed

+91
-51
lines changed

4 files changed

+91
-51
lines changed

BUILD

+1
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ cc_library(
5858
"util/logging.h",
5959
"util/mix.h",
6060
"util/mutex.h",
61+
"util/pod_array.h",
6162
"util/rune.cc",
6263
"util/sparse_array.h",
6364
"util/sparse_set.h",

Makefile

+1
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ HFILES=\
8383
util/mix.h\
8484
util/mutex.h\
8585
util/pcre.h\
86+
util/pod_array.h\
8687
util/sparse_array.h\
8788
util/sparse_set.h\
8889
util/strutil.h\

re2/bitstate.cc

+34-51
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,10 @@
2020
#include <stddef.h>
2121
#include <stdint.h>
2222
#include <string.h>
23+
#include <utility>
2324

2425
#include "util/logging.h"
26+
#include "util/pod_array.h"
2527
#include "re2/prog.h"
2628
#include "re2/regexp.h"
2729

@@ -36,7 +38,6 @@ struct Job {
3638
class BitState {
3739
public:
3840
explicit BitState(Prog* prog);
39-
~BitState();
4041

4142
// The usual Search prototype.
4243
// Can only call Search once per BitState.
@@ -47,7 +48,7 @@ class BitState {
4748
private:
4849
inline bool ShouldVisit(int id, const char* p);
4950
void Push(int id, const char* p, int arg);
50-
bool GrowStack();
51+
void GrowStack();
5152
bool TrySearch(int id, const char* p);
5253

5354
// Search parameters
@@ -57,20 +58,15 @@ class BitState {
5758
bool anchored_; // whether search is anchored at text.begin()
5859
bool longest_; // whether search wants leftmost-longest match
5960
bool endmatch_; // whether match must end at text.end()
60-
StringPiece *submatch_; // submatches to fill in
61+
StringPiece* submatch_; // submatches to fill in
6162
int nsubmatch_; // # of submatches to fill in
6263

6364
// Search state
64-
const char** cap_; // capture registers
65-
int ncap_;
66-
6765
static const int VisitedBits = 32;
68-
uint32_t *visited_; // bitmap: (Inst*, char*) pairs already backtracked
69-
size_t nvisited_; // # of words in bitmap
70-
71-
Job *job_; // stack of text positions to explore
72-
int njob_;
73-
int maxjob_;
66+
PODArray<uint32_t> visited_; // bitmap: (Inst*, char*) pairs visited
67+
PODArray<const char*> cap_; // capture registers
68+
PODArray<Job> job_; // stack of text positions to explore
69+
int njob_; // stack size
7470
};
7571

7672
BitState::BitState(Prog* prog)
@@ -80,19 +76,7 @@ BitState::BitState(Prog* prog)
8076
endmatch_(false),
8177
submatch_(NULL),
8278
nsubmatch_(0),
83-
cap_(NULL),
84-
ncap_(0),
85-
visited_(NULL),
86-
nvisited_(0),
87-
job_(NULL),
88-
njob_(0),
89-
maxjob_(0) {
90-
}
91-
92-
BitState::~BitState() {
93-
delete[] visited_;
94-
delete[] job_;
95-
delete[] cap_;
79+
njob_(0) {
9680
}
9781

9882
// Should the search visit the pair ip, p?
@@ -107,24 +91,22 @@ bool BitState::ShouldVisit(int id, const char* p) {
10791
}
10892

10993
// Grow the stack.
//
// This hunk shows two versions of the function.
//
// Removed version (marker lines ending in "-"): doubles maxjob_, allocates a
// new Job array with new[], memmove()s the first njob_ entries into it,
// frees the old array, and returns false (after logging DFATAL) if the
// stack is still full.
//
// Added version (marker lines ending in "+"): returns void.  It allocates a
// PODArray<Job> with twice job_.size() entries, memmove()s the first njob_
// entries into it, and move-assigns it over job_.  It does not report
// failure itself; Push() re-checks njob_ against job_.size() after the call.
// Note that doubling a size of 0 yields 0, which is the case that check
// catches.
110-
bool BitState::GrowStack() {
111-
maxjob_ *= 2;
112-
Job* newjob = new Job[maxjob_];
113-
memmove(newjob, job_, njob_*sizeof job_[0]);
114-
delete[] job_;
115-
job_ = newjob;
116-
if (njob_ >= maxjob_) {
117-
LOG(DFATAL) << "Job stack overflow.";
118-
return false;
119-
}
120-
return true;
94+
void BitState::GrowStack() {
95+
PODArray<Job> tmp(2*job_.size());
96+
memmove(tmp.data(), job_.data(), njob_*sizeof job_[0]);
97+
job_ = std::move(tmp);
12198
}
12299

123100
// Push the triple (id, p, arg) onto the stack, growing it if necessary.
124101
void BitState::Push(int id, const char* p, int arg) {
125-
if (njob_ >= maxjob_) {
126-
if (!GrowStack())
102+
if (njob_ >= job_.size()) {
103+
GrowStack();
104+
if (njob_ >= job_.size()) {
105+
LOG(DFATAL) << "GrowStack() failed: "
106+
<< "njob_ = " << njob_ << ", "
107+
<< "job_.size() = " << job_.size();
127108
return;
109+
}
128110
}
129111
int op = prog_->inst(id)->opcode();
130112
if (op == kInstFail)
@@ -234,7 +216,7 @@ bool BitState::TrySearch(int id0, const char* p0) {
234216
if (!ip->last())
235217
Push(id+1, p, 0); // try the next when we're done
236218

237-
if (0 <= ip->cap() && ip->cap() < ncap_) {
219+
if (0 <= ip->cap() && ip->cap() < cap_.size()) {
238220
// Capture p to register, but save old value.
239221
Push(id, cap_[ip->cap()], 1); // come back when we're done
240222
cap_[ip->cap()] = p;
@@ -327,18 +309,19 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context,
327309
submatch_[i] = StringPiece();
328310

329311
// Allocate scratch space.
330-
nvisited_ = (prog_->size() * (text.size()+1) + VisitedBits-1) / VisitedBits;
331-
visited_ = new uint32_t[nvisited_];
332-
memset(visited_, 0, nvisited_*sizeof visited_[0]);
333-
334-
ncap_ = 2*nsubmatch;
335-
if (ncap_ < 2)
336-
ncap_ = 2;
337-
cap_ = new const char*[ncap_];
338-
memset(cap_, 0, ncap_*sizeof cap_[0]);
339-
340-
maxjob_ = 256;
341-
job_ = new Job[maxjob_];
312+
int nvisited = prog_->size() * static_cast<int>(text.size()+1);
313+
nvisited = (nvisited + VisitedBits-1) / VisitedBits;
314+
visited_ = PODArray<uint32_t>(nvisited);
315+
memset(visited_.data(), 0, nvisited*sizeof visited_[0]);
316+
317+
int ncap = 2*nsubmatch;
318+
if (ncap < 2)
319+
ncap = 2;
320+
cap_ = PODArray<const char*>(ncap);
321+
memset(cap_.data(), 0, ncap*sizeof cap_[0]);
322+
323+
// When sizeof(Job) == 16, we start with a nice round 4KiB. :)
324+
job_ = PODArray<Job>(256);
342325

343326
// Anchored search must start at text.begin().
344327
if (anchored_) {

util/pod_array.h

+55
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
// Copyright 2018 The RE2 Authors. All Rights Reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
#ifndef UTIL_POD_ARRAY_H_
6+
#define UTIL_POD_ARRAY_H_
7+
8+
#include <memory>
9+
#include <type_traits>
10+
11+
namespace re2 {
12+
13+
// PODArray<T> is a fixed-length, heap-allocated array of POD elements.
//
// Storage comes from std::allocator<T>::allocate(), which hands back raw
// memory; this class never constructs or zeroes the elements, so callers
// must write to an element (or memset the whole array) before reading it.
//
// The array length is not stored as a separate member: it lives inside the
// custom Deleter held by the std::unique_ptr, and size() reads it from
// there.  Because the only data member is a std::unique_ptr, the class is
// movable but not copyable.
template <typename T>
14+
class PODArray {
15+
public:
16+
// Compile-time check: element types that are not POD are rejected.
static_assert(std::is_pod<T>::value,
17+
"T must be POD");
18+
19+
// Constructs an empty array: null data pointer and a length of 0.
PODArray()
20+
: ptr_(nullptr, Deleter()) {}
21+
// Allocates storage for len elements and records len in the deleter.
// The elements are left uninitialised.
explicit PODArray(int len)
22+
: ptr_(std::allocator<T>().allocate(len), Deleter(len)) {}
23+
24+
// Returns a pointer to the first element (nullptr for a
// default-constructed array).  The pointer is non-const even though the
// accessor is const.
T* data() const {
25+
return ptr_.get();
26+
}
27+
28+
// Returns the number of elements requested at construction, as recorded
// in the deleter.
int size() const {
29+
return ptr_.get_deleter().len_;
30+
}
31+
32+
// Returns a reference to the element at index pos.  No bounds checking
// is performed.
T& operator[](int pos) const {
33+
return ptr_[pos];
34+
}
35+
36+
private:
37+
// Deleter used by ptr_.  It remembers the allocation length because
// std::allocator<T>::deallocate() must be given the same element count
// that was passed to allocate().
struct Deleter {
38+
Deleter()
39+
: len_(0) {}
40+
explicit Deleter(int len)
41+
: len_(len) {}
42+
43+
// Releases the storage; element destructors are not run.
void operator()(T* ptr) const {
44+
std::allocator<T>().deallocate(ptr, len_);
45+
}
46+
47+
// Number of elements in the allocation owned by the unique_ptr.
int len_;
48+
};
49+
50+
// Owns the storage; the Deleter stored alongside it carries the length.
std::unique_ptr<T[], Deleter> ptr_;
51+
};
52+
53+
} // namespace re2
54+
55+
#endif // UTIL_POD_ARRAY_H_

0 commit comments

Comments
 (0)