Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support 'd' flag on RegExp #1358

Merged
merged 6 commits into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions src/Escargot.h
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,6 @@ extern "C" {

#ifdef ENABLE_ICU
#if defined(ENABLE_RUNTIME_ICU_BINDER)
typedef unsigned char LChar;
#include "RuntimeICUBinder.h"
#include "ICUPolyfill.h"
#else
Expand Down Expand Up @@ -574,21 +573,26 @@ typedef uint16_t LexicalBlockIndex;
#endif

#include <tsl/robin_set.h>
#include <tsl/robin_map.h>

namespace Escargot {

template <class Key, class Hash = std::hash<Key>,
class KeyEqual = std::equal_to<Key>,
class Allocator = std::allocator<Key>,
bool StoreHash = false,
class GrowthPolicy = tsl::rh::power_of_two_growth_policy<2>>
using HashSet = tsl::robin_set<Key, Hash, KeyEqual, Allocator, StoreHash, GrowthPolicy>;

#include <tsl/robin_map.h>
template <class Key, class T, class Hash = std::hash<Key>,
class KeyEqual = std::equal_to<Key>,
class Allocator = std::allocator<std::pair<Key, T>>,
bool StoreHash = false,
class GrowthPolicy = tsl::rh::power_of_two_growth_policy<2>>
using HashMap = tsl::robin_map<Key, T, Hash, KeyEqual, Allocator, StoreHash, GrowthPolicy>;

} // namespace Escargot


#include "EscargotInfo.h"
#include "heap/Heap.h"
Expand Down
14 changes: 8 additions & 6 deletions src/api/EscargotPublic.h
Original file line number Diff line number Diff line change
Expand Up @@ -1721,12 +1721,14 @@ class ESCARGOT_EXPORT RegExpObjectRef : public ObjectRef {
public:
// Option flags of a RegExp object. Each non-zero enumerator occupies its own
// bit. Every enumerator is declared exactly once; a repeated name would be a
// redefinition error.
enum RegExpObjectOption {
    None = 0 << 0,
    HasIndices = 1 << 0,
    Global = 1 << 1,
    IgnoreCase = 1 << 2,
    MultiLine = 1 << 3,
    DotAll = 1 << 4,
    Unicode = 1 << 5,
    UnicodeSets = 1 << 6,
    Sticky = 1 << 7,
};

struct ESCARGOT_EXPORT RegexMatchResult {
Expand Down
41 changes: 38 additions & 3 deletions src/builtins/BuiltinRegExp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,29 @@ static Value builtinRegExpCompile(ExecutionState& state, Value thisValue, size_t
ErrorObject::throwBuiltinError(state, ErrorCode::TypeError, ErrorObject::Messages::GlobalObject_ThisNotRegExpObject);
}

Optional<Object*> proto = thisValue.asObject()->getPrototypeObject(state);
Context* calleeContext = state.resolveCallee()->codeBlock()->context();

if (!proto || !proto->isRegExpPrototypeObject()) {
ErrorObject::throwBuiltinError(state, ErrorCode::TypeError, ErrorObject::Messages::GlobalObject_ThisNotRegExpObject);
}

bool match = false;
while (proto) {
Value c = proto->getOwnProperty(state, ObjectPropertyName(state.context()->staticStrings().constructor)).value(state, proto.value());
if (c.isFunction()) {
if (c.asFunction()->codeBlock()->context() == calleeContext) {
match = true;
break;
}
}
proto = proto->getPrototypeObject(state);
}

if (!match) {
ErrorObject::throwBuiltinError(state, ErrorCode::TypeError, "Cannot use compile function with another Realm");
}

if (argv[0].isObject() && argv[0].asObject()->isRegExpObject()) {
if (!argv[1].isUndefined()) {
ErrorObject::throwBuiltinError(state, ErrorCode::TypeError, "Cannot supply flags when constructing one RegExp from another");
Expand All @@ -216,9 +239,9 @@ static Value builtinRegExpCompile(ExecutionState& state, Value thisValue, size_t
}

RegExpObject* retVal = thisValue.asPointerValue()->asObject()->asRegExpObject();
String* pattern_str = argv[0].isUndefined() ? String::emptyString : argv[0].toString(state);
String* flags_str = argv[1].isUndefined() ? String::emptyString : argv[1].toString(state);
retVal->init(state, pattern_str, flags_str);
String* patternStr = argv[0].isUndefined() ? String::emptyString : argv[0].toString(state);
String* flagsStr = argv[1].isUndefined() ? String::emptyString : argv[1].toString(state);
retVal->init(state, patternStr, flagsStr);
return retVal;
}
static Value builtinRegExpSearch(ExecutionState& state, Value thisValue, size_t argc, Value* argv, Optional<Object*> newTarget)
Expand Down Expand Up @@ -620,6 +643,11 @@ static Value builtinRegExpMultiLineGetter(ExecutionState& state, Value thisValue
return builtinRegExpOptionGetterHelper(state, thisValue, RegExpObject::Option::MultiLine);
}

// Native getter for the `hasIndices` accessor. It delegates to the shared
// option-getter helper, asking it about the HasIndices option bit.
// argc, argv and newTarget are not used by this getter.
static Value builtinRegExpHasIndicesGetter(ExecutionState& state, Value thisValue, size_t argc, Value* argv, Optional<Object*> newTarget)
{
    const auto hasIndicesFlag = RegExpObject::Option::HasIndices;
    return builtinRegExpOptionGetterHelper(state, thisValue, hasIndicesFlag);
}

static Value builtinRegExpSourceGetter(ExecutionState& state, Value thisValue, size_t argc, Value* argv, Optional<Object*> newTarget)
{
if (!thisValue.isObject()) {
Expand Down Expand Up @@ -868,6 +896,13 @@ void GlobalObject::installRegExp(ExecutionState& state)
m_regexpPrototype->directDefineOwnProperty(state, ObjectPropertyName(state, strings->multiline), desc);
}

{
Value getter = new NativeFunctionObject(state, NativeFunctionInfo(strings->getHasIndices, builtinRegExpHasIndicesGetter, 0, NativeFunctionInfo::Strict));
JSGetterSetter gs(getter, Value());
ObjectPropertyDescriptor desc(gs, ObjectPropertyDescriptor::ConfigurablePresent);
m_regexpPrototype->directDefineOwnProperty(state, ObjectPropertyName(state, strings->hasIndices), desc);
}

{
Value getter = new NativeFunctionObject(state, NativeFunctionInfo(strings->getSource, builtinRegExpSourceGetter, 0, NativeFunctionInfo::Strict));
JSGetterSetter gs(getter, Value());
Expand Down
6 changes: 5 additions & 1 deletion src/parser/Lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1984,7 +1984,11 @@ void Scanner::scanRegExp(Scanner::ScannerResult* token)

String* body = this->scanRegExpBody();
String* flags = this->scanRegExpFlags();
// const value = this->testRegExp(body.value, flags.value);

auto error = RegExpObject::checkRegExpSyntax(body, flags);
if (UNLIKELY(error)) {
ErrorHandler::throwError(this->index, this->lineNumber, this->index - this->lineStart + 1, error.value(), ErrorCode::SyntaxError);
}

ScanRegExpResult result;
result.body = body;
Expand Down
9 changes: 2 additions & 7 deletions src/parser/esprima_cpp/esprima.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,14 +208,9 @@ class Parser {
{
ASSERT(escargotContext != nullptr);

this->stackLimit = ThreadLocal::stackLimit();

// Use more stack for computing loc on stack-overflow situation
#ifdef STACK_GROWS_DOWN
this->stackLimit = this->stackLimit + STACK_FREESPACE_FROM_LIMIT / 2;
#else
this->stackLimit = this->stackLimit - STACK_FREESPACE_FROM_LIMIT / 2;
#endif
this->stackLimit = ThreadLocal::extendedStackLimit();

this->escargotContext = escargotContext;
this->stringArguments = escargotContext->staticStrings().arguments;
this->currentBlockContext = nullptr;
Expand Down
56 changes: 56 additions & 0 deletions src/runtime/RegExpObject.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "Yarr.h"
#include "YarrPattern.h"
#include "YarrInterpreter.h"
#include "YarrSyntaxChecker.h"

namespace Escargot {

Expand Down Expand Up @@ -182,6 +183,7 @@ bool RegExpObject::defineOwnProperty(ExecutionState& state, const ObjectProperty
|| name->equals(state.context()->staticStrings().sticky.string())
|| name->equals(state.context()->staticStrings().dotAll.string())
|| name->equals(state.context()->staticStrings().source.string())
|| name->equals(state.context()->staticStrings().hasIndices.string())
|| name->equals(state.context()->staticStrings().flags.string())) {
m_hasOwnPropertyWhichHasDefinedFromRegExpPrototype = true;
}
Expand Down Expand Up @@ -233,6 +235,16 @@ RegExpObject::Option RegExpObject::parseOption(ExecutionState& state, String* op
ErrorObject::throwBuiltinError(state, ErrorCode::SyntaxError, "RegExp has multiple 's' flags");
tempOption = (Option)(tempOption | Option::DotAll);
break;
case 'v':
if (tempOption & Option::UnicodeSets)
ErrorObject::throwBuiltinError(state, ErrorCode::SyntaxError, "RegExp has multiple 'v' flags");
tempOption = (Option)(tempOption | Option::UnicodeSets);
break;
case 'd':
if (tempOption & Option::HasIndices)
ErrorObject::throwBuiltinError(state, ErrorCode::SyntaxError, "RegExp has multiple 'd' flags");
tempOption = (Option)(tempOption | Option::HasIndices);
break;
default:
ErrorObject::throwBuiltinError(state, ErrorCode::SyntaxError, "RegExp has invalid flag");
}
Expand Down Expand Up @@ -465,6 +477,40 @@ ArrayObject* RegExpObject::createRegExpMatchedArray(ExecutionState& state, const
}
}

if (option() & HasIndices) {
ArrayObject* indices = new ArrayObject(state, len);
arr->directDefineOwnProperty(state, ObjectPropertyName(state.context()->staticStrings().indices), ObjectPropertyDescriptor(Value(indices), ObjectPropertyDescriptor::AllPresent));

size_t idx = 0;
for (unsigned i = 0; i < result.m_matchResults.size(); i++) {
for (unsigned j = 0; j < result.m_matchResults[i].size(); j++) {
if (result.m_matchResults[i][j].m_start == std::numeric_limits<unsigned>::max()) {
indices->defineOwnIndexedPropertyWithoutExpanding(state, idx++, Value());
} else {
ArrayObject* pair = new ArrayObject(state, 2);
pair->defineOwnIndexedPropertyWithoutExpanding(state, 0, Value(result.m_matchResults[i][j].m_start));
pair->defineOwnIndexedPropertyWithoutExpanding(state, 1, Value(result.m_matchResults[i][j].m_end));

indices->defineOwnIndexedPropertyWithoutExpanding(state, idx++, Value(pair));
}
}
}

if (m_yarrPattern->m_namedGroupToParenIndices.empty()) {
indices->defineOwnProperty(state, ObjectPropertyName(state.context()->staticStrings().groups), ObjectPropertyDescriptor(Value(), ObjectPropertyDescriptor::AllPresent));
} else {
Object* groups = new Object(state, Object::PrototypeIsNull);
for (auto it = m_yarrPattern->m_captureGroupNames.begin(); it != m_yarrPattern->m_captureGroupNames.end(); ++it) {
auto foundMapElement = m_yarrPattern->m_namedGroupToParenIndices.find(*it);
if (foundMapElement != m_yarrPattern->m_namedGroupToParenIndices.end()) {
groups->directDefineOwnProperty(state, ObjectPropertyName(state, it->impl()),
ObjectPropertyDescriptor(indices->getOwnProperty(state, ObjectPropertyName(state, foundMapElement->second[0])).value(state, this), ObjectPropertyDescriptor::AllPresent));
}
}
indices->directDefineOwnProperty(state, ObjectPropertyName(state.context()->staticStrings().groups), ObjectPropertyDescriptor(Value(groups), ObjectPropertyDescriptor::AllPresent));
}
}

if (m_yarrPattern->m_namedGroupToParenIndices.empty()) {
arr->defineOwnProperty(state, ObjectPropertyName(state.context()->staticStrings().groups), ObjectPropertyDescriptor(Value(), ObjectPropertyDescriptor::AllPresent));
} else {
Expand Down Expand Up @@ -533,6 +579,16 @@ Value RegExpObject::regexpFlagsValue(ExecutionState& state, Object* obj)
}
}

// Runs the Yarr syntax checker over a regular expression pattern and its flags.
// Returns nullptr when Yarr reports NoError; otherwise returns a String built
// from Yarr's error message for the reported error code.
Optional<String*> RegExpObject::checkRegExpSyntax(String* pattern, String* flags)
{
    const JSC::Yarr::ErrorCode status = JSC::Yarr::checkSyntax(pattern, flags);

    // Valid pattern: nothing to report.
    if (status == JSC::Yarr::ErrorCode::NoError) {
        return nullptr;
    }

    auto message = JSC::Yarr::errorMessage(status);
    return String::fromASCII(message, strlen(message));
}

String* RegExpObject::computeRegExpOptionString(ExecutionState& state, Object* obj)
{
char flags[8] = { 0 };
Expand Down
2 changes: 2 additions & 0 deletions src/runtime/RegExpObject.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,8 @@ class RegExpObject : public DerivedObject {
static String* computeRegExpOptionString(ExecutionState& state, Object* obj);
static String* regexpSourceValue(ExecutionState& state, Object* obj);
static Value regexpFlagsValue(ExecutionState& state, Object* obj);
// Returns an error message string if the pattern or flags fail the syntax check; otherwise returns no value (nullptr).
static Optional<String*> checkRegExpSyntax(String* pattern, String* flags);

protected:
explicit RegExpObject(ExecutionState& state, Object* proto, bool hasLastIndex = true);
Expand Down
1 change: 1 addition & 0 deletions src/runtime/StaticStrings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ void StaticStrings::initStaticStrings()
INIT_STATIC_STRING(getFlags, "get flags");
INIT_STATIC_STRING(getFormat, "get format");
INIT_STATIC_STRING(getGlobal, "get global");
INIT_STATIC_STRING(getHasIndices, "get hasIndices");
INIT_STATIC_STRING(getHourCycle, "get hourCycle");
INIT_STATIC_STRING(getHourCycles, "get hourCycles");
INIT_STATIC_STRING(getIgnoreCase, "get ignoreCase");
Expand Down
2 changes: 2 additions & 0 deletions src/runtime/StaticStrings.h
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ namespace Escargot {
F(implements) \
F(imul) \
F(includes) \
F(indices) \
F(index) \
F(indexOf) \
F(input) \
Expand Down Expand Up @@ -1002,6 +1003,7 @@ class StaticStrings {
AtomicString getFlags;
AtomicString getFormat;
AtomicString getGlobal;
AtomicString getHasIndices;
AtomicString getHourCycle;
AtomicString getHourCycles;
AtomicString getIgnoreCase;
Expand Down
10 changes: 10 additions & 0 deletions src/runtime/ThreadLocal.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,16 @@ class ThreadLocal {
return g_stackLimit;
}

// Stack limit for callers that want to use more stack than stackLimit() allows:
// the regular limit shifted by half of STACK_FREESPACE_FROM_LIMIT. The shift is
// added when STACK_GROWS_DOWN is defined and subtracted otherwise.
static size_t extendedStackLimit()
{
    const size_t regularLimit = stackLimit();
#ifdef STACK_GROWS_DOWN
    return regularLimit + STACK_FREESPACE_FROM_LIMIT / 2;
#else
    return regularLimit - STACK_FREESPACE_FROM_LIMIT / 2;
#endif
}

static std::mt19937& randEngine()
{
ASSERT(inited && !!g_randEngine);
Expand Down
2 changes: 1 addition & 1 deletion test/vendortest
52 changes: 52 additions & 0 deletions third_party/yarr/HashMap.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Copyright (c) 2024-present Samsung Electronics Co., Ltd
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this library; see the file COPYING.LIB. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/

#pragma once

#include <unordered_map>
#include <iterator>

namespace WTF {

// Small adapter that gives std::unordered_map the add()/get() members used by
// code written against a WTF-style HashMap interface. All std::unordered_map
// members remain available through public inheritance.
template <typename Key, typename Value, typename Allocator = std::allocator<std::pair<Key const, Value>>>
class HashMap : public std::unordered_map<Key, Value, std::hash<Key>, std::equal_to<Key>, Allocator> {
    using Base = std::unordered_map<Key, Value, std::hash<Key>, std::equal_to<Key>, Allocator>;

public:
    // Result of add(): whether a new entry was inserted, and an iterator to the
    // entry that now holds the key (the pre-existing one when nothing was inserted).
    struct AddResult {
        bool isNewEntry;
        typename Base::iterator iterator;
    };

    // Inserts (k, v) if k is not present yet. An existing entry is left untouched.
    AddResult add(const Key& k, const Value& v)
    {
        auto inserted = Base::insert(std::make_pair(k, v));
        AddResult r;
        r.iterator = inserted.first;
        r.isNewEntry = inserted.second;
        return r;
    }

    // Returns the value mapped to k. When k is absent, returns a reference to a
    // shared default-constructed Value instead of dereferencing end(), which
    // would be undefined behaviour. The map itself is never modified.
    const Value& get(const Key& k) const
    {
        auto found = Base::find(k);
        if (found == Base::end()) {
            static const Value emptyValue{};
            return emptyValue;
        }
        return found->second;
    }
};

} // namespace WTF

using WTF::HashMap;
Loading
Loading