Skip to content

Commit

Permalink
Merge pull request #24 from bab2min/develop
Browse files Browse the repository at this point in the history
direct-managing memory by using mi_stl_allocator
  • Loading branch information
bab2min authored Jan 19, 2021
2 parents 06ffe4e + bc1d628 commit 971c646
Show file tree
Hide file tree
Showing 8 changed files with 83 additions and 43 deletions.
18 changes: 12 additions & 6 deletions evaluator.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -100,21 +100,27 @@
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental>
<IncludePath>$(ProjectDir)mimalloc/include/;$(VC_IncludePath);$(WindowsSDK_IncludePath);</IncludePath>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release(LODA_TXT)|x64'">
<LinkIncremental>false</LinkIncremental>
<IncludePath>$(ProjectDir)mimalloc/include/;$(VC_IncludePath);$(WindowsSDK_IncludePath);</IncludePath>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<LinkIncremental>true</LinkIncremental>
<IncludePath>$(ProjectDir)mimalloc/include/;$(VC_IncludePath);$(WindowsSDK_IncludePath);</IncludePath>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental>
<IncludePath>$(ProjectDir)mimalloc/include/;$(VC_IncludePath);$(WindowsSDK_IncludePath);</IncludePath>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>false</LinkIncremental>
<IncludePath>$(ProjectDir)mimalloc/include/;$(VC_IncludePath);$(WindowsSDK_IncludePath);</IncludePath>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release(LODA_TXT)|Win32'">
<LinkIncremental>false</LinkIncremental>
<IncludePath>$(ProjectDir)mimalloc/include/;$(VC_IncludePath);$(WindowsSDK_IncludePath);</IncludePath>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
Expand All @@ -123,7 +129,7 @@
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>USE_MIMALLOC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<AdditionalOptions>/Qvec-report:1 %(AdditionalOptions)</AdditionalOptions>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
Expand All @@ -142,7 +148,7 @@
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>LOAD_TXT;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>USE_MIMALLOC;LOAD_TXT;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<AdditionalOptions>/Qvec-report:1 %(AdditionalOptions)</AdditionalOptions>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
Expand All @@ -159,7 +165,7 @@
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>USE_MIMALLOC;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
Expand All @@ -170,7 +176,7 @@
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>USE_MIMALLOC;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
Expand All @@ -184,7 +190,7 @@
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>USE_MIMALLOC;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
</ClCompile>
<Link>
Expand All @@ -200,7 +206,7 @@
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>LOAD_TXT;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>USE_MIMALLOC;LOAD_TXT;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
</ClCompile>
<Link>
Expand Down
2 changes: 1 addition & 1 deletion src/core/KModelMgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ void KModelMgr::loadMorphBin(_Istream& is)
forms.resize(formSize);
morphemes.resize(morphemeSize);

auto mapper = [this](size_t p)->const KMorpheme*
auto mapper = [this](size_t p)
{
return (const KMorpheme*)p;
};
Expand Down
14 changes: 7 additions & 7 deletions src/core/KTrie.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@
using namespace std;
using namespace kiwi;

vector<KGraphNode> KTrie::split(const k_string& str, const PatternMatcher* pm, size_t matchOptions) const
mvector<KGraphNode> KTrie::split(const k_string& str, const PatternMatcher* pm, size_t matchOptions) const
{
vector<KGraphNode> ret;
mvector<KGraphNode> ret;
ret.reserve(8);
ret.emplace_back();
size_t n = 0;
vector<const KForm*> candidates;
mvector<const KForm*> candidates;
const KTrie* curTrie = this;
unordered_map<uint32_t, int> spacePos;
munordered_map<uint32_t, int> spacePos;
size_t lastSpecialEndPos = 0, specialStartPos = 0;
KPOSTag chrType, lastChrType = KPOSTag::UNKNOWN;
auto branchOut = [&](bool makeLongMatch = false)
Expand Down Expand Up @@ -317,9 +317,9 @@ KTrie KTrie::loadFromBin(std::istream & is, const KForm* base)
return t;
}

vector<KGraphNode> KGraphNode::removeUnconnected(const vector<KGraphNode>& graph)
mvector<KGraphNode> KGraphNode::removeUnconnected(const mvector<KGraphNode>& graph)
{
vector<uint16_t> connectedList(graph.size()), newIndexDiff(graph.size());
mvector<uint16_t> connectedList(graph.size()), newIndexDiff(graph.size());
connectedList[graph.size() - 1] = true;
connectedList[0] = true;
// forward searching
Expand Down Expand Up @@ -368,7 +368,7 @@ vector<KGraphNode> KGraphNode::removeUnconnected(const vector<KGraphNode>& graph
newIndexDiff[i] = i + 1 - newIndexDiff[i];
}

vector<KGraphNode> ret;
mvector<KGraphNode> ret;
ret.reserve(connectedCnt);
for (size_t i = 0; i < graph.size(); ++i)
{
Expand Down
4 changes: 2 additions & 2 deletions src/core/KTrie.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ namespace kiwi

// Returns the predecessor stored in slot `idx`, or nullptr when that slot holds 0.
// The slot value is applied as a backward element offset from this node
// (presumably nodes live in one contiguous array — confirm against the callers).
// No bounds check is performed on `idx`.
KGraphNode* getPrev(size_t idx) const
{
	if (!prevs[idx]) return nullptr;
	// Constness is dropped on purpose: callers receive a mutable node pointer.
	return const_cast<KGraphNode*>(this) - prevs[idx];
}

static std::vector<KGraphNode> removeUnconnected(const std::vector<KGraphNode>& graph);
static mvector<KGraphNode> removeUnconnected(const mvector<KGraphNode>& graph);

void addPrev(size_t distance)
{
Expand All @@ -39,7 +39,7 @@ namespace kiwi

struct KTrie : public Trie<char16_t, const KForm*, OverriddenMap<std::map<char16_t, int32_t>>>
{
std::vector<KGraphNode> split(const k_string& str, const PatternMatcher* pm, size_t matchOptions) const;
mvector<KGraphNode> split(const k_string& str, const PatternMatcher* pm, size_t matchOptions) const;
const KForm* findForm(const k_string& str) const;
KTrie* getNext(k_char i) const { return (KTrie*)Trie::getNext(i); }
KTrie* getFail() const { return (KTrie*)Trie::getFail(); }
Expand Down
30 changes: 13 additions & 17 deletions src/core/Kiwi.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
#ifdef USE_MIMALLOC
#include <mimalloc-new-delete.h>
#endif

#include <future>
#include <future>
#include "Kiwi.h"
#include "Utils.h"
#include "KFeatureTestor.h"
Expand Down Expand Up @@ -287,8 +283,8 @@ struct MInfo
};

struct WordLL;
using MInfos = vector<MInfo>;
using WordLLs = vector<WordLL>;
using MInfos = mvector<MInfo>;
using WordLLs = mvector<WordLL>;

struct WordLL
{
Expand Down Expand Up @@ -327,7 +323,7 @@ auto findNthLargest(_Iter first, _Iter last, size_t nth, _Key&& fn, _Filter&& fi
{
using KeyType = decltype(fn(*first));

std::vector<KeyType> v;
mvector<KeyType> v;
for (; first != last; ++first)
{
if(filter(*first)) v.emplace_back(fn(*first));
Expand All @@ -338,7 +334,7 @@ auto findNthLargest(_Iter first, _Iter last, size_t nth, _Key&& fn, _Filter&& fi
}

template<class _Type>
void evalTrigram(const KNLangModel::Node* rootNode, const KMorpheme* morphBase, const vector<k_string>& ownForms, const WordLL** wBegin, const WordLL** wEnd,
void evalTrigram(const KNLangModel::Node* rootNode, const KMorpheme* morphBase, const mvector<k_string>& ownForms, const WordLL** wBegin, const WordLL** wEnd,
array<WID, 4> seq, size_t chSize, const KMorpheme* curMorph, const KGraphNode* node, _Type& maxWidLL)
{
for (; wBegin != wEnd; ++wBegin)
Expand Down Expand Up @@ -384,12 +380,12 @@ void evalTrigram(const KNLangModel::Node* rootNode, const KMorpheme* morphBase,
}
}

vector<pair<Kiwi::path, float>> Kiwi::findBestPath(const vector<KGraphNode>& graph, const KNLangModel * knlm, const KMorpheme* morphBase, size_t topN) const
mvector<pair<Kiwi::path, float>> Kiwi::findBestPath(const mvector<KGraphNode>& graph, const KNLangModel * knlm, const KMorpheme* morphBase, size_t topN) const
{
vector<WordLLs> cache(graph.size());
mvector<WordLLs> cache(graph.size());
const KGraphNode* startNode = &graph.front();
const KGraphNode* endNode = &graph.back();
vector<k_string> ownFormList;
mvector<k_string> ownFormList;

vector<const KMorpheme*> unknownNodeCands, unknownNodeLCands;
unknownNodeCands.emplace_back(morphBase + (size_t)KPOSTag::NNG + 1);
Expand Down Expand Up @@ -435,8 +431,8 @@ vector<pair<Kiwi::path, float>> Kiwi::findBestPath(const vector<KGraphNode>& gra
condV = curMorph->vowel;
condP = curMorph->polar;

unordered_map<WID, vector<WordLLP>> maxWidLL;
vector<const WordLL*> works;
munordered_map<WID, mvector<WordLLP>> maxWidLL;
mvector<const WordLL*> works;
works.reserve(8);
float discountForCombining = 0;

Expand Down Expand Up @@ -645,7 +641,7 @@ vector<pair<Kiwi::path, float>> Kiwi::findBestPath(const vector<KGraphNode>& gra

#endif

vector<pair<path, float>> ret;
mvector<pair<path, float>> ret;
for (size_t i = 0; i < min(topN, cand.size()); ++i)
{
path mv(cand[i].morphs.size() - 1);
Expand Down Expand Up @@ -757,7 +753,7 @@ void Kiwi::perform(size_t topN, const function<u16string(size_t)>& reader, const
std::vector<KResult> Kiwi::analyzeSent(const std::u16string::const_iterator & sBegin, const std::u16string::const_iterator & sEnd, size_t topN, size_t matchOptions) const
{
auto nstr = normalizeHangul({ sBegin, sEnd });
vector<uint32_t> posMap(nstr.size() + 1);
mvector<uint32_t> posMap(nstr.size() + 1);
for (size_t i = 0; i < nstr.size(); ++i)
{
posMap[i + 1] = posMap[i] + (isHangulCoda(nstr[i]) ? 0 : 1);
Expand Down Expand Up @@ -820,7 +816,7 @@ vector<KResult> Kiwi::analyze(const u16string & str, size_t topN, size_t matchOp
{
if (!mdl->getTrie()) throw KiwiException("Model should be prepared before analyzing.");
auto chunk = str.begin();
vector<u16string::const_iterator> sents;
mvector<u16string::const_iterator> sents;
sents.emplace_back(chunk);
while (chunk != str.end())
{
Expand Down
6 changes: 3 additions & 3 deletions src/core/Kiwi.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ namespace kiwi
friend std::ostream& (::operator<<) (std::ostream& os, const KWordPair& kp);
};

typedef std::pair<std::vector<KWordPair>, float> KResult;
using KResult = std::pair<std::vector<KWordPair>, float>;

class KModelMgr;

Expand All @@ -59,8 +59,8 @@ namespace kiwi
bool integrateAllomorph;
std::unique_ptr<PatternMatcher> pm;
KWordDetector detector;
typedef std::vector<std::tuple<const KMorpheme*, k_string, uint32_t>> path;
std::vector<std::pair<path, float>> findBestPath(const std::vector<KGraphNode>& graph, const KNLangModel * knlm, const KMorpheme* morphBase, size_t topN) const;
using path = mvector<std::tuple<const KMorpheme*, k_string, uint32_t>>;
mvector<std::pair<path, float>> findBestPath(const mvector<KGraphNode>& graph, const KNLangModel * knlm, const KMorpheme* morphBase, size_t topN) const;
std::vector<KResult> analyzeSent(const std::u16string::const_iterator& sBegin, const std::u16string::const_iterator& sEnd, size_t topN, size_t matchOptions) const;
public:
enum
Expand Down
41 changes: 39 additions & 2 deletions src/core/KiwiHeader.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@
#define TRIE_ALLOC_ARRAY
#define KSTR(x) u##x

#ifdef USE_MIMALLOC
#include <mimalloc.h>
#endif

namespace kiwi
{
template<typename T, typename... Args>
Expand All @@ -54,9 +58,42 @@ namespace kiwi
using std::runtime_error::runtime_error;
};

#ifdef USE_MIMALLOC
	// Allocator-aware aliases: with USE_MIMALLOC defined, every container and
	// string alias below allocates through mi_stl_allocator.

	// std::vector whose storage is obtained through mi_stl_allocator.
	template<typename _Ty>
	using mvector = std::vector<_Ty, mi_stl_allocator<_Ty>>;

	// std::unordered_map with the default hash/equality and mi_stl_allocator for its nodes.
	template<typename _K, typename _V>
	using munordered_map = std::unordered_map<_K, _V, std::hash<_K>, std::equal_to<_K>, mi_stl_allocator<std::pair<const _K, _V>>>;

	// String and string-stream types over k_char, also allocating through mi_stl_allocator.
	using k_string = std::basic_string<k_char, std::char_traits<k_char>, mi_stl_allocator<k_char>>;
	using k_stringstream = std::basic_stringstream<k_char, std::char_traits<k_char>, mi_stl_allocator<k_char>>;
	using k_vchar = mvector<k_char>;
	using k_vpcf = mvector<std::pair<k_vchar, float>>;
#else
	// Fallback: the same alias names map onto the standard containers with
	// their default allocator, so code written against mvector / munordered_map /
	// k_string compiles unchanged without mimalloc.
	template<typename _Ty>
	using mvector = std::vector<_Ty>;

	template<typename _K, typename _V>
	using munordered_map = std::unordered_map<_K, _V>;

	using k_string = std::basic_string<k_char>;
	using k_stringstream = std::basic_stringstream<k_char>;
	// Declared once, via mvector, to mirror the USE_MIMALLOC branch.
	using k_vchar = mvector<k_char>;
	using k_vpcf = mvector<std::pair<k_vchar, float>>;
#endif
}

#ifdef USE_MIMALLOC
namespace std
{
	// Hash support for kiwi::k_string when it is built with mi_stl_allocator.
	// The characters are copied into a default-allocator basic_string and that
	// copy is hashed, so the result equals the standard string hash of the same
	// character sequence. Note that every call constructs a temporary string.
	template<>
	struct hash<kiwi::k_string>
	{
		size_t operator()(const kiwi::k_string& s) const
		{
			using plain_string = basic_string<kiwi::k_char>;
			const plain_string copied(s.begin(), s.end());
			return hash<plain_string>{}(copied);
		}
	};

}
#endif
11 changes: 6 additions & 5 deletions src/core/serializer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,17 +83,18 @@ namespace kiwi
if (!is.read((char*)&v, sizeof(_Ty))) throw std::ios_base::failure(std::string{ "reading type '" } +typeid(_Ty).name() + "' failed");
}

inline void writeToBinStreamImpl(std::ostream& os, const std::u16string& v)
template<class CharTy, class T, class Al>
inline void writeToBinStreamImpl(std::ostream& os, const std::basic_string<CharTy, T, Al>& v)
{
writeToBinStream<uint32_t>(os, v.size());
if (!os.write((const char*)&v[0], v.size() * sizeof(char16_t))) throw std::ios_base::failure(std::string{ "writing type '" } +typeid(k_string).name() + "' failed");
if (!os.write((const char*)&v[0], v.size() * sizeof(CharTy))) throw std::ios_base::failure("writing type 'string' failed");
}

template<class _Istream>
inline void readFromBinStreamImpl(_Istream& is, std::u16string& v)
template<class _Istream, class CharTy, class T, class Al>
inline void readFromBinStreamImpl(_Istream& is, std::basic_string<CharTy, T, Al>& v)
{
v.resize(readFromBinStream<uint32_t>(is));
if (!is.read((char*)&v[0], v.size() * sizeof(char16_t))) throw std::ios_base::failure(std::string{ "reading type '" } +typeid(k_string).name() + "' failed");
if (!is.read((char*)&v[0], v.size() * sizeof(CharTy))) throw std::ios_base::failure("reading type 'string' failed");
}

template<class _Ty1, class _Ty2>
Expand Down

0 comments on commit 971c646

Please sign in to comment.