From 0aecc5043e39d63d02d5b456f138a0052dcda641 Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Thu, 18 Sep 2014 16:10:22 -0700 Subject: [PATCH 01/50] change init value to init function for hash updates --- applications/join/Aggregates.hpp | 4 ++++ applications/join/DHT_symmetric.hpp | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/applications/join/Aggregates.hpp b/applications/join/Aggregates.hpp index 89b244e50..29cd2697d 100644 --- a/applications/join/Aggregates.hpp +++ b/applications/join/Aggregates.hpp @@ -10,4 +10,8 @@ namespace Aggregates { State COUNT(State sofar, UV nextval) { return sofar + 1; } + + int64_t Zero() { + return 0; + } } diff --git a/applications/join/DHT_symmetric.hpp b/applications/join/DHT_symmetric.hpp index d9ee4fe6c..5137dd936 100644 --- a/applications/join/DHT_symmetric.hpp +++ b/applications/join/DHT_symmetric.hpp @@ -55,14 +55,14 @@ class DHT_symmetric { return object; } - template< GlobalCompletionEvent * GCE, typename UV, V (*UpF)(V oldval, UV incVal), V Init, SyncMode S = SyncMode::Async > + template< GlobalCompletionEvent * GCE, typename UV, V (*UpF)(V oldval, UV incVal), V (*Init)(void), SyncMode S = SyncMode::Async > void update( K key, UV val ) { uint64_t index = computeIndex( key ); auto target = this->self; Grappa::delegate::call(index, [key, val, target]() { // inserts initial value only if the key is not yet present - std::pair entry(key, Init); + std::pair entry(key, Init()); auto res = target->local_map->insert(entry); auto resIt = res.first; //auto resNew = res.second; // perform the update in place From 47f9fccdb0bd6caa78d5a8fa2b4ebbc408f6f400 Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Thu, 18 Sep 2014 16:11:13 -0700 Subject: [PATCH 02/50] deal temporarily with disambiguation like this --- applications/join/relation_io.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/applications/join/relation_io.hpp b/applications/join/relation_io.hpp index e342611bf..5f73a675d 100644 --- a/applications/join/relation_io.hpp +++ b/applications/join/relation_io.hpp @@ -350,7 +350,7 @@ GlobalAddress readTuples( std::string fn, int64_t numTuples ) { readFields.push_back(f); } - T val( readFields ); + T val( readFields, false, false ); VLOG(5) << val; From 9df6a189c9c447d0758a31d5220ae682e2c7bffe Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Fri, 19 Sep 2014 14:13:59 -0700 Subject: [PATCH 03/50] const ref for combine function --- applications/join/Aggregates.hpp | 4 ++-- applications/join/DHT_symmetric.hpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/applications/join/Aggregates.hpp b/applications/join/Aggregates.hpp index 29cd2697d..a98296c18 100644 --- a/applications/join/Aggregates.hpp +++ b/applications/join/Aggregates.hpp @@ -2,12 +2,12 @@ namespace Aggregates { template < typename State, typename UV > - State SUM(State sofar, UV nextval) { + State SUM(const State& sofar, const UV& nextval) { return sofar + nextval; } template < typename State, typename UV > - State COUNT(State sofar, UV nextval) { + State COUNT(const State& sofar, const UV& nextval) { return sofar + 1; } diff --git a/applications/join/DHT_symmetric.hpp b/applications/join/DHT_symmetric.hpp index 5137dd936..27c6e5567 100644 --- a/applications/join/DHT_symmetric.hpp +++ b/applications/join/DHT_symmetric.hpp @@ -55,7 +55,7 @@ class DHT_symmetric { return object; } - template< GlobalCompletionEvent * GCE, typename UV, V (*UpF)(V oldval, UV incVal), V (*Init)(void), SyncMode S = SyncMode::Async > + template< 
GlobalCompletionEvent * GCE, typename UV, V (*UpF)(const V& oldval, const UV& incVal), V (*Init)(void), SyncMode S = SyncMode::Async > void update( K key, UV val ) { uint64_t index = computeIndex( key ); auto target = this->self; From ac74936ede001eb27b3de9a62152b377831f863b Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Sat, 20 Sep 2014 12:11:11 -0700 Subject: [PATCH 04/50] refactor to use c++ standard hash format --- applications/join/DHT_symmetric.hpp | 17 +++++++++-------- applications/join/DoubleDHT.hpp | 20 ++++++++++---------- applications/join/MatchesDHT.hpp | 24 ++++++++++++------------ 3 files changed, 31 insertions(+), 30 deletions(-) diff --git a/applications/join/DHT_symmetric.hpp b/applications/join/DHT_symmetric.hpp index 27c6e5567..6a6a7e581 100644 --- a/applications/join/DHT_symmetric.hpp +++ b/applications/join/DHT_symmetric.hpp @@ -7,6 +7,7 @@ #include #include +#include //GRAPPA_DECLARE_METRIC(MaxMetric, max_cell_length); @@ -15,23 +16,23 @@ GRAPPA_DECLARE_METRIC(SummarizingMetric, hash_tables_lookup_steps); // for naming the types scoped in DHT_symmetric -#define DHT_symmetric_TYPE(type) typename DHT_symmetric::type -#define DHT_symmetric_T DHT_symmetric +#define DHT_symmetric_TYPE(type) typename DHT_symmetric::type +#define DHT_symmetric_T DHT_symmetric // Hash table for joins // * allows multiple copies of a Key // * lookups return all Key matches -template +template class DHT_symmetric { private: // private members GlobalAddress< DHT_symmetric_T > self; - std::unordered_map * local_map; + std::unordered_map * local_map; size_t partitions; - uint64_t computeIndex( K key ) { - return HF(key) % partitions; + size_t computeIndex( K key ) { + return Hash()(key) % partitions; } // for creating local DHT_symmetric @@ -57,7 +58,7 @@ class DHT_symmetric { template< GlobalCompletionEvent * GCE, typename UV, V (*UpF)(const V& oldval, const UV& incVal), V (*Init)(void), SyncMode S = SyncMode::Async > void update( K key, UV val ) { - uint64_t index = computeIndex( key ); + auto index = computeIndex( key ); auto target = this->self; Grappa::delegate::call(index, [key, val, target]() { @@ -72,7 +73,7 @@ class DHT_symmetric { template< GlobalCompletionEvent * GCE, SyncMode S = SyncMode::Async > void insert_unique( K key, V val ) { - uint64_t index = computeIndex( key ); + auto index = computeIndex( key ); auto target = this->self; Grappa::delegate::call(index, [key, val, target]() { diff --git a/applications/join/DoubleDHT.hpp b/applications/join/DoubleDHT.hpp index d35c648be..e161e7564 100644 --- a/applications/join/DoubleDHT.hpp +++ b/applications/join/DoubleDHT.hpp @@ -15,15 +15,15 @@ GRAPPA_DECLARE_METRIC(SimpleMetric, hash_tables_size); GRAPPA_DECLARE_METRIC(SummarizingMetric, hash_tables_lookup_steps); // for naming the types scoped in DoubleDHT -#define DDHT_TYPE(type) typename DoubleDHT::type -#define _DDHT_TYPE(type) DoubleDHT::type +#define DDHT_TYPE(type) typename DoubleDHT::type +#define _DDHT_TYPE(type) DoubleDHT::type enum class Direction { LEFT, RIGHT }; // Hash table for joins // * allows multiple copies of a Key // * lookups return all Key matches -template +template class DoubleDHT { private: @@ -54,8 +54,8 @@ class DoubleDHT { GlobalAddress< PairCell > base; size_t capacity; - uint64_t computeIndex( K key ) { - return HF(key) & (capacity - 1); + size_t computeIndex( K key ) { + return Hash()(key) & (capacity - 1); } // for creating local DoubleDHT @@ -138,14 +138,14 @@ class DoubleDHT { // for static construction DoubleDHT( ) {} - static void init_global_DHT( 
DoubleDHT * globally_valid_local_pointer, size_t capacity ) { + static void init_global_DHT( DoubleDHT * globally_valid_local_pointer, size_t capacity ) { uint32_t capacity_exp = log2(capacity); size_t capacity_powerof2 = pow(2, capacity_exp); GlobalAddress base = Grappa::global_alloc( capacity_powerof2 ); Grappa::on_all_cores( [globally_valid_local_pointer,base,capacity_powerof2] { - *globally_valid_local_pointer = DoubleDHT( base, capacity_powerof2 ); + *globally_valid_local_pointer = DoubleDHT( base, capacity_powerof2 ); }); Grappa::forall( base, capacity_powerof2, []( int64_t i, PairCell& c ) { @@ -154,7 +154,7 @@ class DoubleDHT { }); } - static void set_RO_global( DoubleDHT * globally_valid_local_pointer ) { + static void set_RO_global( DoubleDHT * globally_valid_local_pointer ) { //noop //Grappa::forall( globally_valid_local_pointer->base, globally_valid_local_pointer->capacity, []( int64_t i, Cell& c ) { //}); @@ -203,7 +203,7 @@ class DoubleDHT { // version of lookup that takes a continuation instead of returning results back template< typename CF, Grappa::GlobalCompletionEvent * GCE = &Grappa::impl::local_gce, bool Unique=false > void insert_lookup_iter_left ( K key, VL val, CF f ) { - uint64_t index = computeIndex( key ); + auto index = computeIndex( key ); GlobalAddress< PairCell > target = base + index; // FIXME: remove 'this' capture when using gcc4.8, this is just a bug in 4.7 @@ -242,7 +242,7 @@ class DoubleDHT { template< typename CF, Grappa::GlobalCompletionEvent * GCE = &Grappa::impl::local_gce, bool Unique=false > void insert_lookup_iter_right ( K key, VR val, CF f ) { - uint64_t index = computeIndex( key ); + auto index = computeIndex( key ); GlobalAddress< PairCell > target = base + index; // FIXME: remove 'this' capture when using gcc4.8, this is just a bug in 4.7 diff --git a/applications/join/MatchesDHT.hpp b/applications/join/MatchesDHT.hpp index c44745613..af6d6cfc5 100644 --- a/applications/join/MatchesDHT.hpp +++ b/applications/join/MatchesDHT.hpp @@ -25,12 +25,12 @@ GRAPPA_DECLARE_METRIC(SimpleMetric, hash_called_inserts); // for naming the types scoped in MatchesDHT -#define MDHT_TYPE(type) typename MatchesDHT::type +#define MDHT_TYPE(type) typename MatchesDHT::type // Hash table for joins // * allows multiple copies of a Key // * lookups return all Key matches -template +template class MatchesDHT { private: @@ -58,8 +58,8 @@ class MatchesDHT { GlobalAddress< Cell > base; size_t capacity; - uint64_t computeIndex( K key ) { - return HF(key) & (capacity - 1); + size_t computeIndex( K key ) { + return Hash()(key) & (capacity - 1); } // for creating local MatchesDHT @@ -94,14 +94,14 @@ class MatchesDHT { // for static construction MatchesDHT( ) {} - static void init_global_DHT( MatchesDHT * globally_valid_local_pointer, size_t capacity ) { + static void init_global_DHT( MatchesDHT * globally_valid_local_pointer, size_t capacity ) { uint32_t capacity_exp = log2(capacity); size_t capacity_powerof2 = pow(2, capacity_exp); GlobalAddress base = Grappa::global_alloc( capacity_powerof2 ); Grappa::on_all_cores( [globally_valid_local_pointer,base,capacity_powerof2] { - *globally_valid_local_pointer = MatchesDHT( base, capacity_powerof2 ); + *globally_valid_local_pointer = MatchesDHT( base, capacity_powerof2 ); }); Grappa::forall( base, capacity_powerof2, []( int64_t i, Cell& c ) { @@ -135,7 +135,7 @@ class MatchesDHT { } - static void set_RO_global( MatchesDHT * globally_valid_local_pointer ) { + static void set_RO_global( MatchesDHT * globally_valid_local_pointer ) { 
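Aside: a minimal standalone sketch of the hashing refactor these hunks apply, replacing the uint64_t (*HF)(K) function-pointer template parameter with a Hash functor type in the style of std::hash, invoked as Hash()(key). OldTable, NewTable, and identity_hash are illustrative names only; in the patch the same change lands in MatchesDHT, DoubleDHT, and DHT_symmetric.

#include <cstdint>
#include <functional>
#include <string>

// Illustrative stand-in for the pre-patch parameterization: the hash is a
// function-pointer non-type template parameter, so every caller must supply one.
uint64_t identity_hash(int64_t k) { return static_cast<uint64_t>(k); }

template <typename K, typename V, uint64_t (*HF)(K)>
struct OldTable {
  size_t capacity;  // assumed to be a power of two, as init_global_DHT enforces
  size_t computeIndex(K key) { return HF(key) & (capacity - 1); }
};

// Post-patch style: a hasher type with a default, as std::unordered_map does.
template <typename K, typename V, typename Hash = std::hash<K>>
struct NewTable {
  size_t capacity;
  size_t computeIndex(K key) { return Hash()(key) & (capacity - 1); }
};

int main() {
  OldTable<int64_t, int64_t, identity_hash> before{1024};
  NewTable<int64_t, int64_t> after{1024};        // defaults to std::hash<int64_t>
  NewTable<std::string, int64_t> strkeys{1024};  // any std::hash-able key now works
  size_t i1 = before.computeIndex(42);
  size_t i2 = after.computeIndex(42);
  size_t i3 = strkeys.computeIndex("k");
  return (i1 < 1024 && i2 < 1024 && i3 < 1024) ? 0 : 1;
}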
Grappa::forall( globally_valid_local_pointer->base, globally_valid_local_pointer->capacity, []( int64_t i, Cell& c ) { // list of entries in this cell std::list * entries = c.entries; @@ -159,7 +159,7 @@ class MatchesDHT { } uint64_t lookup ( K key, GlobalAddress * vals ) { - uint64_t index = computeIndex( key ); + auto index = computeIndex( key ); GlobalAddress< Cell > target = base + index; // FIXME: remove 'this' capture when using gcc4.8, this is just a bug in 4.7 @@ -186,7 +186,7 @@ class MatchesDHT { // version of lookup that takes a continuation instead of returning results back template< typename CF, Grappa::GlobalCompletionEvent * GCE = &Grappa::impl::local_gce > void lookup_iter ( K key, CF f ) { - uint64_t index = computeIndex( key ); + auto index = computeIndex( key ); GlobalAddress< Cell > target = base + index; // FIXME: remove 'this' capture when using gcc4.8, this is just a bug in 4.7 @@ -220,7 +220,7 @@ class MatchesDHT { // version of lookup that takes a continuation instead of returning results back template< typename CF, Grappa::GlobalCompletionEvent * GCE = &Grappa::impl::local_gce > void lookup ( K key, CF f ) { - uint64_t index = computeIndex( key ); + auto index = computeIndex( key ); GlobalAddress< Cell > target = base + index; Grappa::delegate::call( target.core(), [key, target, f]() { @@ -239,7 +239,7 @@ class MatchesDHT { // // returns true if the set already contains the key void insert_unique( K key, V val ) { - uint64_t index = computeIndex( key ); + auto index = computeIndex( key ); GlobalAddress< Cell > target = base + index; Grappa::delegate::call( target.core(), [key,val,target]() { // TODO: have an additional version that returns void // to upgrade to call_async @@ -277,7 +277,7 @@ class MatchesDHT { template< Grappa::GlobalCompletionEvent * GCE = &Grappa::impl::local_gce > void insert_async( K key, V val ) { - uint64_t index = computeIndex( key ); + auto index = computeIndex( key ); GlobalAddress< Cell > target = base + index; if (target.core() == Grappa::mycore()) { From 212dbc7e340e6b4bff0a8ef9af9be8c52d99f70d Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Sat, 20 Sep 2014 12:23:25 -0700 Subject: [PATCH 05/50] forgot a use of unordered_map without Hash --- applications/join/DHT_symmetric.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/applications/join/DHT_symmetric.hpp b/applications/join/DHT_symmetric.hpp index 6a6a7e581..1c956a5f4 100644 --- a/applications/join/DHT_symmetric.hpp +++ b/applications/join/DHT_symmetric.hpp @@ -39,7 +39,7 @@ class DHT_symmetric { DHT_symmetric( GlobalAddress self ) : self(self) , partitions(Grappa::cores()) - , local_map(new std::unordered_map()) + , local_map(new std::unordered_map()) {} public: From cd465a21bcec4dd4bd5328d82c54898c31d62f0d Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Sun, 21 Sep 2014 10:30:32 -0700 Subject: [PATCH 06/50] make ascii read compatible with the new tuple --- applications/join/relation_io.hpp | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/applications/join/relation_io.hpp b/applications/join/relation_io.hpp index 5f73a675d..d2c2d68e9 100644 --- a/applications/join/relation_io.hpp +++ b/applications/join/relation_io.hpp @@ -144,7 +144,7 @@ size_t readTuplesUnordered( std::string fn, GlobalAddress * buf_addr, int64_t std::string data_path = FLAGS_relations+"/"+fn; size_t file_size = fs::file_size( data_path ); size_t ntuples = file_size / row_size_bytes; - CHECK( ntuples * row_size_bytes == file_size ) << "File is 
ill-formatted; perhaps not all rows have same columns?"; + CHECK( ntuples * row_size_bytes == file_size ) << "File " << data_path << " is ill-formatted; perhaps not all rows have same columns? file size = " << file_size << " row_size_bytes = " << row_size_bytes; VLOG(1) << fn << " has " << ntuples << " rows"; auto tuples = Grappa::global_alloc(ntuples); @@ -261,6 +261,8 @@ int64_t toInt(std::string& s) { double toDouble(std::string& s) { return std::stod(s); } + + #include template< typename N=int64_t, typename Parser=decltype(toInt) > void convert2bin( std::string fn, Parser parser=&toInt, char * separators=" ", uint64_t burn=0 ) { @@ -336,22 +338,11 @@ GlobalAddress readTuples( std::string fn, int64_t numTuples ) { for (int ignore=s; ignore readFields; - - // TODO: compiler should use catalog to statically insert num fields - std::stringstream ss(line); - while (true) { - std::string buf; - ss >> buf; - if (buf.compare("") == 0) break; - - auto f = std::stoi(buf); - readFields.push_back(f); - } + std::istringstream iss(line); + auto val = T::fromIStream(iss); - T val( readFields, false, false ); - VLOG(5) << val; // write edge to location From e77cefda7da4f45e85fe9b204924f9b1e62f133c Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Sun, 21 Sep 2014 10:41:11 -0700 Subject: [PATCH 07/50] comment out hash join shuffle --- applications/join/CMakeLists.txt | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/applications/join/CMakeLists.txt b/applications/join/CMakeLists.txt index 7fbfe4c5d..bccb29b91 100644 --- a/applications/join/CMakeLists.txt +++ b/applications/join/CMakeLists.txt @@ -23,11 +23,14 @@ set(QUERYLIB_SOURCES stats.cpp MapReduce.cpp MapReduce.hpp - HashJoin.hpp - HashJoin.cpp Aggregates.hpp DHT_symmetric.hpp ) +#FIXME: these MapReduce Hash joins belong in the above sources + #HashJoin.hpp + #HashJoin.cpp + + set(QUERYIO_SOURCES relation_io.hpp relation_io.cpp From 9c2d2bcfce0df45d6d37d70600cfdaf9851f1de0 Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Sun, 21 Sep 2014 20:21:53 -0700 Subject: [PATCH 08/50] use new tuple ostream binary --- applications/join/relation_io.hpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/applications/join/relation_io.hpp b/applications/join/relation_io.hpp index d2c2d68e9..5a4860580 100644 --- a/applications/join/relation_io.hpp +++ b/applications/join/relation_io.hpp @@ -230,14 +230,11 @@ void writeTuplesUnordered(std::vector * vec, std::string fn ) { VLOG(5) << "writing"; for (auto it = vec->begin(); it < vec->end(); it++) { - for (int j = 0; j < it->numFields(); j++) { - int64_t val = it->get(j); - data_file.write((char*)&val, sizeof(val)); - } + it->toOStream(data_file); } data_file.close(); - }); + }); } void writeSchema(std::string names, std::string types, std::string fn ) { @@ -336,7 +333,7 @@ GlobalAddress readTuples( std::string fn, int64_t numTuples ) { Grappa::forall_here<&Grappa::impl::local_gce, 1>(0, numTuples, [tuples,numTuples,&fin,&testfile](int64_t s, int64_t n) { std::string line; for (int ignore=s; ignore Date: Sun, 28 Sep 2014 22:24:17 -0700 Subject: [PATCH 11/50] fix non initialization bug in relation_io --- applications/join/relation_io.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/applications/join/relation_io.hpp b/applications/join/relation_io.hpp index 68576cac6..29fc16e17 100644 --- a/applications/join/relation_io.hpp +++ b/applications/join/relation_io.hpp @@ -149,7 +149,7 @@ size_t readTuplesUnordered( std::string fn, GlobalAddress * 
buf_addr, int64_t auto tuples = Grappa::global_alloc(ntuples); - size_t offset_counter; + size_t offset_counter = 0; auto offset_counter_addr = make_global( &offset_counter, Grappa::mycore() ); // we will broadcast the file name as bytes From 9c8f7aca6209222de9a038e6fa7b4b7247da2a6e Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Mon, 29 Sep 2014 09:28:59 -0700 Subject: [PATCH 12/50] igor for naive bayes --- applications/join/igor_nb.rb | 88 ++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100755 applications/join/igor_nb.rb diff --git a/applications/join/igor_nb.rb b/applications/join/igor_nb.rb new file mode 100755 index 000000000..89dcc4c30 --- /dev/null +++ b/applications/join/igor_nb.rb @@ -0,0 +1,88 @@ +#!/usr/bin/env ruby +require 'igor' + +query = ARGV[0] +plan = "" +if ARGV.length == 2 then + plan = "#{ARGV[1]}_" +end + +queryexe = "grappa_#{plan}#{query}.exe" + + +machine = ENV['GRAPPA_CLUSTER'] +if not machine then + raise "need to set GRAPPA_CLUSTER to pal or sampa" +end + + +# inherit parser, sbatch_flags +load '../../../../util/igor_common.rb' + +def expand_flags(*names) + names.map{|n| "--#{n}=%{#{n}}"}.join(' ') +end + +$datasets="/sampa/home/bdmyers/graph_datasets" + +Igor do + include Isolatable + + database "#{ENV['HOME']}/hardcode_results/nb.db", :msd + + # isolate everything needed for the executable so we can sbcast them for local execution + isolate(["#{queryexe}"], + File.dirname(__FILE__))#, + # symlinks=["sp2bench_1m"]) + + GFLAGS = Params.new { + num_starting_workers 1024 + loop_threshold 128 + aggregator_autoflush_ticks 100000 + periodic_poll_ticks 20000 + load_balance 'none' + flush_on_idle 0 + poll_on_idle 1 + nt ENV['NTUPLES'].to_i + input_file_testdata '/sampa/home/bdmyers/escience/datasets/millionsong/YearPredictionMSD_test_8attr.txt' + input_file_conditionals '/sampa/home/grappa-cmake/build/Make+Release/applications/join/conditionals' + } + + command %Q[ %{tdir}/grappa_srun --nnode=%{nnode} --ppn=%{ppn} -t 60 + -- %{tdir}/#{queryexe} --vmodule=grappa_*=%{emitlogging} + #{expand_flags(*GFLAGS.keys)} + ].gsub(/\s+/,' ') + + sbatch_flags << "--time=60" + + params { + trial 1 + nnode 2 + ppn 2 + vtag 'v1' + machine "#{machine}" + query "#{query}" + plan "#{plan}" + hash_local_cells 16*1024 + emitlogging 0 + } + params.merge!(GFLAGS) + + run { + trial 1,2,3 + nnode 16 + ppn 16 + periodic_poll_ticks 16*2500 + aggregator_autoflush_ticks 16*12500 +} + + + # required measures + #expect :query_runtime + #expect :shit + + $basic = results{|t| t.select(:vtag, :run_at,:nnode, :ppn, :query, :query_runtime, :in_memory_runtime, :nt)} +# $detail = results{|t| t.select(:nnode, :ppn, :scale, :edgefactor, :query_runtime, :ir5_final_count_max/(:ir5_final_count_min+1), :ir6_count, (:ir2_count+:ir4_count+:ir6_count)/:query_runtime, :edges_transfered/:total_edges, :total_edges) } + + #interact # enter interactive mode +end From 4392f1fd45db1563c7f6eee4928c57b7d2f69f22 Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Mon, 29 Sep 2014 09:29:47 -0700 Subject: [PATCH 13/50] sequential Output in io --- applications/join/relation_io.hpp | 39 ++++++++++++++++++------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/applications/join/relation_io.hpp b/applications/join/relation_io.hpp index 29fc16e17..c5ea31cce 100644 --- a/applications/join/relation_io.hpp +++ b/applications/join/relation_io.hpp @@ -219,22 +219,29 @@ void writeTuplesUnordered(std::vector * vec, std::string fn ) { CHECK( data_path.size() <= 2040 ); char 
data_path_char[2048]; sprintf(data_path_char, "%s", data_path.c_str()); - - on_all_cores( [=] { - VLOG(5) << "opening addr next"; - VLOG(5) << "opening addr " << &data_path_char; - VLOG(5) << "opening " << data_path_char; - - std::ofstream data_file(data_path_char, std::ios_base::out | std::ios_base::app | std::ios_base::binary); - CHECK( data_file.is_open() ) << data_path_char << " failed to open"; - VLOG(5) << "writing"; - - for (auto it = vec->begin(); it < vec->end(); it++) { - it->toOStream(data_file); - } - - data_file.close(); - }); + + std::ofstream for_trunc(data_path_char, std::ios_base::out | std::ios_base::trunc | std::ios_base::binary); + //no writes + for_trunc.close(); + + // sequentiall open for append and write + for (int i=0; ibegin(); it < vec->end(); it++) { + it->toOStream(data_file); + } + + data_file.close(); + return 1; + }); + } } void writeSchema(std::string scheme, std::string fn ) { From db9382338ef3cdd65c9bb0611311ab1a67483049 Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Tue, 5 May 2015 20:02:41 -0700 Subject: [PATCH 14/50] add application must go after librarys added --- applications/join/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/applications/join/CMakeLists.txt b/applications/join/CMakeLists.txt index dd36fe505..c53a20cb2 100644 --- a/applications/join/CMakeLists.txt +++ b/applications/join/CMakeLists.txt @@ -101,7 +101,6 @@ endforeach() # exe targets for generated query codes foreach(query ${GENERATED_SOURCES}) get_filename_component(query_name ${query}, NAME_WE) - add_grappa_application(${query_name}.exe ${query}) # Raco C++ environment set(RACO_DIR "$ENV{RACO_HOME}") @@ -113,6 +112,8 @@ foreach(query ${GENERATED_SOURCES}) message(FATAL_ERROR "Undefined RACO_HOME environment variable, required for generated queries applications/join/grappa_*.cpp ") endif() + + add_grappa_application(${query_name}.exe ${query}) target_link_libraries(${query_name}.exe generator querylib queryio racoc) list(APPEND GENERATED_EXES "${query_name}.exe") From d589e68ed20c6a6e98c5b7254aa37f6561c9d897 Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Wed, 13 May 2015 10:15:54 -0700 Subject: [PATCH 15/50] update Grappa IO to handle native Radish strings --- applications/join/Relation_io_tests.cpp | 256 +++++++++++++++++++----- applications/join/relation_io.hpp | 8 +- 2 files changed, 213 insertions(+), 51 deletions(-) diff --git a/applications/join/Relation_io_tests.cpp b/applications/join/Relation_io_tests.cpp index 119e13079..b7f7e6903 100644 --- a/applications/join/Relation_io_tests.cpp +++ b/applications/join/Relation_io_tests.cpp @@ -29,51 +29,177 @@ #include #include "relation_io.hpp" +#include "strings.h" + +// Unfortunately Grappa addressing does not work for things > 64 bytes... +#undef MAX_STR_LEN +#define MAX_STR_LEN 8 using namespace Grappa; BOOST_AUTO_TEST_SUITE( Relation_io_tests ); -class MaterializedTupleRef_V1_0_1 { - public: - int64_t _fields[2]; +class MaterializedTupleRef_V1_0_1_2 { + // Invariant: data stored in _fields is always in the representation + // specified by _scheme. 
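Aside: a minimal sketch of the tuple layout this test is being rewritten around — the generated Radish style with plain public fields, a fixed-width std::array<char, MAX_STR_LEN> per string column, and toOStream writing raw bytes so every row has the same size on disk. Tuple, STR_LEN, make_str, and row.bin are illustrative only; the real class is MaterializedTupleRef_V1_0_1_2, the real padding helper is to_array from strings.h, and the real fromIStream parses ASCII rather than binary.

#include <array>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iostream>

constexpr size_t STR_LEN = 8;  // stand-in for MAX_STR_LEN in the patch

struct Tuple {
  int64_t f0;
  std::array<char, STR_LEN> f1;

  // Every row occupies exactly this many bytes on disk, which is what lets
  // readTuplesUnordered recover the row count as file_size / row_size_bytes.
  static size_t fieldsSize() { return sizeof(int64_t) + STR_LEN * sizeof(char); }

  void toOStream(std::ostream& os) const {
    os.write(reinterpret_cast<const char*>(&f0), sizeof(int64_t));
    os.write(f1.data(), STR_LEN);
  }
};

// Pad/truncate a C string into the fixed-width array (assumption: the patch's
// to_array<MAX_STR_LEN>() helper in strings.h plays this role).
std::array<char, STR_LEN> make_str(const char* s) {
  std::array<char, STR_LEN> a{};            // zero-filled
  std::strncpy(a.data(), s, STR_LEN - 1);   // always keep a trailing '\0'
  return a;
}

int main() {
  Tuple t{51, make_str("coffee")};
  { std::ofstream out("row.bin", std::ios::binary); t.toOStream(out); }

  std::ifstream in("row.bin", std::ios::binary);
  Tuple r{};
  in.read(reinterpret_cast<char*>(&r.f0), sizeof(int64_t));
  in.read(r.f1.data(), STR_LEN);
  std::cout << r.f0 << " " << r.f1.data() << "\n";  // prints: 51 coffee
  return 0;
}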
- int64_t get(int field) const { - return _fields[field]; - } + public: + + int64_t f0; + + std::array f1; - void set(int field, int64_t val) { - _fields[field] = val; + std::array f2; + + + static constexpr int numFields() { + return 3; } - int numFields() const { - return 2; + static size_t fieldsSize() { + const MaterializedTupleRef_V1_0_1_2 _t; + return + + sizeof(_t.f0) + + + sizeof(_t.f1) + + + sizeof(_t.f2) + + + 0; } - MaterializedTupleRef_V1_0_1 () { + MaterializedTupleRef_V1_0_1_2 () { // no-op } - MaterializedTupleRef_V1_0_1 (std::vector vals) { - for (int i=0; i + //MaterializedTupleRef_V1_0_1_2 (const OT& other) { + // std::memcpy(this, &other, sizeof(MaterializedTupleRef_V1_0_1_2)); + //} + MaterializedTupleRef_V1_0_1_2 ( + const int64_t& a0 + , + + const std::array& a1 + , + + const std::array& a2 + + + ) { + + f0 = a0; + + f1 = a1; + + f2 = a2; + + } + + // shamelessly terrible disambiguation: one solution is named factory methods + //MaterializedTupleRef_V1_0_1_2 (std::vector vals, bool ignore1, bool ignore2) { + // + // f0 = vals[0]; + // + // f1 = vals[1]; + // + // f2 = vals[2]; + // + //} + + // use the tuple schema to interpret the input stream + static MaterializedTupleRef_V1_0_1_2 fromIStream(std::istream& ss) { + MaterializedTupleRef_V1_0_1_2 _ret; + + + + ss >> _ret.f0; + + + + { + std::string _temp; + ss >> _temp; + _ret.f1 = to_array(_temp); + } + + + + { + std::string _temp; + ss >> _temp; + _ret.f2 = to_array(_temp); + } + + + + return _ret; + } + + void toOStream(std::ostream& os) const { + + + os.write((char*)&f0, sizeof(int64_t)); + + + + os.write(f1.data(), (size_t)MAX_STR_LEN * sizeof(char)); + + + + os.write(f2.data(), (size_t)MAX_STR_LEN * sizeof(char)); + + + } + + void toOStreamAscii(std::ostream& os) const { + os + + << f0 << " " + + << f1 << " " + + << f2 << std::endl; } + //template + //MaterializedTupleRef_V1_0_1_2 (const Tuple& v0, const T& from) { + // constexpr size_t v0_size = std::tuple_size::value; + // constexpr int from_size = T::numFields(); + // static_assert(MaterializedTupleRef_V1_0_1_2::numFields() == (v0_size + from_size), "constructor only works on same number of total fields"); + // TupleUtils::assign<0, decltype(_scheme)>(_fields, v0); + // std::memcpy(((char*)&_fields)+v0_size*sizeof(int64_t), &(from._fields), from_size*sizeof(int64_t)); + //} + + //template + //MaterializedTupleRef_V1_0_1_2 (const Tuple& v0) { + // static_assert(MaterializedTupleRef_V1_0_1_2::numFields() == (std::tuple_size::value), "constructor only works on same number of total fields"); + // TupleUtils::assign<0, decltype(_scheme)>(_fields, v0); + //} + std::ostream& dump(std::ostream& o) const { o << "Materialized("; - for (int i=0; i more_data; +std::vector more_data; BOOST_AUTO_TEST_CASE( test1 ) { Grappa::init( GRAPPA_TEST_ARGS ); @@ -83,53 +209,89 @@ BOOST_AUTO_TEST_CASE( test1 ) { // write to new file std::string write_file = "write.bin"; - MaterializedTupleRef_V1_0_1 one; - MaterializedTupleRef_V1_0_1 two; - one.set(0, 10); - one.set(1, 11); - two.set(0, 12); - two.set(1, 13); + MaterializedTupleRef_V1_0_1_2 one; + MaterializedTupleRef_V1_0_1_2 two; + one.f0 = 51; + one.f1 = {'c','o','f','f','e','e','\0'}; + one.f2 = {'d', 'o', 'g', '\0'}; + two.f0 = 57; + two.f1 = {'s','u','p','e','r','\0'}; + two.f2 = {'a','b','c','d', '\0'}; more_data.push_back(one); more_data.push_back(two); - writeTuplesUnordered( &more_data, write_file ); + writeTuplesUnordered( &more_data, write_file ); // try read - Relation results = - readTuplesUnordered( write_file ); + 
Relation results = + readTuplesUnordered( write_file ); BOOST_CHECK_EQUAL( 2, results.numtuples ); - MaterializedTupleRef_V1_0_1 expected; - expected.set(0, 10); - expected.set(1, 11); - BOOST_CHECK_EQUAL( expected.get(0), (*results.data.localize()).get(0) ); - BOOST_CHECK_EQUAL( expected.get(1), (*results.data.localize()).get(1) ); - + auto r0 = Grappa::delegate::read(results.data); + auto r1 = Grappa::delegate::read(results.data + 1); + // the tuples might be read in either order + if (one.f0 == r0.f0) { + BOOST_CHECK_EQUAL( one.f0, r0.f0 ); + BOOST_CHECK( one.f1 == r0.f1 ); + BOOST_CHECK( one.f2 == r0.f2 ); + + BOOST_CHECK_EQUAL( two.f0, r1.f0 ); + std::cout << "(" << std::string(r1.f1.data()) << ") (" << std::string(r1.f2.data()) << ")" << std::endl; + BOOST_CHECK( two.f1 == r1.f1 ); + BOOST_CHECK( two.f2 == r1.f2 ); + } else { + BOOST_CHECK_EQUAL( one.f0, r1.f0 ); + BOOST_CHECK( one.f1 == r1.f1 ); + BOOST_CHECK( one.f2 == r1.f2 ); + + BOOST_CHECK_EQUAL( two.f0, r0.f0 ); + BOOST_CHECK( two.f1 == r0.f1 ); + BOOST_CHECK( two.f2 == r0.f2 ); + } // write to existing file - MaterializedTupleRef_V1_0_1 three; - MaterializedTupleRef_V1_0_1 four; - three.set(0, 14); - three.set(1, 15); - four.set(0, 16); - four.set(1, 17); + MaterializedTupleRef_V1_0_1_2 three; + MaterializedTupleRef_V1_0_1_2 four; + three.f0 = 83; + three.f1 = {'x','y','z','\0'}; + four.f0 = 87; + four.f1 = {'c','h','e','c','k','\0'}; more_data.clear(); + more_data.push_back(one); + more_data.push_back(two); more_data.push_back(three); more_data.push_back(four); - writeTuplesUnordered( &more_data, write_file ); + // more_data now has tuples 1 - 4 + writeTuplesUnordered( &more_data, write_file ); // verify write results = - readTuplesUnordered( write_file ); + readTuplesUnordered( write_file ); BOOST_CHECK_EQUAL( 4, results.numtuples ); - expected.set(0, 10); - expected.set(1, 11); - BOOST_CHECK_EQUAL( expected.get(0), (*results.data.localize()).get(0) ); - BOOST_CHECK_EQUAL( expected.get(1), (*results.data.localize()).get(1) ); - + r0 = Grappa::delegate::read(results.data); + r1 = Grappa::delegate::read(results.data + 1); + auto r2 = Grappa::delegate::read(results.data + 2); + auto r3 = Grappa::delegate::read(results.data + 3); + if (one.f0 == r0.f0) { + BOOST_CHECK_EQUAL( one.f0, r0.f0 ); + BOOST_CHECK( one.f1 == r0.f1 ); + BOOST_CHECK( one.f2 == r0.f2 ); + } else if (one.f0 == r1.f0) { + BOOST_CHECK_EQUAL( one.f0, r1.f0 ); + BOOST_CHECK( one.f1 == r1.f1 ); + BOOST_CHECK( one.f2 == r1.f2 ); + } else if (one.f0 == r2.f0) { + BOOST_CHECK_EQUAL( one.f0, r2.f0 ); + BOOST_CHECK( one.f1 == r2.f1 ); + BOOST_CHECK( one.f2 == r2.f2 ); + } else { + BOOST_CHECK_EQUAL( one.f0, r3.f0 ); + BOOST_CHECK( one.f1 == r3.f1 ); + BOOST_CHECK( one.f2 == r3.f2 ); + } }); Grappa::finalize(); } diff --git a/applications/join/relation_io.hpp b/applications/join/relation_io.hpp index c5ea31cce..5021a1819 100644 --- a/applications/join/relation_io.hpp +++ b/applications/join/relation_io.hpp @@ -127,7 +127,7 @@ tuple_graph readEdges( std::string fn, int64_t numTuples ) { // assumes that for object T, the address of T is the address of its fields template -size_t readTuplesUnordered( std::string fn, GlobalAddress * buf_addr, int64_t numfields ) { +size_t readTuplesUnordered( std::string fn, GlobalAddress * buf_addr ) { /* std::string metadata_path = FLAGS_relations+"/"+fn+"."+metadata; //TODO replace such metadatafiles with a real catalog std::ifstream metadata_file(metadata_path, std::ifstream::in); @@ -139,7 +139,7 @@ size_t readTuplesUnordered( 
std::string fn, GlobalAddress * buf_addr, int64_t // binary; TODO: factor out to allow other formats like fixed-line length ascii // we get just the size of the fields (since T is a padded data type) - size_t row_size_bytes = sizeof(int64_t) * numfields; + size_t row_size_bytes = T::fieldsSize(); VLOG(2) << "row_size_bytes=" << row_size_bytes; std::string data_path = FLAGS_relations+"/"+fn; size_t file_size = fs::file_size( data_path ); @@ -203,9 +203,9 @@ Relation readTuplesUnordered( std::string fn ) { GlobalAddress tuples; T sample; - CHECK( reinterpret_cast(&sample._fields) == reinterpret_cast(&sample) ) << "IO assumes _fields is the first field, but it is not for T"; + CHECK( reinterpret_cast(&sample.f0) == reinterpret_cast(&sample) ) << "IO assumes f0 is the first field, but it is not for T"; - auto ntuples = readTuplesUnordered( fn, &tuples, sizeof(sample._fields)/sizeof(int64_t) ); + auto ntuples = readTuplesUnordered( fn, &tuples ); Relation r = { tuples, ntuples }; return r; } From 3ec1284b9e4044cbcfe551c2d02e58e0a4571731 Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Wed, 13 May 2015 15:18:18 -0700 Subject: [PATCH 16/50] add MAX as an aggregate --- applications/join/Aggregates.hpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/applications/join/Aggregates.hpp b/applications/join/Aggregates.hpp index 2fb80807a..0d314a86f 100644 --- a/applications/join/Aggregates.hpp +++ b/applications/join/Aggregates.hpp @@ -1,4 +1,5 @@ #pragma once +#include namespace Aggregates { template < typename State, typename UV > @@ -11,6 +12,15 @@ namespace Aggregates { return sofar + 1; } + // keep MAX macro from being used here +#pragma push_macro("MAX") +#undef MAX + template + State MAX(const State& sofar, const UV& nextval) { + return std::max(sofar, nextval); + } +#pragma pop_macro("MAX") + template N Zero() { return 0; From bd748063377e936ca80163d4fa56b27a6cb3412e Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Wed, 13 May 2015 16:48:33 -0700 Subject: [PATCH 17/50] add min --- applications/join/Aggregates.hpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/applications/join/Aggregates.hpp b/applications/join/Aggregates.hpp index 0d314a86f..f12be839d 100644 --- a/applications/join/Aggregates.hpp +++ b/applications/join/Aggregates.hpp @@ -20,9 +20,19 @@ namespace Aggregates { return std::max(sofar, nextval); } #pragma pop_macro("MAX") + // keep MIN macro from being used here +#pragma push_macro("MIN") +#undef MIN + template + State MIN(const State& sofar, const UV& nextval) { + return std::min(sofar, nextval); + } +#pragma pop_macro("MIN") + template - N Zero() { - return 0; - } + N Zero() { + return 0; + } + } From b722574820d6182f0686c86b4aff6a9e7fea9eba Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Tue, 19 May 2015 11:26:10 -0700 Subject: [PATCH 18/50] update relation tests for fix in Raco --- applications/join/Relation_io_tests.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/applications/join/Relation_io_tests.cpp b/applications/join/Relation_io_tests.cpp index b7f7e6903..61ce0e089 100644 --- a/applications/join/Relation_io_tests.cpp +++ b/applications/join/Relation_io_tests.cpp @@ -32,8 +32,6 @@ #include "strings.h" // Unfortunately Grappa addressing does not work for things > 64 bytes... 
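Aside: patches 16 and 17 above add MAX and MIN to the aggregates that plug into DHT_symmetric::update, where both the combine function (UpF) and the initializer (Init) travel as function-pointer template parameters (patches 01 and 03). A minimal single-node sketch of that mechanism; LocalGroupBy and the explicit update<...> calls are illustrative only, and the real update runs remotely through Grappa::delegate::call on the distributed table.

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <unordered_map>

namespace Aggregates {
  template <typename State, typename UV>
  State SUM(const State& sofar, const UV& nextval) { return sofar + nextval; }

  template <typename State, typename UV>
  State MAX(const State& sofar, const UV& nextval) { return std::max<State>(sofar, nextval); }

  template <typename N>
  N Zero() { return 0; }
}

template <typename K, typename V, typename Hash = std::hash<K>>
class LocalGroupBy {
  std::unordered_map<K, V, Hash> map_;
 public:
  // UpF folds a new value into the existing state; Init() supplies the state
  // for a key seen for the first time. Making Init a function rather than a
  // compile-time value is what patch 01 changes.
  template <typename UV, V (*UpF)(const V&, const UV&), V (*Init)(void)>
  void update(K key, UV val) {
    auto res = map_.insert({key, Init()});   // inserts only if key is absent
    res.first->second = UpF(res.first->second, val);
  }
  const std::unordered_map<K, V, Hash>& data() const { return map_; }
};

int main() {
  LocalGroupBy<std::string, int64_t> sums, maxs;
  int64_t vals[] = {3, 7, 5};
  for (int64_t v : vals) {
    sums.update<int64_t, Aggregates::SUM<int64_t, int64_t>,
                Aggregates::Zero<int64_t>>("k", v);
    maxs.update<int64_t, Aggregates::MAX<int64_t, int64_t>,
                Aggregates::Zero<int64_t>>("k", v);
  }
  std::cout << sums.data().at("k") << " " << maxs.data().at("k") << "\n";  // 15 7
  return 0;
}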
-#undef MAX_STR_LEN -#define MAX_STR_LEN 8 using namespace Grappa; @@ -113,14 +111,18 @@ class MaterializedTupleRef_V1_0_1_2 { MaterializedTupleRef_V1_0_1_2 _ret; - + { ss >> _ret.f0; + // throw away comma + std::string _temp; + std::getline(ss, _temp, ','); + } { std::string _temp; - ss >> _temp; + std::getline(ss, _temp, ','); _ret.f1 = to_array(_temp); } @@ -128,7 +130,7 @@ class MaterializedTupleRef_V1_0_1_2 { { std::string _temp; - ss >> _temp; + std::getline(ss, _temp, ','); _ret.f2 = to_array(_temp); } From 2f5653b8a729b227f41db28fa447648ebcac59ea Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Tue, 19 May 2015 11:26:48 -0700 Subject: [PATCH 19/50] add readSplits to relation tests --- applications/join/CMakeLists.txt | 4 ++++ applications/join/Relation_io_tests.cpp | 14 ++++++++++++++ applications/join/test-part-00000 | 3 +++ applications/join/test-part-00001 | 3 +++ 4 files changed, 24 insertions(+) create mode 100644 applications/join/test-part-00000 create mode 100644 applications/join/test-part-00001 diff --git a/applications/join/CMakeLists.txt b/applications/join/CMakeLists.txt index c53a20cb2..203b35245 100644 --- a/applications/join/CMakeLists.txt +++ b/applications/join/CMakeLists.txt @@ -37,6 +37,8 @@ set(QUERYIO_SOURCES Tuple.hpp Tuple.cpp relation.hpp + json/json.h + jsoncpp.cpp ) set(INCLUDE_DIRS @@ -155,5 +157,7 @@ macro(add_check test_cpp nnode ppn target) add_dependencies( check-all-${target}-compile-only ${test}) endmacro() +# add test for IO add_check(Relation_io_tests.cpp 2 1 pass) +file(COPY test-part-00000 test-part-00001 DESTINATION .) diff --git a/applications/join/Relation_io_tests.cpp b/applications/join/Relation_io_tests.cpp index 61ce0e089..d0962a5c1 100644 --- a/applications/join/Relation_io_tests.cpp +++ b/applications/join/Relation_io_tests.cpp @@ -294,7 +294,21 @@ BOOST_AUTO_TEST_CASE( test1 ) { BOOST_CHECK( one.f1 == r3.f1 ); BOOST_CHECK( one.f2 == r3.f2 ); } + + + results = readSplits( "test-part" ); + BOOST_CHECK_EQUAL(results.numtuples, 6); + forall( results.data, results.numtuples, [=](MaterializedTupleRef_V1_0_1_2& t) { + std::cout << t << std::endl; + }); + }); + + + + + + Grappa::finalize(); } diff --git a/applications/join/test-part-00000 b/applications/join/test-part-00000 new file mode 100644 index 000000000..9e43c3a6c --- /dev/null +++ b/applications/join/test-part-00000 @@ -0,0 +1,3 @@ +{"a": 1, "b": "event process", "c": "8489729"} {"count": 1} +{"a": 2, "b": "process comput", "c": "8489729"} {"count": 1} +{"a": 3, "b": "comput system", "c": "8489729"} {"count": 1} diff --git a/applications/join/test-part-00001 b/applications/join/test-part-00001 new file mode 100644 index 000000000..9b51d6c01 --- /dev/null +++ b/applications/join/test-part-00001 @@ -0,0 +1,3 @@ +{"a": 3, "b": "method gsm", "c": "8018865"} {"count": 1} +{"a": 4, "b": "gsm global", "c": "8018865"} {"count": 1} +{"a": 5, "b": "global system", "c": "8018865"} {"count": 1} From 76f13730dc30ecfacd9c37aeb1c987dc996032ce Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Tue, 19 May 2015 11:28:24 -0700 Subject: [PATCH 20/50] add readSplits, hardcoded for JSON parsing for now; also fix an insidious stack space or char[] copying bug --- applications/join/json/json.h | 2017 ++++++++++++ applications/join/jsoncpp.cpp | 5122 +++++++++++++++++++++++++++++ applications/join/relation_io.hpp | 201 +- 3 files changed, 7319 insertions(+), 21 deletions(-) create mode 100644 applications/join/json/json.h create mode 100644 applications/join/jsoncpp.cpp diff --git 
a/applications/join/json/json.h b/applications/join/json/json.h new file mode 100644 index 000000000..e01991e0f --- /dev/null +++ b/applications/join/json/json.h @@ -0,0 +1,2017 @@ +/// Json-cpp amalgated header (http://jsoncpp.sourceforge.net/). +/// It is intended to be used with #include "json/json.h" + +// ////////////////////////////////////////////////////////////////////// +// Beginning of content of file: LICENSE +// ////////////////////////////////////////////////////////////////////// + +/* +The JsonCpp library's source code, including accompanying documentation, +tests and demonstration applications, are licensed under the following +conditions... + +The author (Baptiste Lepilleur) explicitly disclaims copyright in all +jurisdictions which recognize such a disclaimer. In such jurisdictions, +this software is released into the Public Domain. + +In jurisdictions which do not recognize Public Domain property (e.g. Germany as of +2010), this software is Copyright (c) 2007-2010 by Baptiste Lepilleur, and is +released under the terms of the MIT License (see below). + +In jurisdictions which recognize Public Domain property, the user of this +software may choose to accept it either as 1) Public Domain, 2) under the +conditions of the MIT License (see below), or 3) under the terms of dual +Public Domain/MIT License conditions described here, as they choose. + +The MIT License is about as close to Public Domain as a license can get, and is +described in clear, concise terms at: + + http://en.wikipedia.org/wiki/MIT_License + +The full text of the MIT License follows: + +======================================================================== +Copyright (c) 2007-2010 Baptiste Lepilleur + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, copy, +modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +======================================================================== +(END LICENSE TEXT) + +The MIT license is compatible with both the GPL and commercial +software, affording one all of the rights of Public Domain with the +minor nuisance of being required to keep the above copyright notice +and license text in the source code. Note also that by accepting the +Public Domain "license" you can re-license your copy using whatever +license you like. 
+ +*/ + +// ////////////////////////////////////////////////////////////////////// +// End of content of file: LICENSE +// ////////////////////////////////////////////////////////////////////// + + + + + +#ifndef JSON_AMALGATED_H_INCLUDED +# define JSON_AMALGATED_H_INCLUDED +/// If defined, indicates that the source file is amalgated +/// to prevent private header inclusion. +#define JSON_IS_AMALGAMATION + +// ////////////////////////////////////////////////////////////////////// +// Beginning of content of file: include/json/version.h +// ////////////////////////////////////////////////////////////////////// + +// DO NOT EDIT. This file is generated by CMake from "version" +// and "version.h.in" files. +// Run CMake configure step to update it. +#ifndef JSON_VERSION_H_INCLUDED +# define JSON_VERSION_H_INCLUDED + +# define JSONCPP_VERSION_STRING "1.6.2" +# define JSONCPP_VERSION_MAJOR 1 +# define JSONCPP_VERSION_MINOR 6 +# define JSONCPP_VERSION_PATCH 2 +# define JSONCPP_VERSION_QUALIFIER +# define JSONCPP_VERSION_HEXA ((JSONCPP_VERSION_MAJOR << 24) | (JSONCPP_VERSION_MINOR << 16) | (JSONCPP_VERSION_PATCH << 8)) + +#endif // JSON_VERSION_H_INCLUDED + +// ////////////////////////////////////////////////////////////////////// +// End of content of file: include/json/version.h +// ////////////////////////////////////////////////////////////////////// + + + + + + +// ////////////////////////////////////////////////////////////////////// +// Beginning of content of file: include/json/config.h +// ////////////////////////////////////////////////////////////////////// + +// Copyright 2007-2010 Baptiste Lepilleur +// Distributed under MIT license, or public domain if desired and +// recognized in your jurisdiction. +// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE + +#ifndef JSON_CONFIG_H_INCLUDED +#define JSON_CONFIG_H_INCLUDED + +/// If defined, indicates that json library is embedded in CppTL library. +//# define JSON_IN_CPPTL 1 + +/// If defined, indicates that json may leverage CppTL library +//# define JSON_USE_CPPTL 1 +/// If defined, indicates that cpptl vector based map should be used instead of +/// std::map +/// as Value container. +//# define JSON_USE_CPPTL_SMALLMAP 1 + +// If non-zero, the library uses exceptions to report bad input instead of C +// assertion macros. The default is to use exceptions. +#ifndef JSON_USE_EXCEPTION +#define JSON_USE_EXCEPTION 1 +#endif + +/// If defined, indicates that the source file is amalgated +/// to prevent private header inclusion. +/// Remarks: it is automatically defined in the generated amalgated header. +// #define JSON_IS_AMALGAMATION + +#ifdef JSON_IN_CPPTL +#include +#ifndef JSON_USE_CPPTL +#define JSON_USE_CPPTL 1 +#endif +#endif + +#ifdef JSON_IN_CPPTL +#define JSON_API CPPTL_API +#elif defined(JSON_DLL_BUILD) +#if defined(_MSC_VER) +#define JSON_API __declspec(dllexport) +#define JSONCPP_DISABLE_DLL_INTERFACE_WARNING +#endif // if defined(_MSC_VER) +#elif defined(JSON_DLL) +#if defined(_MSC_VER) +#define JSON_API __declspec(dllimport) +#define JSONCPP_DISABLE_DLL_INTERFACE_WARNING +#endif // if defined(_MSC_VER) +#endif // ifdef JSON_IN_CPPTL +#if !defined(JSON_API) +#define JSON_API +#endif + +// If JSON_NO_INT64 is defined, then Json only support C++ "int" type for +// integer +// Storages, and 64 bits integer support is disabled. 
+// #define JSON_NO_INT64 1 + +#if defined(_MSC_VER) && _MSC_VER <= 1200 // MSVC 6 +// Microsoft Visual Studio 6 only support conversion from __int64 to double +// (no conversion from unsigned __int64). +#define JSON_USE_INT64_DOUBLE_CONVERSION 1 +// Disable warning 4786 for VS6 caused by STL (identifier was truncated to '255' +// characters in the debug information) +// All projects I've ever seen with VS6 were using this globally (not bothering +// with pragma push/pop). +#pragma warning(disable : 4786) +#endif // if defined(_MSC_VER) && _MSC_VER < 1200 // MSVC 6 + +#if defined(_MSC_VER) && _MSC_VER >= 1500 // MSVC 2008 +/// Indicates that the following function is deprecated. +#define JSONCPP_DEPRECATED(message) __declspec(deprecated(message)) +#elif defined(__clang__) && defined(__has_feature) +#if __has_feature(attribute_deprecated_with_message) +#define JSONCPP_DEPRECATED(message) __attribute__ ((deprecated(message))) +#endif +#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)) +#define JSONCPP_DEPRECATED(message) __attribute__ ((deprecated(message))) +#elif defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) +#define JSONCPP_DEPRECATED(message) __attribute__((__deprecated__)) +#endif + +#if !defined(JSONCPP_DEPRECATED) +#define JSONCPP_DEPRECATED(message) +#endif // if !defined(JSONCPP_DEPRECATED) + +namespace Json { +typedef int Int; +typedef unsigned int UInt; +#if defined(JSON_NO_INT64) +typedef int LargestInt; +typedef unsigned int LargestUInt; +#undef JSON_HAS_INT64 +#else // if defined(JSON_NO_INT64) +// For Microsoft Visual use specific types as long long is not supported +#if defined(_MSC_VER) // Microsoft Visual Studio +typedef __int64 Int64; +typedef unsigned __int64 UInt64; +#else // if defined(_MSC_VER) // Other platforms, use long long +typedef long long int Int64; +typedef unsigned long long int UInt64; +#endif // if defined(_MSC_VER) +typedef Int64 LargestInt; +typedef UInt64 LargestUInt; +#define JSON_HAS_INT64 +#endif // if defined(JSON_NO_INT64) +} // end namespace Json + +#endif // JSON_CONFIG_H_INCLUDED + +// ////////////////////////////////////////////////////////////////////// +// End of content of file: include/json/config.h +// ////////////////////////////////////////////////////////////////////// + + + + + + +// ////////////////////////////////////////////////////////////////////// +// Beginning of content of file: include/json/forwards.h +// ////////////////////////////////////////////////////////////////////// + +// Copyright 2007-2010 Baptiste Lepilleur +// Distributed under MIT license, or public domain if desired and +// recognized in your jurisdiction. 
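Aside: patch 20 vendors this jsoncpp amalgamation so that readSplits (added to relation_io.hpp later in the same patch, beyond this excerpt) can parse split files such as test-part-00000, where each line carries a JSON key object followed by a JSON count object. A minimal sketch of parsing one such line with the vendored API; the hard-coded line and the split on "} {" are assumptions for illustration, not necessarily how readSplits tokenizes its input.

#include <cstdint>
#include <iostream>
#include <string>
#include "json/json.h"   // the amalgamated header added by this patch

int main() {
  std::string line =
      "{\"a\": 1, \"b\": \"event process\", \"c\": \"8489729\"} {\"count\": 1}";

  // Separate the key object from the count object (works for the fixture data,
  // where the field values never contain the sequence "} {").
  size_t split = line.find("} {");
  std::string key_part = line.substr(0, split + 1);
  std::string count_part = line.substr(split + 2);

  Json::Reader reader;
  Json::Value key, count;
  if (!reader.parse(key_part, key) || !reader.parse(count_part, count)) {
    std::cerr << reader.getFormattedErrorMessages();
    return 1;
  }
  std::cout << key["a"].asInt64() << " "         // 1
            << key["b"].asString() << " "        // event process
            << count["count"].asInt64() << "\n"; // 1
  return 0;
}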
+// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE + +#ifndef JSON_FORWARDS_H_INCLUDED +#define JSON_FORWARDS_H_INCLUDED + +#if !defined(JSON_IS_AMALGAMATION) +#include "config.h" +#endif // if !defined(JSON_IS_AMALGAMATION) + +namespace Json { + +// writer.h +class FastWriter; +class StyledWriter; + +// reader.h +class Reader; + +// features.h +class Features; + +// value.h +typedef unsigned int ArrayIndex; +class StaticString; +class Path; +class PathArgument; +class Value; +class ValueIteratorBase; +class ValueIterator; +class ValueConstIterator; + +} // namespace Json + +#endif // JSON_FORWARDS_H_INCLUDED + +// ////////////////////////////////////////////////////////////////////// +// End of content of file: include/json/forwards.h +// ////////////////////////////////////////////////////////////////////// + + + + + + +// ////////////////////////////////////////////////////////////////////// +// Beginning of content of file: include/json/features.h +// ////////////////////////////////////////////////////////////////////// + +// Copyright 2007-2010 Baptiste Lepilleur +// Distributed under MIT license, or public domain if desired and +// recognized in your jurisdiction. +// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE + +#ifndef CPPTL_JSON_FEATURES_H_INCLUDED +#define CPPTL_JSON_FEATURES_H_INCLUDED + +#if !defined(JSON_IS_AMALGAMATION) +#include "forwards.h" +#endif // if !defined(JSON_IS_AMALGAMATION) + +namespace Json { + +/** \brief Configuration passed to reader and writer. + * This configuration object can be used to force the Reader or Writer + * to behave in a standard conforming way. + */ +class JSON_API Features { +public: + /** \brief A configuration that allows all features and assumes all strings + * are UTF-8. + * - C & C++ comments are allowed + * - Root object can be any JSON value + * - Assumes Value strings are encoded in UTF-8 + */ + static Features all(); + + /** \brief A configuration that is strictly compatible with the JSON + * specification. + * - Comments are forbidden. + * - Root object must be either an array or an object value. + * - Assumes Value strings are encoded in UTF-8 + */ + static Features strictMode(); + + /** \brief Initialize the configuration like JsonConfig::allFeatures; + */ + Features(); + + /// \c true if comments are allowed. Default: \c true. + bool allowComments_; + + /// \c true if root must be either an array or an object value. Default: \c + /// false. + bool strictRoot_; + + /// \c true if dropped null placeholders are allowed. Default: \c false. + bool allowDroppedNullPlaceholders_; + + /// \c true if numeric object key are allowed. Default: \c false. + bool allowNumericKeys_; +}; + +} // namespace Json + +#endif // CPPTL_JSON_FEATURES_H_INCLUDED + +// ////////////////////////////////////////////////////////////////////// +// End of content of file: include/json/features.h +// ////////////////////////////////////////////////////////////////////// + + + + + + +// ////////////////////////////////////////////////////////////////////// +// Beginning of content of file: include/json/value.h +// ////////////////////////////////////////////////////////////////////// + +// Copyright 2007-2010 Baptiste Lepilleur +// Distributed under MIT license, or public domain if desired and +// recognized in your jurisdiction. 
+// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE + +#ifndef CPPTL_JSON_H_INCLUDED +#define CPPTL_JSON_H_INCLUDED + +#if !defined(JSON_IS_AMALGAMATION) +#include "forwards.h" +#endif // if !defined(JSON_IS_AMALGAMATION) +#include +#include +#include + +#ifndef JSON_USE_CPPTL_SMALLMAP +#include +#else +#include +#endif +#ifdef JSON_USE_CPPTL +#include +#endif + +// Disable warning C4251: : needs to have dll-interface to +// be used by... +#if defined(JSONCPP_DISABLE_DLL_INTERFACE_WARNING) +#pragma warning(push) +#pragma warning(disable : 4251) +#endif // if defined(JSONCPP_DISABLE_DLL_INTERFACE_WARNING) + +/** \brief JSON (JavaScript Object Notation). + */ +namespace Json { + +/** Base class for all exceptions we throw. + * + * We use nothing but these internally. Of course, STL can throw others. + */ +class JSON_API Exception; +/** Exceptions which the user cannot easily avoid. + * + * E.g. out-of-memory (when we use malloc), stack-overflow, malicious input + * + * \remark derived from Json::Exception + */ +class JSON_API RuntimeError; +/** Exceptions thrown by JSON_ASSERT/JSON_FAIL macros. + * + * These are precondition-violations (user bugs) and internal errors (our bugs). + * + * \remark derived from Json::Exception + */ +class JSON_API LogicError; + +/// used internally +void throwRuntimeError(std::string const& msg); +/// used internally +void throwLogicError(std::string const& msg); + +/** \brief Type of the value held by a Value object. + */ +enum ValueType { + nullValue = 0, ///< 'null' value + intValue, ///< signed integer value + uintValue, ///< unsigned integer value + realValue, ///< double value + stringValue, ///< UTF-8 string value + booleanValue, ///< bool value + arrayValue, ///< array value (ordered list) + objectValue ///< object value (collection of name/value pairs). +}; + +enum CommentPlacement { + commentBefore = 0, ///< a comment placed on the line before a value + commentAfterOnSameLine, ///< a comment just after a value on the same line + commentAfter, ///< a comment on the line after a value (only make sense for + /// root value) + numberOfCommentPlacement +}; + +//# ifdef JSON_USE_CPPTL +// typedef CppTL::AnyEnumerator EnumMemberNames; +// typedef CppTL::AnyEnumerator EnumValues; +//# endif + +/** \brief Lightweight wrapper to tag static string. + * + * Value constructor and objectValue member assignement takes advantage of the + * StaticString and avoid the cost of string duplication when storing the + * string or the member name. + * + * Example of usage: + * \code + * Json::Value aValue( StaticString("some text") ); + * Json::Value object; + * static const StaticString code("code"); + * object[code] = 1234; + * \endcode + */ +class JSON_API StaticString { +public: + explicit StaticString(const char* czstring) : c_str_(czstring) {} + + operator const char*() const { return c_str_; } + + const char* c_str() const { return c_str_; } + +private: + const char* c_str_; +}; + +/** \brief Represents a JSON value. + * + * This class is a discriminated union wrapper that can represents a: + * - signed integer [range: Value::minInt - Value::maxInt] + * - unsigned integer (range: 0 - Value::maxUInt) + * - double + * - UTF-8 string + * - boolean + * - 'null' + * - an ordered list of Value + * - collection of name/value pairs (javascript object) + * + * The type of the held value is represented by a #ValueType and + * can be obtained using type(). 
+ * + * Values of an #objectValue or #arrayValue can be accessed using operator[]() + * methods. + * Non-const methods will automatically create the a #nullValue element + * if it does not exist. + * The sequence of an #arrayValue will be automatically resized and initialized + * with #nullValue. resize() can be used to enlarge or truncate an #arrayValue. + * + * The get() methods can be used to obtain default value in the case the + * required element does not exist. + * + * It is possible to iterate over the list of a #objectValue values using + * the getMemberNames() method. + * + * \note #Value string-length fit in size_t, but keys must be < 2^30. + * (The reason is an implementation detail.) A #CharReader will raise an + * exception if a bound is exceeded to avoid security holes in your app, + * but the Value API does *not* check bounds. That is the responsibility + * of the caller. + */ +class JSON_API Value { + friend class ValueIteratorBase; +public: + typedef std::vector Members; + typedef ValueIterator iterator; + typedef ValueConstIterator const_iterator; + typedef Json::UInt UInt; + typedef Json::Int Int; +#if defined(JSON_HAS_INT64) + typedef Json::UInt64 UInt64; + typedef Json::Int64 Int64; +#endif // defined(JSON_HAS_INT64) + typedef Json::LargestInt LargestInt; + typedef Json::LargestUInt LargestUInt; + typedef Json::ArrayIndex ArrayIndex; + + static const Value& null; ///< We regret this reference to a global instance; prefer the simpler Value(). + static const Value& nullRef; ///< just a kludge for binary-compatibility; same as null + /// Minimum signed integer value that can be stored in a Json::Value. + static const LargestInt minLargestInt; + /// Maximum signed integer value that can be stored in a Json::Value. + static const LargestInt maxLargestInt; + /// Maximum unsigned integer value that can be stored in a Json::Value. + static const LargestUInt maxLargestUInt; + + /// Minimum signed int value that can be stored in a Json::Value. + static const Int minInt; + /// Maximum signed int value that can be stored in a Json::Value. + static const Int maxInt; + /// Maximum unsigned int value that can be stored in a Json::Value. + static const UInt maxUInt; + +#if defined(JSON_HAS_INT64) + /// Minimum signed 64 bits int value that can be stored in a Json::Value. + static const Int64 minInt64; + /// Maximum signed 64 bits int value that can be stored in a Json::Value. + static const Int64 maxInt64; + /// Maximum unsigned 64 bits int value that can be stored in a Json::Value. 
+ static const UInt64 maxUInt64; +#endif // defined(JSON_HAS_INT64) + +private: +#ifndef JSONCPP_DOC_EXCLUDE_IMPLEMENTATION + class CZString { + public: + enum DuplicationPolicy { + noDuplication = 0, + duplicate, + duplicateOnCopy + }; + CZString(ArrayIndex index); + CZString(char const* str, unsigned length, DuplicationPolicy allocate); + CZString(CZString const& other); + ~CZString(); + CZString& operator=(CZString other); + bool operator<(CZString const& other) const; + bool operator==(CZString const& other) const; + ArrayIndex index() const; + //const char* c_str() const; ///< \deprecated + char const* data() const; + unsigned length() const; + bool isStaticString() const; + + private: + void swap(CZString& other); + + struct StringStorage { + unsigned policy_: 2; + unsigned length_: 30; // 1GB max + }; + + char const* cstr_; // actually, a prefixed string, unless policy is noDup + union { + ArrayIndex index_; + StringStorage storage_; + }; + }; + +public: +#ifndef JSON_USE_CPPTL_SMALLMAP + typedef std::map ObjectValues; +#else + typedef CppTL::SmallMap ObjectValues; +#endif // ifndef JSON_USE_CPPTL_SMALLMAP +#endif // ifndef JSONCPP_DOC_EXCLUDE_IMPLEMENTATION + +public: + /** \brief Create a default Value of the given type. + + This is a very useful constructor. + To create an empty array, pass arrayValue. + To create an empty object, pass objectValue. + Another Value can then be set to this one by assignment. +This is useful since clear() and resize() will not alter types. + + Examples: +\code +Json::Value null_value; // null +Json::Value arr_value(Json::arrayValue); // [] +Json::Value obj_value(Json::objectValue); // {} +\endcode + */ + Value(ValueType type = nullValue); + Value(Int value); + Value(UInt value); +#if defined(JSON_HAS_INT64) + Value(Int64 value); + Value(UInt64 value); +#endif // if defined(JSON_HAS_INT64) + Value(double value); + Value(const char* value); ///< Copy til first 0. (NULL causes to seg-fault.) + Value(const char* beginValue, const char* endValue); ///< Copy all, incl zeroes. + /** \brief Constructs a value from a static string. + + * Like other value string constructor but do not duplicate the string for + * internal storage. The given string must remain alive after the call to this + * constructor. + * \note This works only for null-terminated strings. (We cannot change the + * size of this class, so we have nowhere to store the length, + * which might be computed later for various operations.) + * + * Example of usage: + * \code + * static StaticString foo("some text"); + * Json::Value aValue(foo); + * \endcode + */ + Value(const StaticString& value); + Value(const std::string& value); ///< Copy data() til size(). Embedded zeroes too. +#ifdef JSON_USE_CPPTL + Value(const CppTL::ConstString& value); +#endif + Value(bool value); + /// Deep copy. + Value(const Value& other); + ~Value(); + + /// Deep copy, then swap(other). + /// \note Over-write existing comments. To preserve comments, use #swapPayload(). + Value& operator=(Value other); + /// Swap everything. + void swap(Value& other); + /// Swap values but leave comments and source offsets in place. + void swapPayload(Value& other); + + ValueType type() const; + + /// Compare payload only, not comments etc. 
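+  /// For illustration (editor's sketch): equality ignores comments, e.g.
+  /// \code
+  /// Json::Value a(1), b(1);
+  /// b.setComment("// a note", Json::commentBefore);
+  /// assert(a == b);   // payload-only comparison
+  /// \endcode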
+ bool operator<(const Value& other) const; + bool operator<=(const Value& other) const; + bool operator>=(const Value& other) const; + bool operator>(const Value& other) const; + bool operator==(const Value& other) const; + bool operator!=(const Value& other) const; + int compare(const Value& other) const; + + const char* asCString() const; ///< Embedded zeroes could cause you trouble! + std::string asString() const; ///< Embedded zeroes are possible. + /** Get raw char* of string-value. + * \return false if !string. (Seg-fault if str or end are NULL.) + */ + bool getString( + char const** str, char const** end) const; +#ifdef JSON_USE_CPPTL + CppTL::ConstString asConstString() const; +#endif + Int asInt() const; + UInt asUInt() const; +#if defined(JSON_HAS_INT64) + Int64 asInt64() const; + UInt64 asUInt64() const; +#endif // if defined(JSON_HAS_INT64) + LargestInt asLargestInt() const; + LargestUInt asLargestUInt() const; + float asFloat() const; + double asDouble() const; + bool asBool() const; + + bool isNull() const; + bool isBool() const; + bool isInt() const; + bool isInt64() const; + bool isUInt() const; + bool isUInt64() const; + bool isIntegral() const; + bool isDouble() const; + bool isNumeric() const; + bool isString() const; + bool isArray() const; + bool isObject() const; + + bool isConvertibleTo(ValueType other) const; + + /// Number of values in array or object + ArrayIndex size() const; + + /// \brief Return true if empty array, empty object, or null; + /// otherwise, false. + bool empty() const; + + /// Return isNull() + bool operator!() const; + + /// Remove all object members and array elements. + /// \pre type() is arrayValue, objectValue, or nullValue + /// \post type() is unchanged + void clear(); + + /// Resize the array to size elements. + /// New elements are initialized to null. + /// May only be called on nullValue or arrayValue. + /// \pre type() is arrayValue or nullValue + /// \post type() is arrayValue + void resize(ArrayIndex size); + + /// Access an array element (zero based index ). + /// If the array contains less than index element, then null value are + /// inserted + /// in the array so that its size is index+1. + /// (You may need to say 'value[0u]' to get your compiler to distinguish + /// this from the operator[] which takes a string.) + Value& operator[](ArrayIndex index); + + /// Access an array element (zero based index ). + /// If the array contains less than index element, then null value are + /// inserted + /// in the array so that its size is index+1. + /// (You may need to say 'value[0u]' to get your compiler to distinguish + /// this from the operator[] which takes a string.) + Value& operator[](int index); + + /// Access an array element (zero based index ) + /// (You may need to say 'value[0u]' to get your compiler to distinguish + /// this from the operator[] which takes a string.) + const Value& operator[](ArrayIndex index) const; + + /// Access an array element (zero based index ) + /// (You may need to say 'value[0u]' to get your compiler to distinguish + /// this from the operator[] which takes a string.) + const Value& operator[](int index) const; + + /// If the array contains at least index+1 elements, returns the element + /// value, + /// otherwise returns defaultValue. + Value get(ArrayIndex index, const Value& defaultValue) const; + /// Return true if index < size(). + bool isValidIndex(ArrayIndex index) const; + /// \brief Append value to array at the end. 
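+  ///
+  /// A usage sketch (editor's illustration):
+  /// \code
+  /// Json::Value arr(Json::arrayValue);
+  /// arr.append("first");
+  /// arr.append(2);
+  /// // arr.size() == 2 and arr[0u] == "first"
+  /// \endcode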
+ /// + /// Equivalent to jsonvalue[jsonvalue.size()] = value; + Value& append(const Value& value); + + /// Access an object value by name, create a null member if it does not exist. + /// \note Because of our implementation, keys are limited to 2^30 -1 chars. + /// Exceeding that will cause an exception. + Value& operator[](const char* key); + /// Access an object value by name, returns null if there is no member with + /// that name. + const Value& operator[](const char* key) const; + /// Access an object value by name, create a null member if it does not exist. + /// \param key may contain embedded nulls. + Value& operator[](const std::string& key); + /// Access an object value by name, returns null if there is no member with + /// that name. + /// \param key may contain embedded nulls. + const Value& operator[](const std::string& key) const; + /** \brief Access an object value by name, create a null member if it does not + exist. + + * If the object has no entry for that name, then the member name used to store + * the new entry is not duplicated. + * Example of use: + * \code + * Json::Value object; + * static const StaticString code("code"); + * object[code] = 1234; + * \endcode + */ + Value& operator[](const StaticString& key); +#ifdef JSON_USE_CPPTL + /// Access an object value by name, create a null member if it does not exist. + Value& operator[](const CppTL::ConstString& key); + /// Access an object value by name, returns null if there is no member with + /// that name. + const Value& operator[](const CppTL::ConstString& key) const; +#endif + /// Return the member named key if it exist, defaultValue otherwise. + /// \note deep copy + Value get(const char* key, const Value& defaultValue) const; + /// Return the member named key if it exist, defaultValue otherwise. + /// \note deep copy + /// \param key may contain embedded nulls. + Value get(const char* key, const char* end, const Value& defaultValue) const; + /// Return the member named key if it exist, defaultValue otherwise. + /// \note deep copy + /// \param key may contain embedded nulls. + Value get(const std::string& key, const Value& defaultValue) const; +#ifdef JSON_USE_CPPTL + /// Return the member named key if it exist, defaultValue otherwise. + /// \note deep copy + Value get(const CppTL::ConstString& key, const Value& defaultValue) const; +#endif + /// Most general and efficient version of isMember()const, get()const, + /// and operator[]const + /// \note As stated elsewhere, behavior is undefined if (end-key) >= 2^30 + Value const* find(char const* key, char const* end) const; + /// Most general and efficient version of object-mutators. + /// \note As stated elsewhere, behavior is undefined if (end-key) >= 2^30 + /// \return non-zero, but JSON_ASSERT if this is neither object nor nullValue. + Value const* demand(char const* key, char const* end); + /// \brief Remove and return the named member. + /// + /// Do nothing if it did not exist. + /// \return the removed Value, or null. + /// \pre type() is objectValue or nullValue + /// \post type() is unchanged + /// \deprecated + Value removeMember(const char* key); + /// Same as removeMember(const char*) + /// \param key may contain embedded nulls. + /// \deprecated + Value removeMember(const std::string& key); + /// Same as removeMember(const char* key, const char* end, Value* removed), + /// but 'key' is null-terminated. + bool removeMember(const char* key, Value* removed); + /** \brief Remove the named map member. + + Update 'removed' iff removed. 
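+
+      For illustration only (editor's sketch; the names below are made up):
+      \code
+      Json::Value obj(Json::objectValue);
+      obj["name"] = "jsoncpp";
+      Json::Value removed;
+      bool ok = obj.removeMember(std::string("name"), &removed);
+      // ok == true, removed == "jsoncpp", obj.isMember("name") == false
+      \endcode
+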
+ \param key may contain embedded nulls. + \return true iff removed (no exceptions) + */ + bool removeMember(std::string const& key, Value* removed); + /// Same as removeMember(std::string const& key, Value* removed) + bool removeMember(const char* key, const char* end, Value* removed); + /** \brief Remove the indexed array element. + + O(n) expensive operations. + Update 'removed' iff removed. + \return true iff removed (no exceptions) + */ + bool removeIndex(ArrayIndex i, Value* removed); + + /// Return true if the object has a member named key. + /// \note 'key' must be null-terminated. + bool isMember(const char* key) const; + /// Return true if the object has a member named key. + /// \param key may contain embedded nulls. + bool isMember(const std::string& key) const; + /// Same as isMember(std::string const& key)const + bool isMember(const char* key, const char* end) const; +#ifdef JSON_USE_CPPTL + /// Return true if the object has a member named key. + bool isMember(const CppTL::ConstString& key) const; +#endif + + /// \brief Return a list of the member names. + /// + /// If null, return an empty list. + /// \pre type() is objectValue or nullValue + /// \post if type() was nullValue, it remains nullValue + Members getMemberNames() const; + + //# ifdef JSON_USE_CPPTL + // EnumMemberNames enumMemberNames() const; + // EnumValues enumValues() const; + //# endif + + /// \deprecated Always pass len. + JSONCPP_DEPRECATED("Use setComment(std::string const&) instead.") + void setComment(const char* comment, CommentPlacement placement); + /// Comments must be //... or /* ... */ + void setComment(const char* comment, size_t len, CommentPlacement placement); + /// Comments must be //... or /* ... */ + void setComment(const std::string& comment, CommentPlacement placement); + bool hasComment(CommentPlacement placement) const; + /// Include delimiters and embedded newlines. + std::string getComment(CommentPlacement placement) const; + + std::string toStyledString() const; + + const_iterator begin() const; + const_iterator end() const; + + iterator begin(); + iterator end(); + + // Accessors for the [start, limit) range of bytes within the JSON text from + // which this value was parsed, if any. + void setOffsetStart(size_t start); + void setOffsetLimit(size_t limit); + size_t getOffsetStart() const; + size_t getOffsetLimit() const; + +private: + void initBasic(ValueType type, bool allocated = false); + + Value& resolveReference(const char* key); + Value& resolveReference(const char* key, const char* end); + + struct CommentInfo { + CommentInfo(); + ~CommentInfo(); + + void setComment(const char* text, size_t len); + + char* comment_; + }; + + // struct MemberNamesTransform + //{ + // typedef const char *result_type; + // const char *operator()( const CZString &name ) const + // { + // return name.c_str(); + // } + //}; + + union ValueHolder { + LargestInt int_; + LargestUInt uint_; + double real_; + bool bool_; + char* string_; // actually ptr to unsigned, followed by str, unless !allocated_ + ObjectValues* map_; + } value_; + ValueType type_ : 8; + unsigned int allocated_ : 1; // Notes: if declared as bool, bitfield is useless. + // If not allocated_, string_ must be null-terminated. + CommentInfo* comments_; + + // [start, limit) byte offsets in the source JSON text from which this Value + // was extracted. + size_t start_; + size_t limit_; +}; + +/** \brief Experimental and untested: represents an element of the "path" to + * access a node. 
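+ *
+ * Editor's sketch only (the Path machinery is documented as experimental and
+ * untested; 'root' below is assumed to be an existing Json::Value):
+ * \code
+ * Json::Path path(".%", Json::PathArgument("encoding"));
+ * Json::Value v = path.resolve(root, Json::Value("UTF-8"));
+ * \endcode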
+ */ +class JSON_API PathArgument { +public: + friend class Path; + + PathArgument(); + PathArgument(ArrayIndex index); + PathArgument(const char* key); + PathArgument(const std::string& key); + +private: + enum Kind { + kindNone = 0, + kindIndex, + kindKey + }; + std::string key_; + ArrayIndex index_; + Kind kind_; +}; + +/** \brief Experimental and untested: represents a "path" to access a node. + * + * Syntax: + * - "." => root node + * - ".[n]" => elements at index 'n' of root node (an array value) + * - ".name" => member named 'name' of root node (an object value) + * - ".name1.name2.name3" + * - ".[0][1][2].name1[3]" + * - ".%" => member name is provided as parameter + * - ".[%]" => index is provied as parameter + */ +class JSON_API Path { +public: + Path(const std::string& path, + const PathArgument& a1 = PathArgument(), + const PathArgument& a2 = PathArgument(), + const PathArgument& a3 = PathArgument(), + const PathArgument& a4 = PathArgument(), + const PathArgument& a5 = PathArgument()); + + const Value& resolve(const Value& root) const; + Value resolve(const Value& root, const Value& defaultValue) const; + /// Creates the "path" to access the specified node and returns a reference on + /// the node. + Value& make(Value& root) const; + +private: + typedef std::vector InArgs; + typedef std::vector Args; + + void makePath(const std::string& path, const InArgs& in); + void addPathInArg(const std::string& path, + const InArgs& in, + InArgs::const_iterator& itInArg, + PathArgument::Kind kind); + void invalidPath(const std::string& path, int location); + + Args args_; +}; + +/** \brief base class for Value iterators. + * + */ +class JSON_API ValueIteratorBase { +public: + typedef std::bidirectional_iterator_tag iterator_category; + typedef unsigned int size_t; + typedef int difference_type; + typedef ValueIteratorBase SelfType; + + bool operator==(const SelfType& other) const { return isEqual(other); } + + bool operator!=(const SelfType& other) const { return !isEqual(other); } + + difference_type operator-(const SelfType& other) const { + return other.computeDistance(*this); + } + + /// Return either the index or the member name of the referenced value as a + /// Value. + Value key() const; + + /// Return the index of the referenced Value, or -1 if it is not an arrayValue. + UInt index() const; + + /// Return the member name of the referenced Value, or "" if it is not an + /// objectValue. + /// \note Avoid `c_str()` on result, as embedded zeroes are possible. + std::string name() const; + + /// Return the member name of the referenced Value. "" if it is not an + /// objectValue. + /// \deprecated This cannot be used for UTF-8 strings, since there can be embedded nulls. + JSONCPP_DEPRECATED("Use `key = name();` instead.") + char const* memberName() const; + /// Return the member name of the referenced Value, or NULL if it is not an + /// objectValue. + /// \note Better version than memberName(). Allows embedded nulls. + char const* memberName(char const** end) const; + +protected: + Value& deref() const; + + void increment(); + + void decrement(); + + difference_type computeDistance(const SelfType& other) const; + + bool isEqual(const SelfType& other) const; + + void copy(const SelfType& other); + +private: + Value::ObjectValues::iterator current_; + // Indicates that iterator is for a null value. + bool isNull_; + +public: + // For some reason, BORLAND needs these at the end, rather + // than earlier. No idea why. 
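+  //
+  // Editor's note (illustration only): iterators are normally obtained from
+  // Value::begin()/end(); name()/key() identify the current member, e.g.
+  //   for (Json::Value::const_iterator it = obj.begin(); it != obj.end(); ++it)
+  //     std::cout << it.name() << ": " << *it << "\n";   // 'obj': an objectValue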
+ ValueIteratorBase(); + explicit ValueIteratorBase(const Value::ObjectValues::iterator& current); +}; + +/** \brief const iterator for object and array value. + * + */ +class JSON_API ValueConstIterator : public ValueIteratorBase { + friend class Value; + +public: + typedef const Value value_type; + //typedef unsigned int size_t; + //typedef int difference_type; + typedef const Value& reference; + typedef const Value* pointer; + typedef ValueConstIterator SelfType; + + ValueConstIterator(); + +private: +/*! \internal Use by Value to create an iterator. + */ + explicit ValueConstIterator(const Value::ObjectValues::iterator& current); +public: + SelfType& operator=(const ValueIteratorBase& other); + + SelfType operator++(int) { + SelfType temp(*this); + ++*this; + return temp; + } + + SelfType operator--(int) { + SelfType temp(*this); + --*this; + return temp; + } + + SelfType& operator--() { + decrement(); + return *this; + } + + SelfType& operator++() { + increment(); + return *this; + } + + reference operator*() const { return deref(); } + + pointer operator->() const { return &deref(); } +}; + +/** \brief Iterator for object and array value. + */ +class JSON_API ValueIterator : public ValueIteratorBase { + friend class Value; + +public: + typedef Value value_type; + typedef unsigned int size_t; + typedef int difference_type; + typedef Value& reference; + typedef Value* pointer; + typedef ValueIterator SelfType; + + ValueIterator(); + ValueIterator(const ValueConstIterator& other); + ValueIterator(const ValueIterator& other); + +private: +/*! \internal Use by Value to create an iterator. + */ + explicit ValueIterator(const Value::ObjectValues::iterator& current); +public: + SelfType& operator=(const SelfType& other); + + SelfType operator++(int) { + SelfType temp(*this); + ++*this; + return temp; + } + + SelfType operator--(int) { + SelfType temp(*this); + --*this; + return temp; + } + + SelfType& operator--() { + decrement(); + return *this; + } + + SelfType& operator++() { + increment(); + return *this; + } + + reference operator*() const { return deref(); } + + pointer operator->() const { return &deref(); } +}; + +} // namespace Json + + +namespace std { +/// Specialize std::swap() for Json::Value. +template<> +inline void swap(Json::Value& a, Json::Value& b) { a.swap(b); } +} + + +#if defined(JSONCPP_DISABLE_DLL_INTERFACE_WARNING) +#pragma warning(pop) +#endif // if defined(JSONCPP_DISABLE_DLL_INTERFACE_WARNING) + +#endif // CPPTL_JSON_H_INCLUDED + +// ////////////////////////////////////////////////////////////////////// +// End of content of file: include/json/value.h +// ////////////////////////////////////////////////////////////////////// + + + + + + +// ////////////////////////////////////////////////////////////////////// +// Beginning of content of file: include/json/reader.h +// ////////////////////////////////////////////////////////////////////// + +// Copyright 2007-2010 Baptiste Lepilleur +// Distributed under MIT license, or public domain if desired and +// recognized in your jurisdiction. +// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE + +#ifndef CPPTL_JSON_READER_H_INCLUDED +#define CPPTL_JSON_READER_H_INCLUDED + +#if !defined(JSON_IS_AMALGAMATION) +#include "features.h" +#include "value.h" +#endif // if !defined(JSON_IS_AMALGAMATION) +#include +#include +#include +#include +#include + +// Disable warning C4251: : needs to have dll-interface to +// be used by... 
+#if defined(JSONCPP_DISABLE_DLL_INTERFACE_WARNING) +#pragma warning(push) +#pragma warning(disable : 4251) +#endif // if defined(JSONCPP_DISABLE_DLL_INTERFACE_WARNING) + +namespace Json { + +/** \brief Unserialize a JSON document into a + *Value. + * + * \deprecated Use CharReader and CharReaderBuilder. + */ +class JSON_API Reader { +public: + typedef char Char; + typedef const Char* Location; + + /** \brief An error tagged with where in the JSON text it was encountered. + * + * The offsets give the [start, limit) range of bytes within the text. Note + * that this is bytes, not codepoints. + * + */ + struct StructuredError { + size_t offset_start; + size_t offset_limit; + std::string message; + }; + + /** \brief Constructs a Reader allowing all features + * for parsing. + */ + Reader(); + + /** \brief Constructs a Reader allowing the specified feature set + * for parsing. + */ + Reader(const Features& features); + + /** \brief Read a Value from a JSON + * document. + * \param document UTF-8 encoded string containing the document to read. + * \param root [out] Contains the root value of the document if it was + * successfully parsed. + * \param collectComments \c true to collect comment and allow writing them + * back during + * serialization, \c false to discard comments. + * This parameter is ignored if + * Features::allowComments_ + * is \c false. + * \return \c true if the document was successfully parsed, \c false if an + * error occurred. + */ + bool + parse(const std::string& document, Value& root, bool collectComments = true); + + /** \brief Read a Value from a JSON + document. + * \param beginDoc Pointer on the beginning of the UTF-8 encoded string of the + document to read. + * \param endDoc Pointer on the end of the UTF-8 encoded string of the + document to read. + * Must be >= beginDoc. + * \param root [out] Contains the root value of the document if it was + * successfully parsed. + * \param collectComments \c true to collect comment and allow writing them + back during + * serialization, \c false to discard comments. + * This parameter is ignored if + Features::allowComments_ + * is \c false. + * \return \c true if the document was successfully parsed, \c false if an + error occurred. + */ + bool parse(const char* beginDoc, + const char* endDoc, + Value& root, + bool collectComments = true); + + /// \brief Parse from input stream. + /// \see Json::operator>>(std::istream&, Json::Value&). + bool parse(std::istream& is, Value& root, bool collectComments = true); + + /** \brief Returns a user friendly string that list errors in the parsed + * document. + * \return Formatted error message with the list of errors with their location + * in + * the parsed document. An empty string is returned if no error + * occurred + * during parsing. + * \deprecated Use getFormattedErrorMessages() instead (typo fix). + */ + JSONCPP_DEPRECATED("Use getFormattedErrorMessages() instead.") + std::string getFormatedErrorMessages() const; + + /** \brief Returns a user friendly string that list errors in the parsed + * document. + * \return Formatted error message with the list of errors with their location + * in + * the parsed document. An empty string is returned if no error + * occurred + * during parsing. + */ + std::string getFormattedErrorMessages() const; + + /** \brief Returns a vector of structured erros encounted while parsing. + * \return A (possibly empty) vector of StructuredError objects. 
Currently + * only one error can be returned, but the caller should tolerate + * multiple + * errors. This can occur if the parser recovers from a non-fatal + * parse error and then encounters additional errors. + */ + std::vector getStructuredErrors() const; + + /** \brief Add a semantic error message. + * \param value JSON Value location associated with the error + * \param message The error message. + * \return \c true if the error was successfully added, \c false if the + * Value offset exceeds the document size. + */ + bool pushError(const Value& value, const std::string& message); + + /** \brief Add a semantic error message with extra context. + * \param value JSON Value location associated with the error + * \param message The error message. + * \param extra Additional JSON Value location to contextualize the error + * \return \c true if the error was successfully added, \c false if either + * Value offset exceeds the document size. + */ + bool pushError(const Value& value, const std::string& message, const Value& extra); + + /** \brief Return whether there are any errors. + * \return \c true if there are no errors to report \c false if + * errors have occurred. + */ + bool good() const; + +private: + enum TokenType { + tokenEndOfStream = 0, + tokenObjectBegin, + tokenObjectEnd, + tokenArrayBegin, + tokenArrayEnd, + tokenString, + tokenNumber, + tokenTrue, + tokenFalse, + tokenNull, + tokenArraySeparator, + tokenMemberSeparator, + tokenComment, + tokenError + }; + + class Token { + public: + TokenType type_; + Location start_; + Location end_; + }; + + class ErrorInfo { + public: + Token token_; + std::string message_; + Location extra_; + }; + + typedef std::deque Errors; + + bool readToken(Token& token); + void skipSpaces(); + bool match(Location pattern, int patternLength); + bool readComment(); + bool readCStyleComment(); + bool readCppStyleComment(); + bool readString(); + void readNumber(); + bool readValue(); + bool readObject(Token& token); + bool readArray(Token& token); + bool decodeNumber(Token& token); + bool decodeNumber(Token& token, Value& decoded); + bool decodeString(Token& token); + bool decodeString(Token& token, std::string& decoded); + bool decodeDouble(Token& token); + bool decodeDouble(Token& token, Value& decoded); + bool decodeUnicodeCodePoint(Token& token, + Location& current, + Location end, + unsigned int& unicode); + bool decodeUnicodeEscapeSequence(Token& token, + Location& current, + Location end, + unsigned int& unicode); + bool addError(const std::string& message, Token& token, Location extra = 0); + bool recoverFromError(TokenType skipUntilToken); + bool addErrorAndRecover(const std::string& message, + Token& token, + TokenType skipUntilToken); + void skipUntilSpace(); + Value& currentValue(); + Char getNextChar(); + void + getLocationLineAndColumn(Location location, int& line, int& column) const; + std::string getLocationLineAndColumn(Location location) const; + void addComment(Location begin, Location end, CommentPlacement placement); + void skipCommentTokens(Token& token); + + typedef std::stack Nodes; + Nodes nodes_; + Errors errors_; + std::string document_; + Location begin_; + Location end_; + Location current_; + Location lastValueEnd_; + Value* lastValue_; + std::string commentsBefore_; + Features features_; + bool collectComments_; +}; // Reader + +/** Interface for reading JSON from a char array. + */ +class JSON_API CharReader { +public: + virtual ~CharReader() {} + /** \brief Read a Value from a JSON + document. 
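+   *
+   * A usage sketch (editor's illustration; 'doc' is assumed to be a
+   * std::string holding the JSON text):
+   * \code
+   * Json::CharReaderBuilder builder;
+   * std::unique_ptr<Json::CharReader> reader(builder.newCharReader());
+   * Json::Value root;
+   * std::string errs;
+   * bool ok = reader->parse(doc.data(), doc.data() + doc.size(), &root, &errs);
+   * \endcode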
+ * The document must be a UTF-8 encoded string containing the document to read. + * + * \param beginDoc Pointer on the beginning of the UTF-8 encoded string of the + document to read. + * \param endDoc Pointer on the end of the UTF-8 encoded string of the + document to read. + * Must be >= beginDoc. + * \param root [out] Contains the root value of the document if it was + * successfully parsed. + * \param errs [out] Formatted error messages (if not NULL) + * a user friendly string that lists errors in the parsed + * document. + * \return \c true if the document was successfully parsed, \c false if an + error occurred. + */ + virtual bool parse( + char const* beginDoc, char const* endDoc, + Value* root, std::string* errs) = 0; + + class Factory { + public: + virtual ~Factory() {} + /** \brief Allocate a CharReader via operator new(). + * \throw std::exception if something goes wrong (e.g. invalid settings) + */ + virtual CharReader* newCharReader() const = 0; + }; // Factory +}; // CharReader + +/** \brief Build a CharReader implementation. + +Usage: +\code + using namespace Json; + CharReaderBuilder builder; + builder["collectComments"] = false; + Value value; + std::string errs; + bool ok = parseFromStream(builder, std::cin, &value, &errs); +\endcode +*/ +class JSON_API CharReaderBuilder : public CharReader::Factory { +public: + // Note: We use a Json::Value so that we can add data-members to this class + // without a major version bump. + /** Configuration of this builder. + These are case-sensitive. + Available settings (case-sensitive): + - `"collectComments": false or true` + - true to collect comment and allow writing them + back during serialization, false to discard comments. + This parameter is ignored if allowComments is false. + - `"allowComments": false or true` + - true if comments are allowed. + - `"strictRoot": false or true` + - true if root must be either an array or an object value + - `"allowDroppedNullPlaceholders": false or true` + - true if dropped null placeholders are allowed. (See StreamWriterBuilder.) + - `"allowNumericKeys": false or true` + - true if numeric object keys are allowed. + - `"allowSingleQuotes": false or true` + - true if '' are allowed for strings (both keys and values) + - `"stackLimit": integer` + - Exceeding stackLimit (recursive depth of `readValue()`) will + cause an exception. + - This is a security issue (seg-faults caused by deeply nested JSON), + so the default is low. + - `"failIfExtra": false or true` + - If true, `parse()` returns false when extra non-whitespace trails + the JSON value in the input string. + - `"rejectDupKeys": false or true` + - If true, `parse()` returns false when a key is duplicated within an object. + + You can examine 'settings_` yourself + to see the defaults. You can also write and read them just like any + JSON Value. + \sa setDefaults() + */ + Json::Value settings_; + + CharReaderBuilder(); + virtual ~CharReaderBuilder(); + + virtual CharReader* newCharReader() const; + + /** \return true if 'settings' are legal and consistent; + * otherwise, indicate bad settings via 'invalid'. + */ + bool validate(Json::Value* invalid) const; + + /** A simple way to update a specific setting. + */ + Value& operator[](std::string key); + + /** Called by ctor, but you can use this to reset settings_. 
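+   *
+   * Editor's sketch of resetting and then tweaking the defaults:
+   * \code
+   * Json::CharReaderBuilder builder;
+   * Json::CharReaderBuilder::setDefaults(&builder.settings_);
+   * builder["collectComments"] = false;
+   * \endcode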
+ * \pre 'settings' != NULL (but Json::null is fine) + * \remark Defaults: + * \snippet src/lib_json/json_reader.cpp CharReaderBuilderDefaults + */ + static void setDefaults(Json::Value* settings); + /** Same as old Features::strictMode(). + * \pre 'settings' != NULL (but Json::null is fine) + * \remark Defaults: + * \snippet src/lib_json/json_reader.cpp CharReaderBuilderStrictMode + */ + static void strictMode(Json::Value* settings); +}; + +/** Consume entire stream and use its begin/end. + * Someday we might have a real StreamReader, but for now this + * is convenient. + */ +bool JSON_API parseFromStream( + CharReader::Factory const&, + std::istream&, + Value* root, std::string* errs); + +/** \brief Read from 'sin' into 'root'. + + Always keep comments from the input JSON. + + This can be used to read a file into a particular sub-object. + For example: + \code + Json::Value root; + cin >> root["dir"]["file"]; + cout << root; + \endcode + Result: + \verbatim + { + "dir": { + "file": { + // The input stream JSON would be nested here. + } + } + } + \endverbatim + \throw std::exception on parse error. + \see Json::operator<<() +*/ +JSON_API std::istream& operator>>(std::istream&, Value&); + +} // namespace Json + +#if defined(JSONCPP_DISABLE_DLL_INTERFACE_WARNING) +#pragma warning(pop) +#endif // if defined(JSONCPP_DISABLE_DLL_INTERFACE_WARNING) + +#endif // CPPTL_JSON_READER_H_INCLUDED + +// ////////////////////////////////////////////////////////////////////// +// End of content of file: include/json/reader.h +// ////////////////////////////////////////////////////////////////////// + + + + + + +// ////////////////////////////////////////////////////////////////////// +// Beginning of content of file: include/json/writer.h +// ////////////////////////////////////////////////////////////////////// + +// Copyright 2007-2010 Baptiste Lepilleur +// Distributed under MIT license, or public domain if desired and +// recognized in your jurisdiction. +// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE + +#ifndef JSON_WRITER_H_INCLUDED +#define JSON_WRITER_H_INCLUDED + +#if !defined(JSON_IS_AMALGAMATION) +#include "value.h" +#endif // if !defined(JSON_IS_AMALGAMATION) +#include +#include +#include + +// Disable warning C4251: : needs to have dll-interface to +// be used by... +#if defined(JSONCPP_DISABLE_DLL_INTERFACE_WARNING) +#pragma warning(push) +#pragma warning(disable : 4251) +#endif // if defined(JSONCPP_DISABLE_DLL_INTERFACE_WARNING) + +namespace Json { + +class Value; + +/** + +Usage: +\code + using namespace Json; + void writeToStdout(StreamWriter::Factory const& factory, Value const& value) { + std::unique_ptr const writer( + factory.newStreamWriter()); + writer->write(value, &std::cout); + std::cout << std::endl; // add lf and flush + } +\endcode +*/ +class JSON_API StreamWriter { +protected: + std::ostream* sout_; // not owned; will not delete +public: + StreamWriter(); + virtual ~StreamWriter(); + /** Write Value into document as configured in sub-class. + Do not take ownership of sout, but maintain a reference during function. + \pre sout != NULL + \return zero on success (For now, we always return zero, so check the stream instead.) + \throw std::exception possibly, depending on configuration + */ + virtual int write(Value const& root, std::ostream* sout) = 0; + + /** \brief A simple abstract factory. + */ + class JSON_API Factory { + public: + virtual ~Factory(); + /** \brief Allocate a CharReader via operator new(). 
+ * \throw std::exception if something goes wrong (e.g. invalid settings) + */ + virtual StreamWriter* newStreamWriter() const = 0; + }; // Factory +}; // StreamWriter + +/** \brief Write into stringstream, then return string, for convenience. + * A StreamWriter will be created from the factory, used, and then deleted. + */ +std::string JSON_API writeString(StreamWriter::Factory const& factory, Value const& root); + + +/** \brief Build a StreamWriter implementation. + +Usage: +\code + using namespace Json; + Value value = ...; + StreamWriterBuilder builder; + builder["commentStyle"] = "None"; + builder["indentation"] = " "; // or whatever you like + std::unique_ptr writer( + builder.newStreamWriter()); + writer->write(value, &std::cout); + std::cout << std::endl; // add lf and flush +\endcode +*/ +class JSON_API StreamWriterBuilder : public StreamWriter::Factory { +public: + // Note: We use a Json::Value so that we can add data-members to this class + // without a major version bump. + /** Configuration of this builder. + Available settings (case-sensitive): + - "commentStyle": "None" or "All" + - "indentation": "" + - "enableYAMLCompatibility": false or true + - slightly change the whitespace around colons + - "dropNullPlaceholders": false or true + - Drop the "null" string from the writer's output for nullValues. + Strictly speaking, this is not valid JSON. But when the output is being + fed to a browser's Javascript, it makes for smaller output and the + browser can handle the output just fine. + + You can examine 'settings_` yourself + to see the defaults. You can also write and read them just like any + JSON Value. + \sa setDefaults() + */ + Json::Value settings_; + + StreamWriterBuilder(); + virtual ~StreamWriterBuilder(); + + /** + * \throw std::exception if something goes wrong (e.g. invalid settings) + */ + virtual StreamWriter* newStreamWriter() const; + + /** \return true if 'settings' are legal and consistent; + * otherwise, indicate bad settings via 'invalid'. + */ + bool validate(Json::Value* invalid) const; + /** A simple way to update a specific setting. + */ + Value& operator[](std::string key); + + /** Called by ctor, but you can use this to reset settings_. + * \pre 'settings' != NULL (but Json::null is fine) + * \remark Defaults: + * \snippet src/lib_json/json_writer.cpp StreamWriterBuilderDefaults + */ + static void setDefaults(Json::Value* settings); +}; + +/** \brief Abstract class for writers. + * \deprecated Use StreamWriter. (And really, this is an implementation detail.) + */ +class JSON_API Writer { +public: + virtual ~Writer(); + + virtual std::string write(const Value& root) = 0; +}; + +/** \brief Outputs a Value in JSON format + *without formatting (not human friendly). + * + * The JSON document is written in a single line. It is not intended for 'human' + *consumption, + * but may be usefull to support feature such as RPC where bandwith is limited. + * \sa Reader, Value + * \deprecated Use StreamWriterBuilder. + */ +class JSON_API FastWriter : public Writer { + +public: + FastWriter(); + virtual ~FastWriter() {} + + void enableYAMLCompatibility(); + + /** \brief Drop the "null" string from the writer's output for nullValues. + * Strictly speaking, this is not valid JSON. But when the output is being + * fed to a browser's Javascript, it makes for smaller output and the + * browser can handle the output just fine. 
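+   *
+   * Editor's sketch (note that FastWriter as a whole is deprecated in favour
+   * of StreamWriterBuilder):
+   * \code
+   * Json::FastWriter writer;
+   * writer.dropNullPlaceholders();
+   * std::string out = writer.write(root);   // 'root' is some Json::Value
+   * \endcode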
+ */ + void dropNullPlaceholders(); + + void omitEndingLineFeed(); + +public: // overridden from Writer + virtual std::string write(const Value& root); + +private: + void writeValue(const Value& value); + + std::string document_; + bool yamlCompatiblityEnabled_; + bool dropNullPlaceholders_; + bool omitEndingLineFeed_; +}; + +/** \brief Writes a Value in JSON format in a + *human friendly way. + * + * The rules for line break and indent are as follow: + * - Object value: + * - if empty then print {} without indent and line break + * - if not empty the print '{', line break & indent, print one value per + *line + * and then unindent and line break and print '}'. + * - Array value: + * - if empty then print [] without indent and line break + * - if the array contains no object value, empty array or some other value + *types, + * and all the values fit on one lines, then print the array on a single + *line. + * - otherwise, it the values do not fit on one line, or the array contains + * object or non empty array, then print one value per line. + * + * If the Value have comments then they are outputed according to their + *#CommentPlacement. + * + * \sa Reader, Value, Value::setComment() + * \deprecated Use StreamWriterBuilder. + */ +class JSON_API StyledWriter : public Writer { +public: + StyledWriter(); + virtual ~StyledWriter() {} + +public: // overridden from Writer + /** \brief Serialize a Value in JSON format. + * \param root Value to serialize. + * \return String containing the JSON document that represents the root value. + */ + virtual std::string write(const Value& root); + +private: + void writeValue(const Value& value); + void writeArrayValue(const Value& value); + bool isMultineArray(const Value& value); + void pushValue(const std::string& value); + void writeIndent(); + void writeWithIndent(const std::string& value); + void indent(); + void unindent(); + void writeCommentBeforeValue(const Value& root); + void writeCommentAfterValueOnSameLine(const Value& root); + bool hasCommentForValue(const Value& value); + static std::string normalizeEOL(const std::string& text); + + typedef std::vector ChildValues; + + ChildValues childValues_; + std::string document_; + std::string indentString_; + int rightMargin_; + int indentSize_; + bool addChildValues_; +}; + +/** \brief Writes a Value in JSON format in a + human friendly way, + to a stream rather than to a string. + * + * The rules for line break and indent are as follow: + * - Object value: + * - if empty then print {} without indent and line break + * - if not empty the print '{', line break & indent, print one value per + line + * and then unindent and line break and print '}'. + * - Array value: + * - if empty then print [] without indent and line break + * - if the array contains no object value, empty array or some other value + types, + * and all the values fit on one lines, then print the array on a single + line. + * - otherwise, it the values do not fit on one line, or the array contains + * object or non empty array, then print one value per line. + * + * If the Value have comments then they are outputed according to their + #CommentPlacement. + * + * \param indentation Each level will be indented by this amount extra. + * \sa Reader, Value, Value::setComment() + * \deprecated Use StreamWriterBuilder. + */ +class JSON_API StyledStreamWriter { +public: + StyledStreamWriter(std::string indentation = "\t"); + ~StyledStreamWriter() {} + +public: + /** \brief Serialize a Value in JSON format. + * \param out Stream to write to. 
(Can be ostringstream, e.g.) + * \param root Value to serialize. + * \note There is no point in deriving from Writer, since write() should not + * return a value. + */ + void write(std::ostream& out, const Value& root); + +private: + void writeValue(const Value& value); + void writeArrayValue(const Value& value); + bool isMultineArray(const Value& value); + void pushValue(const std::string& value); + void writeIndent(); + void writeWithIndent(const std::string& value); + void indent(); + void unindent(); + void writeCommentBeforeValue(const Value& root); + void writeCommentAfterValueOnSameLine(const Value& root); + bool hasCommentForValue(const Value& value); + static std::string normalizeEOL(const std::string& text); + + typedef std::vector ChildValues; + + ChildValues childValues_; + std::ostream* document_; + std::string indentString_; + int rightMargin_; + std::string indentation_; + bool addChildValues_ : 1; + bool indented_ : 1; +}; + +#if defined(JSON_HAS_INT64) +std::string JSON_API valueToString(Int value); +std::string JSON_API valueToString(UInt value); +#endif // if defined(JSON_HAS_INT64) +std::string JSON_API valueToString(LargestInt value); +std::string JSON_API valueToString(LargestUInt value); +std::string JSON_API valueToString(double value); +std::string JSON_API valueToString(bool value); +std::string JSON_API valueToQuotedString(const char* value); + +/// \brief Output using the StyledStreamWriter. +/// \see Json::operator>>() +JSON_API std::ostream& operator<<(std::ostream&, const Value& root); + +} // namespace Json + +#if defined(JSONCPP_DISABLE_DLL_INTERFACE_WARNING) +#pragma warning(pop) +#endif // if defined(JSONCPP_DISABLE_DLL_INTERFACE_WARNING) + +#endif // JSON_WRITER_H_INCLUDED + +// ////////////////////////////////////////////////////////////////////// +// End of content of file: include/json/writer.h +// ////////////////////////////////////////////////////////////////////// + + + + + + +// ////////////////////////////////////////////////////////////////////// +// Beginning of content of file: include/json/assertions.h +// ////////////////////////////////////////////////////////////////////// + +// Copyright 2007-2010 Baptiste Lepilleur +// Distributed under MIT license, or public domain if desired and +// recognized in your jurisdiction. +// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE + +#ifndef CPPTL_JSON_ASSERTIONS_H_INCLUDED +#define CPPTL_JSON_ASSERTIONS_H_INCLUDED + +#include +#include + +#if !defined(JSON_IS_AMALGAMATION) +#include "config.h" +#endif // if !defined(JSON_IS_AMALGAMATION) + +/** It should not be possible for a maliciously designed file to + * cause an abort() or seg-fault, so these macros are used only + * for pre-condition violations and internal logic errors. + */ +#if JSON_USE_EXCEPTION + +// @todo <= add detail about condition in exception +# define JSON_ASSERT(condition) \ + {if (!(condition)) {Json::throwLogicError( "assert json failed" );}} + +# define JSON_FAIL_MESSAGE(message) \ + { \ + std::ostringstream oss; oss << message; \ + Json::throwLogicError(oss.str()); \ + abort(); \ + } + +#else // JSON_USE_EXCEPTION + +# define JSON_ASSERT(condition) assert(condition) + +// The call to assert() will show the failure message in debug builds. In +// release builds we abort, for a core-dump or debugger. 
+# define JSON_FAIL_MESSAGE(message) \ + { \ + std::ostringstream oss; oss << message; \ + assert(false && oss.str().c_str()); \ + abort(); \ + } + + +#endif + +#define JSON_ASSERT_MESSAGE(condition, message) \ + if (!(condition)) { \ + JSON_FAIL_MESSAGE(message); \ + } + +#endif // CPPTL_JSON_ASSERTIONS_H_INCLUDED + +// ////////////////////////////////////////////////////////////////////// +// End of content of file: include/json/assertions.h +// ////////////////////////////////////////////////////////////////////// + + + + + +#endif //ifndef JSON_AMALGATED_H_INCLUDED diff --git a/applications/join/jsoncpp.cpp b/applications/join/jsoncpp.cpp new file mode 100644 index 000000000..ac73f831a --- /dev/null +++ b/applications/join/jsoncpp.cpp @@ -0,0 +1,5122 @@ +/// Json-cpp amalgated source (http://jsoncpp.sourceforge.net/). +/// It is intended to be used with #include "json/json.h" + +// ////////////////////////////////////////////////////////////////////// +// Beginning of content of file: LICENSE +// ////////////////////////////////////////////////////////////////////// + +/* +The JsonCpp library's source code, including accompanying documentation, +tests and demonstration applications, are licensed under the following +conditions... + +The author (Baptiste Lepilleur) explicitly disclaims copyright in all +jurisdictions which recognize such a disclaimer. In such jurisdictions, +this software is released into the Public Domain. + +In jurisdictions which do not recognize Public Domain property (e.g. Germany as of +2010), this software is Copyright (c) 2007-2010 by Baptiste Lepilleur, and is +released under the terms of the MIT License (see below). + +In jurisdictions which recognize Public Domain property, the user of this +software may choose to accept it either as 1) Public Domain, 2) under the +conditions of the MIT License (see below), or 3) under the terms of dual +Public Domain/MIT License conditions described here, as they choose. + +The MIT License is about as close to Public Domain as a license can get, and is +described in clear, concise terms at: + + http://en.wikipedia.org/wiki/MIT_License + +The full text of the MIT License follows: + +======================================================================== +Copyright (c) 2007-2010 Baptiste Lepilleur + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, copy, +modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+======================================================================== +(END LICENSE TEXT) + +The MIT license is compatible with both the GPL and commercial +software, affording one all of the rights of Public Domain with the +minor nuisance of being required to keep the above copyright notice +and license text in the source code. Note also that by accepting the +Public Domain "license" you can re-license your copy using whatever +license you like. + +*/ + +// ////////////////////////////////////////////////////////////////////// +// End of content of file: LICENSE +// ////////////////////////////////////////////////////////////////////// + + + + + + +#include "json/json.h" + +#ifndef JSON_IS_AMALGAMATION +#error "Compile with -I PATH_TO_JSON_DIRECTORY" +#endif + + +// ////////////////////////////////////////////////////////////////////// +// Beginning of content of file: src/lib_json/json_tool.h +// ////////////////////////////////////////////////////////////////////// + +// Copyright 2007-2010 Baptiste Lepilleur +// Distributed under MIT license, or public domain if desired and +// recognized in your jurisdiction. +// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE + +#ifndef LIB_JSONCPP_JSON_TOOL_H_INCLUDED +#define LIB_JSONCPP_JSON_TOOL_H_INCLUDED + +/* This header provides common string manipulation support, such as UTF-8, + * portable conversion from/to string... + * + * It is an internal header that must not be exposed. + */ + +namespace Json { + +/// Converts a unicode code-point to UTF-8. +static inline std::string codePointToUTF8(unsigned int cp) { + std::string result; + + // based on description from http://en.wikipedia.org/wiki/UTF-8 + + if (cp <= 0x7f) { + result.resize(1); + result[0] = static_cast(cp); + } else if (cp <= 0x7FF) { + result.resize(2); + result[1] = static_cast(0x80 | (0x3f & cp)); + result[0] = static_cast(0xC0 | (0x1f & (cp >> 6))); + } else if (cp <= 0xFFFF) { + result.resize(3); + result[2] = static_cast(0x80 | (0x3f & cp)); + result[1] = 0x80 | static_cast((0x3f & (cp >> 6))); + result[0] = 0xE0 | static_cast((0xf & (cp >> 12))); + } else if (cp <= 0x10FFFF) { + result.resize(4); + result[3] = static_cast(0x80 | (0x3f & cp)); + result[2] = static_cast(0x80 | (0x3f & (cp >> 6))); + result[1] = static_cast(0x80 | (0x3f & (cp >> 12))); + result[0] = static_cast(0xF0 | (0x7 & (cp >> 18))); + } + + return result; +} + +/// Returns true if ch is a control character (in range [0,32[). +static inline bool isControlCharacter(char ch) { return ch > 0 && ch <= 0x1F; } + +enum { + /// Constant that specify the size of the buffer that must be passed to + /// uintToString. + uintToStringBufferSize = 3 * sizeof(LargestUInt) + 1 +}; + +// Defines a char buffer for use with uintToString(). +typedef char UIntToStringBuffer[uintToStringBufferSize]; + +/** Converts an unsigned integer to string. + * @param value Unsigned interger to convert to string + * @param current Input/Output string buffer. + * Must have at least uintToStringBufferSize chars free. + */ +static inline void uintToString(LargestUInt value, char*& current) { + *--current = 0; + do { + *--current = char(value % 10) + '0'; + value /= 10; + } while (value != 0); +} + +/** Change ',' to '.' everywhere in buffer. + * + * We had a sophisticated way, but it did not work in WinCE. 
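+ *
+ * For illustration (editor's note):
+ * \code
+ * char buf[] = "1,5";                       // written under a ',' decimal locale
+ * fixNumericLocale(buf, buf + sizeof(buf) - 1);
+ * // buf now contains "1.5"
+ * \endcode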
+ * @see https://github.com/open-source-parsers/jsoncpp/pull/9 + */ +static inline void fixNumericLocale(char* begin, char* end) { + while (begin < end) { + if (*begin == ',') { + *begin = '.'; + } + ++begin; + } +} + +} // namespace Json { + +#endif // LIB_JSONCPP_JSON_TOOL_H_INCLUDED + +// ////////////////////////////////////////////////////////////////////// +// End of content of file: src/lib_json/json_tool.h +// ////////////////////////////////////////////////////////////////////// + + + + + + +// ////////////////////////////////////////////////////////////////////// +// Beginning of content of file: src/lib_json/json_reader.cpp +// ////////////////////////////////////////////////////////////////////// + +// Copyright 2007-2011 Baptiste Lepilleur +// Distributed under MIT license, or public domain if desired and +// recognized in your jurisdiction. +// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE + +#if !defined(JSON_IS_AMALGAMATION) +#include +#include +#include +#include "json_tool.h" +#endif // if !defined(JSON_IS_AMALGAMATION) +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(_MSC_VER) && _MSC_VER < 1500 // VC++ 8.0 and below +#define snprintf _snprintf +#endif + +#if defined(_MSC_VER) && _MSC_VER >= 1400 // VC++ 8.0 +// Disable warning about strdup being deprecated. +#pragma warning(disable : 4996) +#endif + +static int const stackLimit_g = 1000; +static int stackDepth_g = 0; // see readValue() + +namespace Json { + +#if __cplusplus >= 201103L +typedef std::unique_ptr CharReaderPtr; +#else +typedef std::auto_ptr CharReaderPtr; +#endif + +// Implementation of class Features +// //////////////////////////////// + +Features::Features() + : allowComments_(true), strictRoot_(false), + allowDroppedNullPlaceholders_(false), allowNumericKeys_(false) {} + +Features Features::all() { return Features(); } + +Features Features::strictMode() { + Features features; + features.allowComments_ = false; + features.strictRoot_ = true; + features.allowDroppedNullPlaceholders_ = false; + features.allowNumericKeys_ = false; + return features; +} + +// Implementation of class Reader +// //////////////////////////////// + +static bool containsNewLine(Reader::Location begin, Reader::Location end) { + for (; begin < end; ++begin) + if (*begin == '\n' || *begin == '\r') + return true; + return false; +} + +// Class Reader +// ////////////////////////////////////////////////////////////////// + +Reader::Reader() + : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(), + lastValue_(), commentsBefore_(), features_(Features::all()), + collectComments_() {} + +Reader::Reader(const Features& features) + : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(), + lastValue_(), commentsBefore_(), features_(features), collectComments_() { +} + +bool +Reader::parse(const std::string& document, Value& root, bool collectComments) { + document_ = document; + const char* begin = document_.c_str(); + const char* end = begin + document_.length(); + return parse(begin, end, root, collectComments); +} + +bool Reader::parse(std::istream& sin, Value& root, bool collectComments) { + // std::istream_iterator begin(sin); + // std::istream_iterator end; + // Those would allow streamed input from a file, if parse() were a + // template function. + + // Since std::string is reference-counted, this at least does not + // create an extra copy. 
+ std::string doc; + std::getline(sin, doc, (char)EOF); + return parse(doc, root, collectComments); +} + +bool Reader::parse(const char* beginDoc, + const char* endDoc, + Value& root, + bool collectComments) { + if (!features_.allowComments_) { + collectComments = false; + } + + begin_ = beginDoc; + end_ = endDoc; + collectComments_ = collectComments; + current_ = begin_; + lastValueEnd_ = 0; + lastValue_ = 0; + commentsBefore_ = ""; + errors_.clear(); + while (!nodes_.empty()) + nodes_.pop(); + nodes_.push(&root); + + stackDepth_g = 0; // Yes, this is bad coding, but options are limited. + bool successful = readValue(); + Token token; + skipCommentTokens(token); + if (collectComments_ && !commentsBefore_.empty()) + root.setComment(commentsBefore_, commentAfter); + if (features_.strictRoot_) { + if (!root.isArray() && !root.isObject()) { + // Set error location to start of doc, ideally should be first token found + // in doc + token.type_ = tokenError; + token.start_ = beginDoc; + token.end_ = endDoc; + addError( + "A valid JSON document must be either an array or an object value.", + token); + return false; + } + } + return successful; +} + +bool Reader::readValue() { + // This is a non-reentrant way to support a stackLimit. Terrible! + // But this deprecated class has a security problem: Bad input can + // cause a seg-fault. This seems like a fair, binary-compatible way + // to prevent the problem. + if (stackDepth_g >= stackLimit_g) throwRuntimeError("Exceeded stackLimit in readValue()."); + ++stackDepth_g; + + Token token; + skipCommentTokens(token); + bool successful = true; + + if (collectComments_ && !commentsBefore_.empty()) { + currentValue().setComment(commentsBefore_, commentBefore); + commentsBefore_ = ""; + } + + switch (token.type_) { + case tokenObjectBegin: + successful = readObject(token); + currentValue().setOffsetLimit(current_ - begin_); + break; + case tokenArrayBegin: + successful = readArray(token); + currentValue().setOffsetLimit(current_ - begin_); + break; + case tokenNumber: + successful = decodeNumber(token); + break; + case tokenString: + successful = decodeString(token); + break; + case tokenTrue: + { + Value v(true); + currentValue().swapPayload(v); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + } + break; + case tokenFalse: + { + Value v(false); + currentValue().swapPayload(v); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + } + break; + case tokenNull: + { + Value v; + currentValue().swapPayload(v); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + } + break; + case tokenArraySeparator: + case tokenObjectEnd: + case tokenArrayEnd: + if (features_.allowDroppedNullPlaceholders_) { + // "Un-read" the current token and mark the current value as a null + // token. + current_--; + Value v; + currentValue().swapPayload(v); + currentValue().setOffsetStart(current_ - begin_ - 1); + currentValue().setOffsetLimit(current_ - begin_); + break; + } // Else, fall through... 
+ default: + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + return addError("Syntax error: value, object or array expected.", token); + } + + if (collectComments_) { + lastValueEnd_ = current_; + lastValue_ = ¤tValue(); + } + + --stackDepth_g; + return successful; +} + +void Reader::skipCommentTokens(Token& token) { + if (features_.allowComments_) { + do { + readToken(token); + } while (token.type_ == tokenComment); + } else { + readToken(token); + } +} + +bool Reader::readToken(Token& token) { + skipSpaces(); + token.start_ = current_; + Char c = getNextChar(); + bool ok = true; + switch (c) { + case '{': + token.type_ = tokenObjectBegin; + break; + case '}': + token.type_ = tokenObjectEnd; + break; + case '[': + token.type_ = tokenArrayBegin; + break; + case ']': + token.type_ = tokenArrayEnd; + break; + case '"': + token.type_ = tokenString; + ok = readString(); + break; + case '/': + token.type_ = tokenComment; + ok = readComment(); + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '-': + token.type_ = tokenNumber; + readNumber(); + break; + case 't': + token.type_ = tokenTrue; + ok = match("rue", 3); + break; + case 'f': + token.type_ = tokenFalse; + ok = match("alse", 4); + break; + case 'n': + token.type_ = tokenNull; + ok = match("ull", 3); + break; + case ',': + token.type_ = tokenArraySeparator; + break; + case ':': + token.type_ = tokenMemberSeparator; + break; + case 0: + token.type_ = tokenEndOfStream; + break; + default: + ok = false; + break; + } + if (!ok) + token.type_ = tokenError; + token.end_ = current_; + return true; +} + +void Reader::skipSpaces() { + while (current_ != end_) { + Char c = *current_; + if (c == ' ' || c == '\t' || c == '\r' || c == '\n') + ++current_; + else + break; + } +} + +bool Reader::match(Location pattern, int patternLength) { + if (end_ - current_ < patternLength) + return false; + int index = patternLength; + while (index--) + if (current_[index] != pattern[index]) + return false; + current_ += patternLength; + return true; +} + +bool Reader::readComment() { + Location commentBegin = current_ - 1; + Char c = getNextChar(); + bool successful = false; + if (c == '*') + successful = readCStyleComment(); + else if (c == '/') + successful = readCppStyleComment(); + if (!successful) + return false; + + if (collectComments_) { + CommentPlacement placement = commentBefore; + if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) { + if (c != '*' || !containsNewLine(commentBegin, current_)) + placement = commentAfterOnSameLine; + } + + addComment(commentBegin, current_, placement); + } + return true; +} + +static std::string normalizeEOL(Reader::Location begin, Reader::Location end) { + std::string normalized; + normalized.reserve(end - begin); + Reader::Location current = begin; + while (current != end) { + char c = *current++; + if (c == '\r') { + if (current != end && *current == '\n') + // convert dos EOL + ++current; + // convert Mac EOL + normalized += '\n'; + } else { + normalized += c; + } + } + return normalized; +} + +void +Reader::addComment(Location begin, Location end, CommentPlacement placement) { + assert(collectComments_); + const std::string& normalized = normalizeEOL(begin, end); + if (placement == commentAfterOnSameLine) { + assert(lastValue_ != 0); + lastValue_->setComment(normalized, placement); + } else { + commentsBefore_ += normalized; + } +} + +bool 
Reader::readCStyleComment() { + while (current_ != end_) { + Char c = getNextChar(); + if (c == '*' && *current_ == '/') + break; + } + return getNextChar() == '/'; +} + +bool Reader::readCppStyleComment() { + while (current_ != end_) { + Char c = getNextChar(); + if (c == '\n') + break; + if (c == '\r') { + // Consume DOS EOL. It will be normalized in addComment. + if (current_ != end_ && *current_ == '\n') + getNextChar(); + // Break on Moc OS 9 EOL. + break; + } + } + return true; +} + +void Reader::readNumber() { + const char *p = current_; + char c = '0'; // stopgap for already consumed character + // integral part + while (c >= '0' && c <= '9') + c = (current_ = p) < end_ ? *p++ : 0; + // fractional part + if (c == '.') { + c = (current_ = p) < end_ ? *p++ : 0; + while (c >= '0' && c <= '9') + c = (current_ = p) < end_ ? *p++ : 0; + } + // exponential part + if (c == 'e' || c == 'E') { + c = (current_ = p) < end_ ? *p++ : 0; + if (c == '+' || c == '-') + c = (current_ = p) < end_ ? *p++ : 0; + while (c >= '0' && c <= '9') + c = (current_ = p) < end_ ? *p++ : 0; + } +} + +bool Reader::readString() { + Char c = 0; + while (current_ != end_) { + c = getNextChar(); + if (c == '\\') + getNextChar(); + else if (c == '"') + break; + } + return c == '"'; +} + +bool Reader::readObject(Token& tokenStart) { + Token tokenName; + std::string name; + Value init(objectValue); + currentValue().swapPayload(init); + currentValue().setOffsetStart(tokenStart.start_ - begin_); + while (readToken(tokenName)) { + bool initialTokenOk = true; + while (tokenName.type_ == tokenComment && initialTokenOk) + initialTokenOk = readToken(tokenName); + if (!initialTokenOk) + break; + if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object + return true; + name = ""; + if (tokenName.type_ == tokenString) { + if (!decodeString(tokenName, name)) + return recoverFromError(tokenObjectEnd); + } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) { + Value numberName; + if (!decodeNumber(tokenName, numberName)) + return recoverFromError(tokenObjectEnd); + name = numberName.asString(); + } else { + break; + } + + Token colon; + if (!readToken(colon) || colon.type_ != tokenMemberSeparator) { + return addErrorAndRecover( + "Missing ':' after object member name", colon, tokenObjectEnd); + } + Value& value = currentValue()[name]; + nodes_.push(&value); + bool ok = readValue(); + nodes_.pop(); + if (!ok) // error already set + return recoverFromError(tokenObjectEnd); + + Token comma; + if (!readToken(comma) || + (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator && + comma.type_ != tokenComment)) { + return addErrorAndRecover( + "Missing ',' or '}' in object declaration", comma, tokenObjectEnd); + } + bool finalizeTokenOk = true; + while (comma.type_ == tokenComment && finalizeTokenOk) + finalizeTokenOk = readToken(comma); + if (comma.type_ == tokenObjectEnd) + return true; + } + return addErrorAndRecover( + "Missing '}' or object member name", tokenName, tokenObjectEnd); +} + +bool Reader::readArray(Token& tokenStart) { + Value init(arrayValue); + currentValue().swapPayload(init); + currentValue().setOffsetStart(tokenStart.start_ - begin_); + skipSpaces(); + if (*current_ == ']') // empty array + { + Token endArray; + readToken(endArray); + return true; + } + int index = 0; + for (;;) { + Value& value = currentValue()[index++]; + nodes_.push(&value); + bool ok = readValue(); + nodes_.pop(); + if (!ok) // error already set + return recoverFromError(tokenArrayEnd); + + Token 
token; + // Accept Comment after last item in the array. + ok = readToken(token); + while (token.type_ == tokenComment && ok) { + ok = readToken(token); + } + bool badTokenType = + (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd); + if (!ok || badTokenType) { + return addErrorAndRecover( + "Missing ',' or ']' in array declaration", token, tokenArrayEnd); + } + if (token.type_ == tokenArrayEnd) + break; + } + return true; +} + +bool Reader::decodeNumber(Token& token) { + Value decoded; + if (!decodeNumber(token, decoded)) + return false; + currentValue().swapPayload(decoded); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + return true; +} + +bool Reader::decodeNumber(Token& token, Value& decoded) { + // Attempts to parse the number as an integer. If the number is + // larger than the maximum supported value of an integer then + // we decode the number as a double. + Location current = token.start_; + bool isNegative = *current == '-'; + if (isNegative) + ++current; + // TODO: Help the compiler do the div and mod at compile time or get rid of them. + Value::LargestUInt maxIntegerValue = + isNegative ? Value::LargestUInt(-Value::minLargestInt) + : Value::maxLargestUInt; + Value::LargestUInt threshold = maxIntegerValue / 10; + Value::LargestUInt value = 0; + while (current < token.end_) { + Char c = *current++; + if (c < '0' || c > '9') + return decodeDouble(token, decoded); + Value::UInt digit(c - '0'); + if (value >= threshold) { + // We've hit or exceeded the max value divided by 10 (rounded down). If + // a) we've only just touched the limit, b) this is the last digit, and + // c) it's small enough to fit in that rounding delta, we're okay. + // Otherwise treat this number as a double to avoid overflow. + if (value > threshold || current != token.end_ || + digit > maxIntegerValue % 10) { + return decodeDouble(token, decoded); + } + } + value = value * 10 + digit; + } + if (isNegative) + decoded = -Value::LargestInt(value); + else if (value <= Value::LargestUInt(Value::maxInt)) + decoded = Value::LargestInt(value); + else + decoded = value; + return true; +} + +bool Reader::decodeDouble(Token& token) { + Value decoded; + if (!decodeDouble(token, decoded)) + return false; + currentValue().swapPayload(decoded); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + return true; +} + +bool Reader::decodeDouble(Token& token, Value& decoded) { + double value = 0; + const int bufferSize = 32; + int count; + int length = int(token.end_ - token.start_); + + // Sanity check to avoid buffer overflow exploits. + if (length < 0) { + return addError("Unable to parse token length", token); + } + + // Avoid using a string constant for the format control string given to + // sscanf, as this can cause hard to debug crashes on OS X. 
See here for more + // info: + // + // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html + char format[] = "%lf"; + + if (length <= bufferSize) { + Char buffer[bufferSize + 1]; + memcpy(buffer, token.start_, length); + buffer[length] = 0; + count = sscanf(buffer, format, &value); + } else { + std::string buffer(token.start_, token.end_); + count = sscanf(buffer.c_str(), format, &value); + } + + if (count != 1) + return addError("'" + std::string(token.start_, token.end_) + + "' is not a number.", + token); + decoded = value; + return true; +} + +bool Reader::decodeString(Token& token) { + std::string decoded_string; + if (!decodeString(token, decoded_string)) + return false; + Value decoded(decoded_string); + currentValue().swapPayload(decoded); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + return true; +} + +bool Reader::decodeString(Token& token, std::string& decoded) { + decoded.reserve(token.end_ - token.start_ - 2); + Location current = token.start_ + 1; // skip '"' + Location end = token.end_ - 1; // do not include '"' + while (current != end) { + Char c = *current++; + if (c == '"') + break; + else if (c == '\\') { + if (current == end) + return addError("Empty escape sequence in string", token, current); + Char escape = *current++; + switch (escape) { + case '"': + decoded += '"'; + break; + case '/': + decoded += '/'; + break; + case '\\': + decoded += '\\'; + break; + case 'b': + decoded += '\b'; + break; + case 'f': + decoded += '\f'; + break; + case 'n': + decoded += '\n'; + break; + case 'r': + decoded += '\r'; + break; + case 't': + decoded += '\t'; + break; + case 'u': { + unsigned int unicode; + if (!decodeUnicodeCodePoint(token, current, end, unicode)) + return false; + decoded += codePointToUTF8(unicode); + } break; + default: + return addError("Bad escape sequence in string", token, current); + } + } else { + decoded += c; + } + } + return true; +} + +bool Reader::decodeUnicodeCodePoint(Token& token, + Location& current, + Location end, + unsigned int& unicode) { + + if (!decodeUnicodeEscapeSequence(token, current, end, unicode)) + return false; + if (unicode >= 0xD800 && unicode <= 0xDBFF) { + // surrogate pairs + if (end - current < 6) + return addError( + "additional six characters expected to parse unicode surrogate pair.", + token, + current); + unsigned int surrogatePair; + if (*(current++) == '\\' && *(current++) == 'u') { + if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) { + unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF); + } else + return false; + } else + return addError("expecting another \\u token to begin the second half of " + "a unicode surrogate pair", + token, + current); + } + return true; +} + +bool Reader::decodeUnicodeEscapeSequence(Token& token, + Location& current, + Location end, + unsigned int& unicode) { + if (end - current < 4) + return addError( + "Bad unicode escape sequence in string: four digits expected.", + token, + current); + unicode = 0; + for (int index = 0; index < 4; ++index) { + Char c = *current++; + unicode *= 16; + if (c >= '0' && c <= '9') + unicode += c - '0'; + else if (c >= 'a' && c <= 'f') + unicode += c - 'a' + 10; + else if (c >= 'A' && c <= 'F') + unicode += c - 'A' + 10; + else + return addError( + "Bad unicode escape sequence in string: hexadecimal digit expected.", + token, + current); + } + return true; +} + +bool +Reader::addError(const std::string& 
message, Token& token, Location extra) { + ErrorInfo info; + info.token_ = token; + info.message_ = message; + info.extra_ = extra; + errors_.push_back(info); + return false; +} + +bool Reader::recoverFromError(TokenType skipUntilToken) { + int errorCount = int(errors_.size()); + Token skip; + for (;;) { + if (!readToken(skip)) + errors_.resize(errorCount); // discard errors caused by recovery + if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream) + break; + } + errors_.resize(errorCount); + return false; +} + +bool Reader::addErrorAndRecover(const std::string& message, + Token& token, + TokenType skipUntilToken) { + addError(message, token); + return recoverFromError(skipUntilToken); +} + +Value& Reader::currentValue() { return *(nodes_.top()); } + +Reader::Char Reader::getNextChar() { + if (current_ == end_) + return 0; + return *current_++; +} + +void Reader::getLocationLineAndColumn(Location location, + int& line, + int& column) const { + Location current = begin_; + Location lastLineStart = current; + line = 0; + while (current < location && current != end_) { + Char c = *current++; + if (c == '\r') { + if (*current == '\n') + ++current; + lastLineStart = current; + ++line; + } else if (c == '\n') { + lastLineStart = current; + ++line; + } + } + // column & line start at 1 + column = int(location - lastLineStart) + 1; + ++line; +} + +std::string Reader::getLocationLineAndColumn(Location location) const { + int line, column; + getLocationLineAndColumn(location, line, column); + char buffer[18 + 16 + 16 + 1]; +#if defined(_MSC_VER) && defined(__STDC_SECURE_LIB__) +#if defined(WINCE) + _snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column); +#else + sprintf_s(buffer, sizeof(buffer), "Line %d, Column %d", line, column); +#endif +#else + snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column); +#endif + return buffer; +} + +// Deprecated. 
Preserved for backward compatibility +std::string Reader::getFormatedErrorMessages() const { + return getFormattedErrorMessages(); +} + +std::string Reader::getFormattedErrorMessages() const { + std::string formattedMessage; + for (Errors::const_iterator itError = errors_.begin(); + itError != errors_.end(); + ++itError) { + const ErrorInfo& error = *itError; + formattedMessage += + "* " + getLocationLineAndColumn(error.token_.start_) + "\n"; + formattedMessage += " " + error.message_ + "\n"; + if (error.extra_) + formattedMessage += + "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n"; + } + return formattedMessage; +} + +std::vector Reader::getStructuredErrors() const { + std::vector allErrors; + for (Errors::const_iterator itError = errors_.begin(); + itError != errors_.end(); + ++itError) { + const ErrorInfo& error = *itError; + Reader::StructuredError structured; + structured.offset_start = error.token_.start_ - begin_; + structured.offset_limit = error.token_.end_ - begin_; + structured.message = error.message_; + allErrors.push_back(structured); + } + return allErrors; +} + +bool Reader::pushError(const Value& value, const std::string& message) { + size_t length = end_ - begin_; + if(value.getOffsetStart() > length + || value.getOffsetLimit() > length) + return false; + Token token; + token.type_ = tokenError; + token.start_ = begin_ + value.getOffsetStart(); + token.end_ = end_ + value.getOffsetLimit(); + ErrorInfo info; + info.token_ = token; + info.message_ = message; + info.extra_ = 0; + errors_.push_back(info); + return true; +} + +bool Reader::pushError(const Value& value, const std::string& message, const Value& extra) { + size_t length = end_ - begin_; + if(value.getOffsetStart() > length + || value.getOffsetLimit() > length + || extra.getOffsetLimit() > length) + return false; + Token token; + token.type_ = tokenError; + token.start_ = begin_ + value.getOffsetStart(); + token.end_ = begin_ + value.getOffsetLimit(); + ErrorInfo info; + info.token_ = token; + info.message_ = message; + info.extra_ = begin_ + extra.getOffsetStart(); + errors_.push_back(info); + return true; +} + +bool Reader::good() const { + return !errors_.size(); +} + +// exact copy of Features +class OurFeatures { +public: + static OurFeatures all(); + OurFeatures(); + bool allowComments_; + bool strictRoot_; + bool allowDroppedNullPlaceholders_; + bool allowNumericKeys_; + bool allowSingleQuotes_; + bool failIfExtra_; + bool rejectDupKeys_; + int stackLimit_; +}; // OurFeatures + +// exact copy of Implementation of class Features +// //////////////////////////////// + +OurFeatures::OurFeatures() + : allowComments_(true), strictRoot_(false) + , allowDroppedNullPlaceholders_(false), allowNumericKeys_(false) + , allowSingleQuotes_(false) + , failIfExtra_(false) +{ +} + +OurFeatures OurFeatures::all() { return OurFeatures(); } + +// Implementation of class Reader +// //////////////////////////////// + +// exact copy of Reader, renamed to OurReader +class OurReader { +public: + typedef char Char; + typedef const Char* Location; + struct StructuredError { + size_t offset_start; + size_t offset_limit; + std::string message; + }; + + OurReader(OurFeatures const& features); + bool parse(const char* beginDoc, + const char* endDoc, + Value& root, + bool collectComments = true); + std::string getFormattedErrorMessages() const; + std::vector getStructuredErrors() const; + bool pushError(const Value& value, const std::string& message); + bool pushError(const Value& value, const std::string& message, 
const Value& extra); + bool good() const; + +private: + OurReader(OurReader const&); // no impl + void operator=(OurReader const&); // no impl + + enum TokenType { + tokenEndOfStream = 0, + tokenObjectBegin, + tokenObjectEnd, + tokenArrayBegin, + tokenArrayEnd, + tokenString, + tokenNumber, + tokenTrue, + tokenFalse, + tokenNull, + tokenArraySeparator, + tokenMemberSeparator, + tokenComment, + tokenError + }; + + class Token { + public: + TokenType type_; + Location start_; + Location end_; + }; + + class ErrorInfo { + public: + Token token_; + std::string message_; + Location extra_; + }; + + typedef std::deque Errors; + + bool readToken(Token& token); + void skipSpaces(); + bool match(Location pattern, int patternLength); + bool readComment(); + bool readCStyleComment(); + bool readCppStyleComment(); + bool readString(); + bool readStringSingleQuote(); + void readNumber(); + bool readValue(); + bool readObject(Token& token); + bool readArray(Token& token); + bool decodeNumber(Token& token); + bool decodeNumber(Token& token, Value& decoded); + bool decodeString(Token& token); + bool decodeString(Token& token, std::string& decoded); + bool decodeDouble(Token& token); + bool decodeDouble(Token& token, Value& decoded); + bool decodeUnicodeCodePoint(Token& token, + Location& current, + Location end, + unsigned int& unicode); + bool decodeUnicodeEscapeSequence(Token& token, + Location& current, + Location end, + unsigned int& unicode); + bool addError(const std::string& message, Token& token, Location extra = 0); + bool recoverFromError(TokenType skipUntilToken); + bool addErrorAndRecover(const std::string& message, + Token& token, + TokenType skipUntilToken); + void skipUntilSpace(); + Value& currentValue(); + Char getNextChar(); + void + getLocationLineAndColumn(Location location, int& line, int& column) const; + std::string getLocationLineAndColumn(Location location) const; + void addComment(Location begin, Location end, CommentPlacement placement); + void skipCommentTokens(Token& token); + + typedef std::stack Nodes; + Nodes nodes_; + Errors errors_; + std::string document_; + Location begin_; + Location end_; + Location current_; + Location lastValueEnd_; + Value* lastValue_; + std::string commentsBefore_; + int stackDepth_; + + OurFeatures const features_; + bool collectComments_; +}; // OurReader + +// complete copy of Read impl, for OurReader + +OurReader::OurReader(OurFeatures const& features) + : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(), + lastValue_(), commentsBefore_(), features_(features), collectComments_() { +} + +bool OurReader::parse(const char* beginDoc, + const char* endDoc, + Value& root, + bool collectComments) { + if (!features_.allowComments_) { + collectComments = false; + } + + begin_ = beginDoc; + end_ = endDoc; + collectComments_ = collectComments; + current_ = begin_; + lastValueEnd_ = 0; + lastValue_ = 0; + commentsBefore_ = ""; + errors_.clear(); + while (!nodes_.empty()) + nodes_.pop(); + nodes_.push(&root); + + stackDepth_ = 0; + bool successful = readValue(); + Token token; + skipCommentTokens(token); + if (features_.failIfExtra_) { + if (token.type_ != tokenError && token.type_ != tokenEndOfStream) { + addError("Extra non-whitespace after JSON value.", token); + return false; + } + } + if (collectComments_ && !commentsBefore_.empty()) + root.setComment(commentsBefore_, commentAfter); + if (features_.strictRoot_) { + if (!root.isArray() && !root.isObject()) { + // Set error location to start of doc, ideally should be first token 
found + // in doc + token.type_ = tokenError; + token.start_ = beginDoc; + token.end_ = endDoc; + addError( + "A valid JSON document must be either an array or an object value.", + token); + return false; + } + } + return successful; +} + +bool OurReader::readValue() { + if (stackDepth_ >= features_.stackLimit_) throwRuntimeError("Exceeded stackLimit in readValue()."); + ++stackDepth_; + Token token; + skipCommentTokens(token); + bool successful = true; + + if (collectComments_ && !commentsBefore_.empty()) { + currentValue().setComment(commentsBefore_, commentBefore); + commentsBefore_ = ""; + } + + switch (token.type_) { + case tokenObjectBegin: + successful = readObject(token); + currentValue().setOffsetLimit(current_ - begin_); + break; + case tokenArrayBegin: + successful = readArray(token); + currentValue().setOffsetLimit(current_ - begin_); + break; + case tokenNumber: + successful = decodeNumber(token); + break; + case tokenString: + successful = decodeString(token); + break; + case tokenTrue: + { + Value v(true); + currentValue().swapPayload(v); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + } + break; + case tokenFalse: + { + Value v(false); + currentValue().swapPayload(v); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + } + break; + case tokenNull: + { + Value v; + currentValue().swapPayload(v); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + } + break; + case tokenArraySeparator: + case tokenObjectEnd: + case tokenArrayEnd: + if (features_.allowDroppedNullPlaceholders_) { + // "Un-read" the current token and mark the current value as a null + // token. + current_--; + Value v; + currentValue().swapPayload(v); + currentValue().setOffsetStart(current_ - begin_ - 1); + currentValue().setOffsetLimit(current_ - begin_); + break; + } // else, fall through ... 
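+  // Without allowDroppedNullPlaceholders_, the case labels above fall through
+  // to the default branch below and the stray separator or closing bracket is
+  // reported as a syntax error.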
+ default: + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + return addError("Syntax error: value, object or array expected.", token); + } + + if (collectComments_) { + lastValueEnd_ = current_; + lastValue_ = ¤tValue(); + } + + --stackDepth_; + return successful; +} + +void OurReader::skipCommentTokens(Token& token) { + if (features_.allowComments_) { + do { + readToken(token); + } while (token.type_ == tokenComment); + } else { + readToken(token); + } +} + +bool OurReader::readToken(Token& token) { + skipSpaces(); + token.start_ = current_; + Char c = getNextChar(); + bool ok = true; + switch (c) { + case '{': + token.type_ = tokenObjectBegin; + break; + case '}': + token.type_ = tokenObjectEnd; + break; + case '[': + token.type_ = tokenArrayBegin; + break; + case ']': + token.type_ = tokenArrayEnd; + break; + case '"': + token.type_ = tokenString; + ok = readString(); + break; + case '\'': + if (features_.allowSingleQuotes_) { + token.type_ = tokenString; + ok = readStringSingleQuote(); + break; + } // else continue + case '/': + token.type_ = tokenComment; + ok = readComment(); + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '-': + token.type_ = tokenNumber; + readNumber(); + break; + case 't': + token.type_ = tokenTrue; + ok = match("rue", 3); + break; + case 'f': + token.type_ = tokenFalse; + ok = match("alse", 4); + break; + case 'n': + token.type_ = tokenNull; + ok = match("ull", 3); + break; + case ',': + token.type_ = tokenArraySeparator; + break; + case ':': + token.type_ = tokenMemberSeparator; + break; + case 0: + token.type_ = tokenEndOfStream; + break; + default: + ok = false; + break; + } + if (!ok) + token.type_ = tokenError; + token.end_ = current_; + return true; +} + +void OurReader::skipSpaces() { + while (current_ != end_) { + Char c = *current_; + if (c == ' ' || c == '\t' || c == '\r' || c == '\n') + ++current_; + else + break; + } +} + +bool OurReader::match(Location pattern, int patternLength) { + if (end_ - current_ < patternLength) + return false; + int index = patternLength; + while (index--) + if (current_[index] != pattern[index]) + return false; + current_ += patternLength; + return true; +} + +bool OurReader::readComment() { + Location commentBegin = current_ - 1; + Char c = getNextChar(); + bool successful = false; + if (c == '*') + successful = readCStyleComment(); + else if (c == '/') + successful = readCppStyleComment(); + if (!successful) + return false; + + if (collectComments_) { + CommentPlacement placement = commentBefore; + if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) { + if (c != '*' || !containsNewLine(commentBegin, current_)) + placement = commentAfterOnSameLine; + } + + addComment(commentBegin, current_, placement); + } + return true; +} + +void +OurReader::addComment(Location begin, Location end, CommentPlacement placement) { + assert(collectComments_); + const std::string& normalized = normalizeEOL(begin, end); + if (placement == commentAfterOnSameLine) { + assert(lastValue_ != 0); + lastValue_->setComment(normalized, placement); + } else { + commentsBefore_ += normalized; + } +} + +bool OurReader::readCStyleComment() { + while (current_ != end_) { + Char c = getNextChar(); + if (c == '*' && *current_ == '/') + break; + } + return getNextChar() == '/'; +} + +bool OurReader::readCppStyleComment() { + while (current_ != end_) { + Char c = getNextChar(); + if (c == '\n') + 
break; + if (c == '\r') { + // Consume DOS EOL. It will be normalized in addComment. + if (current_ != end_ && *current_ == '\n') + getNextChar(); + // Break on Moc OS 9 EOL. + break; + } + } + return true; +} + +void OurReader::readNumber() { + const char *p = current_; + char c = '0'; // stopgap for already consumed character + // integral part + while (c >= '0' && c <= '9') + c = (current_ = p) < end_ ? *p++ : 0; + // fractional part + if (c == '.') { + c = (current_ = p) < end_ ? *p++ : 0; + while (c >= '0' && c <= '9') + c = (current_ = p) < end_ ? *p++ : 0; + } + // exponential part + if (c == 'e' || c == 'E') { + c = (current_ = p) < end_ ? *p++ : 0; + if (c == '+' || c == '-') + c = (current_ = p) < end_ ? *p++ : 0; + while (c >= '0' && c <= '9') + c = (current_ = p) < end_ ? *p++ : 0; + } +} +bool OurReader::readString() { + Char c = 0; + while (current_ != end_) { + c = getNextChar(); + if (c == '\\') + getNextChar(); + else if (c == '"') + break; + } + return c == '"'; +} + + +bool OurReader::readStringSingleQuote() { + Char c = 0; + while (current_ != end_) { + c = getNextChar(); + if (c == '\\') + getNextChar(); + else if (c == '\'') + break; + } + return c == '\''; +} + +bool OurReader::readObject(Token& tokenStart) { + Token tokenName; + std::string name; + Value init(objectValue); + currentValue().swapPayload(init); + currentValue().setOffsetStart(tokenStart.start_ - begin_); + while (readToken(tokenName)) { + bool initialTokenOk = true; + while (tokenName.type_ == tokenComment && initialTokenOk) + initialTokenOk = readToken(tokenName); + if (!initialTokenOk) + break; + if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object + return true; + name = ""; + if (tokenName.type_ == tokenString) { + if (!decodeString(tokenName, name)) + return recoverFromError(tokenObjectEnd); + } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) { + Value numberName; + if (!decodeNumber(tokenName, numberName)) + return recoverFromError(tokenObjectEnd); + name = numberName.asString(); + } else { + break; + } + + Token colon; + if (!readToken(colon) || colon.type_ != tokenMemberSeparator) { + return addErrorAndRecover( + "Missing ':' after object member name", colon, tokenObjectEnd); + } + if (name.length() >= (1U<<30)) throwRuntimeError("keylength >= 2^30"); + if (features_.rejectDupKeys_ && currentValue().isMember(name)) { + std::string msg = "Duplicate key: '" + name + "'"; + return addErrorAndRecover( + msg, tokenName, tokenObjectEnd); + } + Value& value = currentValue()[name]; + nodes_.push(&value); + bool ok = readValue(); + nodes_.pop(); + if (!ok) // error already set + return recoverFromError(tokenObjectEnd); + + Token comma; + if (!readToken(comma) || + (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator && + comma.type_ != tokenComment)) { + return addErrorAndRecover( + "Missing ',' or '}' in object declaration", comma, tokenObjectEnd); + } + bool finalizeTokenOk = true; + while (comma.type_ == tokenComment && finalizeTokenOk) + finalizeTokenOk = readToken(comma); + if (comma.type_ == tokenObjectEnd) + return true; + } + return addErrorAndRecover( + "Missing '}' or object member name", tokenName, tokenObjectEnd); +} + +bool OurReader::readArray(Token& tokenStart) { + Value init(arrayValue); + currentValue().swapPayload(init); + currentValue().setOffsetStart(tokenStart.start_ - begin_); + skipSpaces(); + if (*current_ == ']') // empty array + { + Token endArray; + readToken(endArray); + return true; + } + int index = 0; + for (;;) { + 
Value& value = currentValue()[index++]; + nodes_.push(&value); + bool ok = readValue(); + nodes_.pop(); + if (!ok) // error already set + return recoverFromError(tokenArrayEnd); + + Token token; + // Accept Comment after last item in the array. + ok = readToken(token); + while (token.type_ == tokenComment && ok) { + ok = readToken(token); + } + bool badTokenType = + (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd); + if (!ok || badTokenType) { + return addErrorAndRecover( + "Missing ',' or ']' in array declaration", token, tokenArrayEnd); + } + if (token.type_ == tokenArrayEnd) + break; + } + return true; +} + +bool OurReader::decodeNumber(Token& token) { + Value decoded; + if (!decodeNumber(token, decoded)) + return false; + currentValue().swapPayload(decoded); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + return true; +} + +bool OurReader::decodeNumber(Token& token, Value& decoded) { + // Attempts to parse the number as an integer. If the number is + // larger than the maximum supported value of an integer then + // we decode the number as a double. + Location current = token.start_; + bool isNegative = *current == '-'; + if (isNegative) + ++current; + // TODO: Help the compiler do the div and mod at compile time or get rid of them. + Value::LargestUInt maxIntegerValue = + isNegative ? Value::LargestUInt(-Value::minLargestInt) + : Value::maxLargestUInt; + Value::LargestUInt threshold = maxIntegerValue / 10; + Value::LargestUInt value = 0; + while (current < token.end_) { + Char c = *current++; + if (c < '0' || c > '9') + return decodeDouble(token, decoded); + Value::UInt digit(c - '0'); + if (value >= threshold) { + // We've hit or exceeded the max value divided by 10 (rounded down). If + // a) we've only just touched the limit, b) this is the last digit, and + // c) it's small enough to fit in that rounding delta, we're okay. + // Otherwise treat this number as a double to avoid overflow. + if (value > threshold || current != token.end_ || + digit > maxIntegerValue % 10) { + return decodeDouble(token, decoded); + } + } + value = value * 10 + digit; + } + if (isNegative) + decoded = -Value::LargestInt(value); + else if (value <= Value::LargestUInt(Value::maxInt)) + decoded = Value::LargestInt(value); + else + decoded = value; + return true; +} + +bool OurReader::decodeDouble(Token& token) { + Value decoded; + if (!decodeDouble(token, decoded)) + return false; + currentValue().swapPayload(decoded); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + return true; +} + +bool OurReader::decodeDouble(Token& token, Value& decoded) { + double value = 0; + const int bufferSize = 32; + int count; + int length = int(token.end_ - token.start_); + + // Sanity check to avoid buffer overflow exploits. + if (length < 0) { + return addError("Unable to parse token length", token); + } + + // Avoid using a string constant for the format control string given to + // sscanf, as this can cause hard to debug crashes on OS X. 
See here for more + // info: + // + // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html + char format[] = "%lf"; + + if (length <= bufferSize) { + Char buffer[bufferSize + 1]; + memcpy(buffer, token.start_, length); + buffer[length] = 0; + count = sscanf(buffer, format, &value); + } else { + std::string buffer(token.start_, token.end_); + count = sscanf(buffer.c_str(), format, &value); + } + + if (count != 1) + return addError("'" + std::string(token.start_, token.end_) + + "' is not a number.", + token); + decoded = value; + return true; +} + +bool OurReader::decodeString(Token& token) { + std::string decoded_string; + if (!decodeString(token, decoded_string)) + return false; + Value decoded(decoded_string); + currentValue().swapPayload(decoded); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + return true; +} + +bool OurReader::decodeString(Token& token, std::string& decoded) { + decoded.reserve(token.end_ - token.start_ - 2); + Location current = token.start_ + 1; // skip '"' + Location end = token.end_ - 1; // do not include '"' + while (current != end) { + Char c = *current++; + if (c == '"') + break; + else if (c == '\\') { + if (current == end) + return addError("Empty escape sequence in string", token, current); + Char escape = *current++; + switch (escape) { + case '"': + decoded += '"'; + break; + case '/': + decoded += '/'; + break; + case '\\': + decoded += '\\'; + break; + case 'b': + decoded += '\b'; + break; + case 'f': + decoded += '\f'; + break; + case 'n': + decoded += '\n'; + break; + case 'r': + decoded += '\r'; + break; + case 't': + decoded += '\t'; + break; + case 'u': { + unsigned int unicode; + if (!decodeUnicodeCodePoint(token, current, end, unicode)) + return false; + decoded += codePointToUTF8(unicode); + } break; + default: + return addError("Bad escape sequence in string", token, current); + } + } else { + decoded += c; + } + } + return true; +} + +bool OurReader::decodeUnicodeCodePoint(Token& token, + Location& current, + Location end, + unsigned int& unicode) { + + if (!decodeUnicodeEscapeSequence(token, current, end, unicode)) + return false; + if (unicode >= 0xD800 && unicode <= 0xDBFF) { + // surrogate pairs + if (end - current < 6) + return addError( + "additional six characters expected to parse unicode surrogate pair.", + token, + current); + unsigned int surrogatePair; + if (*(current++) == '\\' && *(current++) == 'u') { + if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) { + unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF); + } else + return false; + } else + return addError("expecting another \\u token to begin the second half of " + "a unicode surrogate pair", + token, + current); + } + return true; +} + +bool OurReader::decodeUnicodeEscapeSequence(Token& token, + Location& current, + Location end, + unsigned int& unicode) { + if (end - current < 4) + return addError( + "Bad unicode escape sequence in string: four digits expected.", + token, + current); + unicode = 0; + for (int index = 0; index < 4; ++index) { + Char c = *current++; + unicode *= 16; + if (c >= '0' && c <= '9') + unicode += c - '0'; + else if (c >= 'a' && c <= 'f') + unicode += c - 'a' + 10; + else if (c >= 'A' && c <= 'F') + unicode += c - 'A' + 10; + else + return addError( + "Bad unicode escape sequence in string: hexadecimal digit expected.", + token, + current); + } + return true; +} + +bool +OurReader::addError(const 
std::string& message, Token& token, Location extra) { + ErrorInfo info; + info.token_ = token; + info.message_ = message; + info.extra_ = extra; + errors_.push_back(info); + return false; +} + +bool OurReader::recoverFromError(TokenType skipUntilToken) { + int errorCount = int(errors_.size()); + Token skip; + for (;;) { + if (!readToken(skip)) + errors_.resize(errorCount); // discard errors caused by recovery + if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream) + break; + } + errors_.resize(errorCount); + return false; +} + +bool OurReader::addErrorAndRecover(const std::string& message, + Token& token, + TokenType skipUntilToken) { + addError(message, token); + return recoverFromError(skipUntilToken); +} + +Value& OurReader::currentValue() { return *(nodes_.top()); } + +OurReader::Char OurReader::getNextChar() { + if (current_ == end_) + return 0; + return *current_++; +} + +void OurReader::getLocationLineAndColumn(Location location, + int& line, + int& column) const { + Location current = begin_; + Location lastLineStart = current; + line = 0; + while (current < location && current != end_) { + Char c = *current++; + if (c == '\r') { + if (*current == '\n') + ++current; + lastLineStart = current; + ++line; + } else if (c == '\n') { + lastLineStart = current; + ++line; + } + } + // column & line start at 1 + column = int(location - lastLineStart) + 1; + ++line; +} + +std::string OurReader::getLocationLineAndColumn(Location location) const { + int line, column; + getLocationLineAndColumn(location, line, column); + char buffer[18 + 16 + 16 + 1]; +#if defined(_MSC_VER) && defined(__STDC_SECURE_LIB__) +#if defined(WINCE) + _snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column); +#else + sprintf_s(buffer, sizeof(buffer), "Line %d, Column %d", line, column); +#endif +#else + snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column); +#endif + return buffer; +} + +std::string OurReader::getFormattedErrorMessages() const { + std::string formattedMessage; + for (Errors::const_iterator itError = errors_.begin(); + itError != errors_.end(); + ++itError) { + const ErrorInfo& error = *itError; + formattedMessage += + "* " + getLocationLineAndColumn(error.token_.start_) + "\n"; + formattedMessage += " " + error.message_ + "\n"; + if (error.extra_) + formattedMessage += + "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n"; + } + return formattedMessage; +} + +std::vector OurReader::getStructuredErrors() const { + std::vector allErrors; + for (Errors::const_iterator itError = errors_.begin(); + itError != errors_.end(); + ++itError) { + const ErrorInfo& error = *itError; + OurReader::StructuredError structured; + structured.offset_start = error.token_.start_ - begin_; + structured.offset_limit = error.token_.end_ - begin_; + structured.message = error.message_; + allErrors.push_back(structured); + } + return allErrors; +} + +bool OurReader::pushError(const Value& value, const std::string& message) { + size_t length = end_ - begin_; + if(value.getOffsetStart() > length + || value.getOffsetLimit() > length) + return false; + Token token; + token.type_ = tokenError; + token.start_ = begin_ + value.getOffsetStart(); + token.end_ = end_ + value.getOffsetLimit(); + ErrorInfo info; + info.token_ = token; + info.message_ = message; + info.extra_ = 0; + errors_.push_back(info); + return true; +} + +bool OurReader::pushError(const Value& value, const std::string& message, const Value& extra) { + size_t length = end_ - begin_; + if(value.getOffsetStart() > 
length + || value.getOffsetLimit() > length + || extra.getOffsetLimit() > length) + return false; + Token token; + token.type_ = tokenError; + token.start_ = begin_ + value.getOffsetStart(); + token.end_ = begin_ + value.getOffsetLimit(); + ErrorInfo info; + info.token_ = token; + info.message_ = message; + info.extra_ = begin_ + extra.getOffsetStart(); + errors_.push_back(info); + return true; +} + +bool OurReader::good() const { + return !errors_.size(); +} + + +class OurCharReader : public CharReader { + bool const collectComments_; + OurReader reader_; +public: + OurCharReader( + bool collectComments, + OurFeatures const& features) + : collectComments_(collectComments) + , reader_(features) + {} + virtual bool parse( + char const* beginDoc, char const* endDoc, + Value* root, std::string* errs) { + bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_); + if (errs) { + *errs = reader_.getFormattedErrorMessages(); + } + return ok; + } +}; + +CharReaderBuilder::CharReaderBuilder() +{ + setDefaults(&settings_); +} +CharReaderBuilder::~CharReaderBuilder() +{} +CharReader* CharReaderBuilder::newCharReader() const +{ + bool collectComments = settings_["collectComments"].asBool(); + OurFeatures features = OurFeatures::all(); + features.allowComments_ = settings_["allowComments"].asBool(); + features.strictRoot_ = settings_["strictRoot"].asBool(); + features.allowDroppedNullPlaceholders_ = settings_["allowDroppedNullPlaceholders"].asBool(); + features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool(); + features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool(); + features.stackLimit_ = settings_["stackLimit"].asInt(); + features.failIfExtra_ = settings_["failIfExtra"].asBool(); + features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool(); + return new OurCharReader(collectComments, features); +} +static void getValidReaderKeys(std::set* valid_keys) +{ + valid_keys->clear(); + valid_keys->insert("collectComments"); + valid_keys->insert("allowComments"); + valid_keys->insert("strictRoot"); + valid_keys->insert("allowDroppedNullPlaceholders"); + valid_keys->insert("allowNumericKeys"); + valid_keys->insert("allowSingleQuotes"); + valid_keys->insert("stackLimit"); + valid_keys->insert("failIfExtra"); + valid_keys->insert("rejectDupKeys"); +} +bool CharReaderBuilder::validate(Json::Value* invalid) const +{ + Json::Value my_invalid; + if (!invalid) invalid = &my_invalid; // so we do not need to test for NULL + Json::Value& inv = *invalid; + std::set valid_keys; + getValidReaderKeys(&valid_keys); + Value::Members keys = settings_.getMemberNames(); + size_t n = keys.size(); + for (size_t i = 0; i < n; ++i) { + std::string const& key = keys[i]; + if (valid_keys.find(key) == valid_keys.end()) { + inv[key] = settings_[key]; + } + } + return 0u == inv.size(); +} +Value& CharReaderBuilder::operator[](std::string key) +{ + return settings_[key]; +} +// static +void CharReaderBuilder::strictMode(Json::Value* settings) +{ +//! [CharReaderBuilderStrictMode] + (*settings)["allowComments"] = false; + (*settings)["strictRoot"] = true; + (*settings)["allowDroppedNullPlaceholders"] = false; + (*settings)["allowNumericKeys"] = false; + (*settings)["allowSingleQuotes"] = false; + (*settings)["failIfExtra"] = true; + (*settings)["rejectDupKeys"] = true; +//! [CharReaderBuilderStrictMode] +} +// static +void CharReaderBuilder::setDefaults(Json::Value* settings) +{ +//! 
[CharReaderBuilderDefaults] + (*settings)["collectComments"] = true; + (*settings)["allowComments"] = true; + (*settings)["strictRoot"] = false; + (*settings)["allowDroppedNullPlaceholders"] = false; + (*settings)["allowNumericKeys"] = false; + (*settings)["allowSingleQuotes"] = false; + (*settings)["stackLimit"] = 1000; + (*settings)["failIfExtra"] = false; + (*settings)["rejectDupKeys"] = false; +//! [CharReaderBuilderDefaults] +} + +////////////////////////////////// +// global functions + +bool parseFromStream( + CharReader::Factory const& fact, std::istream& sin, + Value* root, std::string* errs) +{ + std::ostringstream ssin; + ssin << sin.rdbuf(); + std::string doc = ssin.str(); + char const* begin = doc.data(); + char const* end = begin + doc.size(); + // Note that we do not actually need a null-terminator. + CharReaderPtr const reader(fact.newCharReader()); + return reader->parse(begin, end, root, errs); +} + +std::istream& operator>>(std::istream& sin, Value& root) { + CharReaderBuilder b; + std::string errs; + bool ok = parseFromStream(b, sin, &root, &errs); + if (!ok) { + fprintf(stderr, + "Error from reader: %s", + errs.c_str()); + + throwRuntimeError("reader error"); + } + return sin; +} + +} // namespace Json + +// ////////////////////////////////////////////////////////////////////// +// End of content of file: src/lib_json/json_reader.cpp +// ////////////////////////////////////////////////////////////////////// + + + + + + +// ////////////////////////////////////////////////////////////////////// +// Beginning of content of file: src/lib_json/json_valueiterator.inl +// ////////////////////////////////////////////////////////////////////// + +// Copyright 2007-2010 Baptiste Lepilleur +// Distributed under MIT license, or public domain if desired and +// recognized in your jurisdiction. +// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE + +// included by json_value.cpp + +namespace Json { + +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// class ValueIteratorBase +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// + +ValueIteratorBase::ValueIteratorBase() + : current_(), isNull_(true) { +} + +ValueIteratorBase::ValueIteratorBase( + const Value::ObjectValues::iterator& current) + : current_(current), isNull_(false) {} + +Value& ValueIteratorBase::deref() const { + return current_->second; +} + +void ValueIteratorBase::increment() { + ++current_; +} + +void ValueIteratorBase::decrement() { + --current_; +} + +ValueIteratorBase::difference_type +ValueIteratorBase::computeDistance(const SelfType& other) const { +#ifdef JSON_USE_CPPTL_SMALLMAP + return other.current_ - current_; +#else + // Iterator for null value are initialized using the default + // constructor, which initialize current_ to the default + // std::map::iterator. As begin() and end() are two instance + // of the default std::map::iterator, they can not be compared. + // To allow this, we handle this comparison specifically. + if (isNull_ && other.isNull_) { + return 0; + } + + // Usage of std::distance is not portable (does not compile with Sun Studio 12 + // RogueWave STL, + // which is the one used by default). 
+ // Using a portable hand-made version for non random iterator instead: + // return difference_type( std::distance( current_, other.current_ ) ); + difference_type myDistance = 0; + for (Value::ObjectValues::iterator it = current_; it != other.current_; + ++it) { + ++myDistance; + } + return myDistance; +#endif +} + +bool ValueIteratorBase::isEqual(const SelfType& other) const { + if (isNull_) { + return other.isNull_; + } + return current_ == other.current_; +} + +void ValueIteratorBase::copy(const SelfType& other) { + current_ = other.current_; + isNull_ = other.isNull_; +} + +Value ValueIteratorBase::key() const { + const Value::CZString czstring = (*current_).first; + if (czstring.data()) { + if (czstring.isStaticString()) + return Value(StaticString(czstring.data())); + return Value(czstring.data(), czstring.data() + czstring.length()); + } + return Value(czstring.index()); +} + +UInt ValueIteratorBase::index() const { + const Value::CZString czstring = (*current_).first; + if (!czstring.data()) + return czstring.index(); + return Value::UInt(-1); +} + +std::string ValueIteratorBase::name() const { + char const* key; + char const* end; + key = memberName(&end); + if (!key) return std::string(); + return std::string(key, end); +} + +char const* ValueIteratorBase::memberName() const { + const char* name = (*current_).first.data(); + return name ? name : ""; +} + +char const* ValueIteratorBase::memberName(char const** end) const { + const char* name = (*current_).first.data(); + if (!name) { + *end = NULL; + return NULL; + } + *end = name + (*current_).first.length(); + return name; +} + +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// class ValueConstIterator +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// + +ValueConstIterator::ValueConstIterator() {} + +ValueConstIterator::ValueConstIterator( + const Value::ObjectValues::iterator& current) + : ValueIteratorBase(current) {} + +ValueConstIterator& ValueConstIterator:: +operator=(const ValueIteratorBase& other) { + copy(other); + return *this; +} + +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// class ValueIterator +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// + +ValueIterator::ValueIterator() {} + +ValueIterator::ValueIterator(const Value::ObjectValues::iterator& current) + : ValueIteratorBase(current) {} + +ValueIterator::ValueIterator(const ValueConstIterator& other) + : ValueIteratorBase(other) {} + +ValueIterator::ValueIterator(const ValueIterator& other) + : ValueIteratorBase(other) {} + +ValueIterator& ValueIterator::operator=(const SelfType& other) { + copy(other); + return *this; +} + +} // namespace Json + +// ////////////////////////////////////////////////////////////////////// +// End of content of file: src/lib_json/json_valueiterator.inl +// ////////////////////////////////////////////////////////////////////// + + + + + + +// 
//////////////////////////////////////////////////////////////////
+// Beginning of content of file: src/lib_json/json_value.cpp
+// //////////////////////////////////////////////////////////////////////
+
+// Copyright 2011 Baptiste Lepilleur
+// Distributed under MIT license, or public domain if desired and
+// recognized in your jurisdiction.
+// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
+
+#if !defined(JSON_IS_AMALGAMATION)
+#include <json/assertions.h>
+#include <json/value.h>
+#include <json/writer.h>
+#endif // if !defined(JSON_IS_AMALGAMATION)
+#include <math.h>
+#include <sstream>
+#include <utility>
+#include <cstring>
+#include <cassert>
+#ifdef JSON_USE_CPPTL
+#include <cpptl/conststring.h>
+#endif
+#include <cstddef> // size_t
+#include <algorithm> // min()
+
+#define JSON_ASSERT_UNREACHABLE assert(false)
+
+namespace Json {
+
+// This is a walkaround to avoid the static initialization of Value::null.
+// kNull must be word-aligned to avoid crashing on ARM. We use an alignment of
+// 8 (instead of 4) as a bit of future-proofing.
+#if defined(__ARMEL__)
+#define ALIGNAS(byte_alignment) __attribute__((aligned(byte_alignment)))
+#else
+#define ALIGNAS(byte_alignment)
+#endif
+static const unsigned char ALIGNAS(8) kNull[sizeof(Value)] = { 0 };
+const unsigned char& kNullRef = kNull[0];
+const Value& Value::null = reinterpret_cast<const Value&>(kNullRef);
+const Value& Value::nullRef = null;
+
+const Int Value::minInt = Int(~(UInt(-1) / 2));
+const Int Value::maxInt = Int(UInt(-1) / 2);
+const UInt Value::maxUInt = UInt(-1);
+#if defined(JSON_HAS_INT64)
+const Int64 Value::minInt64 = Int64(~(UInt64(-1) / 2));
+const Int64 Value::maxInt64 = Int64(UInt64(-1) / 2);
+const UInt64 Value::maxUInt64 = UInt64(-1);
+// The constant is hard-coded because some compiler have trouble
+// converting Value::maxUInt64 to a double correctly (AIX/xlC).
+// Assumes that UInt64 is a 64 bits integer.
+static const double maxUInt64AsDouble = 18446744073709551615.0;
+#endif // defined(JSON_HAS_INT64)
+const LargestInt Value::minLargestInt = LargestInt(~(LargestUInt(-1) / 2));
+const LargestInt Value::maxLargestInt = LargestInt(LargestUInt(-1) / 2);
+const LargestUInt Value::maxLargestUInt = LargestUInt(-1);
+
+#if !defined(JSON_USE_INT64_DOUBLE_CONVERSION)
+template <typename T, typename U>
+static inline bool InRange(double d, T min, U max) {
+  return d >= min && d <= max;
+}
+#else // if !defined(JSON_USE_INT64_DOUBLE_CONVERSION)
+static inline double integerToDouble(Json::UInt64 value) {
+  return static_cast<double>(Int64(value / 2)) * 2.0 + Int64(value & 1);
+}
+
+template <typename T> static inline double integerToDouble(T value) {
+  return static_cast<double>(value);
+}
+
+template <typename T, typename U>
+static inline bool InRange(double d, T min, U max) {
+  return d >= integerToDouble(min) && d <= integerToDouble(max);
+}
+#endif // if !defined(JSON_USE_INT64_DOUBLE_CONVERSION)
+
+/** Duplicates the specified string value.
+ * @param value Pointer to the string to duplicate. Must be zero-terminated if
+ *              length is "unknown".
+ * @param length Length of the value. if equals to unknown, then it will be
+ *               computed using strlen(value).
+ * @return Pointer on the duplicate instance of string.
+ */
+static inline char* duplicateStringValue(const char* value,
+                                         size_t length) {
+  // Avoid an integer overflow in the call to malloc below by limiting length
+  // to a sane value.
+  if (length >= (size_t)Value::maxInt)
+    length = Value::maxInt - 1;
+
+  char* newString = static_cast<char*>(malloc(length + 1));
+  if (newString == NULL) {
+    throwRuntimeError(
+        "in Json::Value::duplicateStringValue(): "
+        "Failed to allocate string value buffer");
+  }
+  memcpy(newString, value, length);
+  newString[length] = 0;
+  return newString;
+}
+
+/* Record the length as a prefix.
+ */
+static inline char* duplicateAndPrefixStringValue(
+    const char* value,
+    unsigned int length)
+{
+  // Avoid an integer overflow in the call to malloc below by limiting length
+  // to a sane value.
+  JSON_ASSERT_MESSAGE(length <= (unsigned)Value::maxInt - sizeof(unsigned) - 1U,
+                      "in Json::Value::duplicateAndPrefixStringValue(): "
+                      "length too big for prefixing");
+  unsigned actualLength = length + sizeof(unsigned) + 1U;
+  char* newString = static_cast<char*>(malloc(actualLength));
+  if (newString == 0) {
+    throwRuntimeError(
+        "in Json::Value::duplicateAndPrefixStringValue(): "
+        "Failed to allocate string value buffer");
+  }
+  *reinterpret_cast<unsigned*>(newString) = length;
+  memcpy(newString + sizeof(unsigned), value, length);
+  newString[actualLength - 1U] = 0; // to avoid buffer over-run accidents by users later
+  return newString;
+}
+inline static void decodePrefixedString(
+    bool isPrefixed, char const* prefixed,
+    unsigned* length, char const** value)
+{
+  if (!isPrefixed) {
+    *length = strlen(prefixed);
+    *value = prefixed;
+  } else {
+    *length = *reinterpret_cast<unsigned const*>(prefixed);
+    *value = prefixed + sizeof(unsigned);
+  }
+}
+/** Free the string duplicated by duplicateStringValue()/duplicateAndPrefixStringValue().
+ */
+static inline void releaseStringValue(char* value) { free(value); }
+
+} // namespace Json
+
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// ValueInternals...
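+// (Note on the helpers above: duplicateAndPrefixStringValue() lays a string out
+// as [unsigned length][payload bytes][NUL] in one malloc'd block, and
+// decodePrefixedString() recovers the length/pointer pair from that block, so
+// the stored length is preserved even when the payload contains embedded zeros.)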
+// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +#if !defined(JSON_IS_AMALGAMATION) + +#include "json_valueiterator.inl" +#endif // if !defined(JSON_IS_AMALGAMATION) + +namespace Json { + +class JSON_API Exception : public std::exception { +public: + Exception(std::string const& msg); + virtual ~Exception() throw(); + virtual char const* what() const throw(); +protected: + std::string const msg_; +}; +class JSON_API RuntimeError : public Exception { +public: + RuntimeError(std::string const& msg); +}; +class JSON_API LogicError : public Exception { +public: + LogicError(std::string const& msg); +}; + +Exception::Exception(std::string const& msg) + : msg_(msg) +{} +Exception::~Exception() throw() +{} +char const* Exception::what() const throw() +{ + return msg_.c_str(); +} +RuntimeError::RuntimeError(std::string const& msg) + : Exception(msg) +{} +LogicError::LogicError(std::string const& msg) + : Exception(msg) +{} +void throwRuntimeError(std::string const& msg) +{ + throw RuntimeError(msg); +} +void throwLogicError(std::string const& msg) +{ + throw LogicError(msg); +} + +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// class Value::CommentInfo +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// + +Value::CommentInfo::CommentInfo() : comment_(0) {} + +Value::CommentInfo::~CommentInfo() { + if (comment_) + releaseStringValue(comment_); +} + +void Value::CommentInfo::setComment(const char* text, size_t len) { + if (comment_) { + releaseStringValue(comment_); + comment_ = 0; + } + JSON_ASSERT(text != 0); + JSON_ASSERT_MESSAGE( + text[0] == '\0' || text[0] == '/', + "in Json::Value::setComment(): Comments must start with /"); + // It seems that /**/ style comments are acceptable as well. + comment_ = duplicateStringValue(text, len); +} + +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// class Value::CZString +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// + +// Notes: policy_ indicates if the string was allocated when +// a string is stored. + +Value::CZString::CZString(ArrayIndex index) : cstr_(0), index_(index) {} + +Value::CZString::CZString(char const* str, unsigned length, DuplicationPolicy allocate) + : cstr_(str) +{ + // allocate != duplicate + storage_.policy_ = allocate; + storage_.length_ = length; +} + +Value::CZString::CZString(const CZString& other) + : cstr_(other.storage_.policy_ != noDuplication && other.cstr_ != 0 + ? duplicateStringValue(other.cstr_, other.storage_.length_) + : other.cstr_) +{ + storage_.policy_ = (other.cstr_ + ? (other.storage_.policy_ == noDuplication + ? 
noDuplication : duplicate) + : other.storage_.policy_); + storage_.length_ = other.storage_.length_; +} + +Value::CZString::~CZString() { + if (cstr_ && storage_.policy_ == duplicate) + releaseStringValue(const_cast(cstr_)); +} + +void Value::CZString::swap(CZString& other) { + std::swap(cstr_, other.cstr_); + std::swap(index_, other.index_); +} + +Value::CZString& Value::CZString::operator=(CZString other) { + swap(other); + return *this; +} + +bool Value::CZString::operator<(const CZString& other) const { + if (!cstr_) return index_ < other.index_; + //return strcmp(cstr_, other.cstr_) < 0; + // Assume both are strings. + unsigned this_len = this->storage_.length_; + unsigned other_len = other.storage_.length_; + unsigned min_len = std::min(this_len, other_len); + int comp = memcmp(this->cstr_, other.cstr_, min_len); + if (comp < 0) return true; + if (comp > 0) return false; + return (this_len < other_len); +} + +bool Value::CZString::operator==(const CZString& other) const { + if (!cstr_) return index_ == other.index_; + //return strcmp(cstr_, other.cstr_) == 0; + // Assume both are strings. + unsigned this_len = this->storage_.length_; + unsigned other_len = other.storage_.length_; + if (this_len != other_len) return false; + int comp = memcmp(this->cstr_, other.cstr_, this_len); + return comp == 0; +} + +ArrayIndex Value::CZString::index() const { return index_; } + +//const char* Value::CZString::c_str() const { return cstr_; } +const char* Value::CZString::data() const { return cstr_; } +unsigned Value::CZString::length() const { return storage_.length_; } +bool Value::CZString::isStaticString() const { return storage_.policy_ == noDuplication; } + +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// class Value::Value +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// + +/*! \internal Default constructor initialization must be equivalent to: + * memset( this, 0, sizeof(Value) ) + * This optimization is used in ValueInternalMap fast allocator. 
+ */ +Value::Value(ValueType type) { + initBasic(type); + switch (type) { + case nullValue: + break; + case intValue: + case uintValue: + value_.int_ = 0; + break; + case realValue: + value_.real_ = 0.0; + break; + case stringValue: + value_.string_ = 0; + break; + case arrayValue: + case objectValue: + value_.map_ = new ObjectValues(); + break; + case booleanValue: + value_.bool_ = false; + break; + default: + JSON_ASSERT_UNREACHABLE; + } +} + +Value::Value(Int value) { + initBasic(intValue); + value_.int_ = value; +} + +Value::Value(UInt value) { + initBasic(uintValue); + value_.uint_ = value; +} +#if defined(JSON_HAS_INT64) +Value::Value(Int64 value) { + initBasic(intValue); + value_.int_ = value; +} +Value::Value(UInt64 value) { + initBasic(uintValue); + value_.uint_ = value; +} +#endif // defined(JSON_HAS_INT64) + +Value::Value(double value) { + initBasic(realValue); + value_.real_ = value; +} + +Value::Value(const char* value) { + initBasic(stringValue, true); + value_.string_ = duplicateAndPrefixStringValue(value, static_cast(strlen(value))); +} + +Value::Value(const char* beginValue, const char* endValue) { + initBasic(stringValue, true); + value_.string_ = + duplicateAndPrefixStringValue(beginValue, static_cast(endValue - beginValue)); +} + +Value::Value(const std::string& value) { + initBasic(stringValue, true); + value_.string_ = + duplicateAndPrefixStringValue(value.data(), static_cast(value.length())); +} + +Value::Value(const StaticString& value) { + initBasic(stringValue); + value_.string_ = const_cast(value.c_str()); +} + +#ifdef JSON_USE_CPPTL +Value::Value(const CppTL::ConstString& value) { + initBasic(stringValue, true); + value_.string_ = duplicateAndPrefixStringValue(value, static_cast(value.length())); +} +#endif + +Value::Value(bool value) { + initBasic(booleanValue); + value_.bool_ = value; +} + +Value::Value(Value const& other) + : type_(other.type_), allocated_(false) + , + comments_(0), start_(other.start_), limit_(other.limit_) +{ + switch (type_) { + case nullValue: + case intValue: + case uintValue: + case realValue: + case booleanValue: + value_ = other.value_; + break; + case stringValue: + if (other.value_.string_ && other.allocated_) { + unsigned len; + char const* str; + decodePrefixedString(other.allocated_, other.value_.string_, + &len, &str); + value_.string_ = duplicateAndPrefixStringValue(str, len); + allocated_ = true; + } else { + value_.string_ = other.value_.string_; + allocated_ = false; + } + break; + case arrayValue: + case objectValue: + value_.map_ = new ObjectValues(*other.value_.map_); + break; + default: + JSON_ASSERT_UNREACHABLE; + } + if (other.comments_) { + comments_ = new CommentInfo[numberOfCommentPlacement]; + for (int comment = 0; comment < numberOfCommentPlacement; ++comment) { + const CommentInfo& otherComment = other.comments_[comment]; + if (otherComment.comment_) + comments_[comment].setComment( + otherComment.comment_, strlen(otherComment.comment_)); + } + } +} + +Value::~Value() { + switch (type_) { + case nullValue: + case intValue: + case uintValue: + case realValue: + case booleanValue: + break; + case stringValue: + if (allocated_) + releaseStringValue(value_.string_); + break; + case arrayValue: + case objectValue: + delete value_.map_; + break; + default: + JSON_ASSERT_UNREACHABLE; + } + + if (comments_) + delete[] comments_; +} + +Value& Value::operator=(Value other) { + swap(other); + return *this; +} + +void Value::swapPayload(Value& other) { + ValueType temp = type_; + type_ = other.type_; + other.type_ = temp; + 
std::swap(value_, other.value_); + int temp2 = allocated_; + allocated_ = other.allocated_; + other.allocated_ = temp2; +} + +void Value::swap(Value& other) { + swapPayload(other); + std::swap(comments_, other.comments_); + std::swap(start_, other.start_); + std::swap(limit_, other.limit_); +} + +ValueType Value::type() const { return type_; } + +int Value::compare(const Value& other) const { + if (*this < other) + return -1; + if (*this > other) + return 1; + return 0; +} + +bool Value::operator<(const Value& other) const { + int typeDelta = type_ - other.type_; + if (typeDelta) + return typeDelta < 0 ? true : false; + switch (type_) { + case nullValue: + return false; + case intValue: + return value_.int_ < other.value_.int_; + case uintValue: + return value_.uint_ < other.value_.uint_; + case realValue: + return value_.real_ < other.value_.real_; + case booleanValue: + return value_.bool_ < other.value_.bool_; + case stringValue: + { + if ((value_.string_ == 0) || (other.value_.string_ == 0)) { + if (other.value_.string_) return true; + else return false; + } + unsigned this_len; + unsigned other_len; + char const* this_str; + char const* other_str; + decodePrefixedString(this->allocated_, this->value_.string_, &this_len, &this_str); + decodePrefixedString(other.allocated_, other.value_.string_, &other_len, &other_str); + unsigned min_len = std::min(this_len, other_len); + int comp = memcmp(this_str, other_str, min_len); + if (comp < 0) return true; + if (comp > 0) return false; + return (this_len < other_len); + } + case arrayValue: + case objectValue: { + int delta = int(value_.map_->size() - other.value_.map_->size()); + if (delta) + return delta < 0; + return (*value_.map_) < (*other.value_.map_); + } + default: + JSON_ASSERT_UNREACHABLE; + } + return false; // unreachable +} + +bool Value::operator<=(const Value& other) const { return !(other < *this); } + +bool Value::operator>=(const Value& other) const { return !(*this < other); } + +bool Value::operator>(const Value& other) const { return other < *this; } + +bool Value::operator==(const Value& other) const { + // if ( type_ != other.type_ ) + // GCC 2.95.3 says: + // attempt to take address of bit-field structure member `Json::Value::type_' + // Beats me, but a temp solves the problem. 
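+  // (The same bit-field restriction is why swapPayload() above swaps type_
+  // through a temporary rather than calling std::swap on it.)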
+ int temp = other.type_; + if (type_ != temp) + return false; + switch (type_) { + case nullValue: + return true; + case intValue: + return value_.int_ == other.value_.int_; + case uintValue: + return value_.uint_ == other.value_.uint_; + case realValue: + return value_.real_ == other.value_.real_; + case booleanValue: + return value_.bool_ == other.value_.bool_; + case stringValue: + { + if ((value_.string_ == 0) || (other.value_.string_ == 0)) { + return (value_.string_ == other.value_.string_); + } + unsigned this_len; + unsigned other_len; + char const* this_str; + char const* other_str; + decodePrefixedString(this->allocated_, this->value_.string_, &this_len, &this_str); + decodePrefixedString(other.allocated_, other.value_.string_, &other_len, &other_str); + if (this_len != other_len) return false; + int comp = memcmp(this_str, other_str, this_len); + return comp == 0; + } + case arrayValue: + case objectValue: + return value_.map_->size() == other.value_.map_->size() && + (*value_.map_) == (*other.value_.map_); + default: + JSON_ASSERT_UNREACHABLE; + } + return false; // unreachable +} + +bool Value::operator!=(const Value& other) const { return !(*this == other); } + +const char* Value::asCString() const { + JSON_ASSERT_MESSAGE(type_ == stringValue, + "in Json::Value::asCString(): requires stringValue"); + if (value_.string_ == 0) return 0; + unsigned this_len; + char const* this_str; + decodePrefixedString(this->allocated_, this->value_.string_, &this_len, &this_str); + return this_str; +} + +bool Value::getString(char const** str, char const** end) const { + if (type_ != stringValue) return false; + if (value_.string_ == 0) return false; + unsigned length; + decodePrefixedString(this->allocated_, this->value_.string_, &length, str); + *end = *str + length; + return true; +} + +std::string Value::asString() const { + switch (type_) { + case nullValue: + return ""; + case stringValue: + { + if (value_.string_ == 0) return ""; + unsigned this_len; + char const* this_str; + decodePrefixedString(this->allocated_, this->value_.string_, &this_len, &this_str); + return std::string(this_str, this_len); + } + case booleanValue: + return value_.bool_ ? "true" : "false"; + case intValue: + return valueToString(value_.int_); + case uintValue: + return valueToString(value_.uint_); + case realValue: + return valueToString(value_.real_); + default: + JSON_FAIL_MESSAGE("Type is not convertible to string"); + } +} + +#ifdef JSON_USE_CPPTL +CppTL::ConstString Value::asConstString() const { + unsigned len; + char const* str; + decodePrefixedString(allocated_, value_.string_, + &len, &str); + return CppTL::ConstString(str, len); +} +#endif + +Value::Int Value::asInt() const { + switch (type_) { + case intValue: + JSON_ASSERT_MESSAGE(isInt(), "LargestInt out of Int range"); + return Int(value_.int_); + case uintValue: + JSON_ASSERT_MESSAGE(isInt(), "LargestUInt out of Int range"); + return Int(value_.uint_); + case realValue: + JSON_ASSERT_MESSAGE(InRange(value_.real_, minInt, maxInt), + "double out of Int range"); + return Int(value_.real_); + case nullValue: + return 0; + case booleanValue: + return value_.bool_ ? 
1 : 0; + default: + break; + } + JSON_FAIL_MESSAGE("Value is not convertible to Int."); +} + +Value::UInt Value::asUInt() const { + switch (type_) { + case intValue: + JSON_ASSERT_MESSAGE(isUInt(), "LargestInt out of UInt range"); + return UInt(value_.int_); + case uintValue: + JSON_ASSERT_MESSAGE(isUInt(), "LargestUInt out of UInt range"); + return UInt(value_.uint_); + case realValue: + JSON_ASSERT_MESSAGE(InRange(value_.real_, 0, maxUInt), + "double out of UInt range"); + return UInt(value_.real_); + case nullValue: + return 0; + case booleanValue: + return value_.bool_ ? 1 : 0; + default: + break; + } + JSON_FAIL_MESSAGE("Value is not convertible to UInt."); +} + +#if defined(JSON_HAS_INT64) + +Value::Int64 Value::asInt64() const { + switch (type_) { + case intValue: + return Int64(value_.int_); + case uintValue: + JSON_ASSERT_MESSAGE(isInt64(), "LargestUInt out of Int64 range"); + return Int64(value_.uint_); + case realValue: + JSON_ASSERT_MESSAGE(InRange(value_.real_, minInt64, maxInt64), + "double out of Int64 range"); + return Int64(value_.real_); + case nullValue: + return 0; + case booleanValue: + return value_.bool_ ? 1 : 0; + default: + break; + } + JSON_FAIL_MESSAGE("Value is not convertible to Int64."); +} + +Value::UInt64 Value::asUInt64() const { + switch (type_) { + case intValue: + JSON_ASSERT_MESSAGE(isUInt64(), "LargestInt out of UInt64 range"); + return UInt64(value_.int_); + case uintValue: + return UInt64(value_.uint_); + case realValue: + JSON_ASSERT_MESSAGE(InRange(value_.real_, 0, maxUInt64), + "double out of UInt64 range"); + return UInt64(value_.real_); + case nullValue: + return 0; + case booleanValue: + return value_.bool_ ? 1 : 0; + default: + break; + } + JSON_FAIL_MESSAGE("Value is not convertible to UInt64."); +} +#endif // if defined(JSON_HAS_INT64) + +LargestInt Value::asLargestInt() const { +#if defined(JSON_NO_INT64) + return asInt(); +#else + return asInt64(); +#endif +} + +LargestUInt Value::asLargestUInt() const { +#if defined(JSON_NO_INT64) + return asUInt(); +#else + return asUInt64(); +#endif +} + +double Value::asDouble() const { + switch (type_) { + case intValue: + return static_cast(value_.int_); + case uintValue: +#if !defined(JSON_USE_INT64_DOUBLE_CONVERSION) + return static_cast(value_.uint_); +#else // if !defined(JSON_USE_INT64_DOUBLE_CONVERSION) + return integerToDouble(value_.uint_); +#endif // if !defined(JSON_USE_INT64_DOUBLE_CONVERSION) + case realValue: + return value_.real_; + case nullValue: + return 0.0; + case booleanValue: + return value_.bool_ ? 1.0 : 0.0; + default: + break; + } + JSON_FAIL_MESSAGE("Value is not convertible to double."); +} + +float Value::asFloat() const { + switch (type_) { + case intValue: + return static_cast(value_.int_); + case uintValue: +#if !defined(JSON_USE_INT64_DOUBLE_CONVERSION) + return static_cast(value_.uint_); +#else // if !defined(JSON_USE_INT64_DOUBLE_CONVERSION) + return integerToDouble(value_.uint_); +#endif // if !defined(JSON_USE_INT64_DOUBLE_CONVERSION) + case realValue: + return static_cast(value_.real_); + case nullValue: + return 0.0; + case booleanValue: + return value_.bool_ ? 1.0f : 0.0f; + default: + break; + } + JSON_FAIL_MESSAGE("Value is not convertible to float."); +} + +bool Value::asBool() const { + switch (type_) { + case booleanValue: + return value_.bool_; + case nullValue: + return false; + case intValue: + return value_.int_ ? true : false; + case uintValue: + return value_.uint_ ? true : false; + case realValue: + return value_.real_ ? 
true : false; + default: + break; + } + JSON_FAIL_MESSAGE("Value is not convertible to bool."); +} + +bool Value::isConvertibleTo(ValueType other) const { + switch (other) { + case nullValue: + return (isNumeric() && asDouble() == 0.0) || + (type_ == booleanValue && value_.bool_ == false) || + (type_ == stringValue && asString() == "") || + (type_ == arrayValue && value_.map_->size() == 0) || + (type_ == objectValue && value_.map_->size() == 0) || + type_ == nullValue; + case intValue: + return isInt() || + (type_ == realValue && InRange(value_.real_, minInt, maxInt)) || + type_ == booleanValue || type_ == nullValue; + case uintValue: + return isUInt() || + (type_ == realValue && InRange(value_.real_, 0, maxUInt)) || + type_ == booleanValue || type_ == nullValue; + case realValue: + return isNumeric() || type_ == booleanValue || type_ == nullValue; + case booleanValue: + return isNumeric() || type_ == booleanValue || type_ == nullValue; + case stringValue: + return isNumeric() || type_ == booleanValue || type_ == stringValue || + type_ == nullValue; + case arrayValue: + return type_ == arrayValue || type_ == nullValue; + case objectValue: + return type_ == objectValue || type_ == nullValue; + } + JSON_ASSERT_UNREACHABLE; + return false; +} + +/// Number of values in array or object +ArrayIndex Value::size() const { + switch (type_) { + case nullValue: + case intValue: + case uintValue: + case realValue: + case booleanValue: + case stringValue: + return 0; + case arrayValue: // size of the array is highest index + 1 + if (!value_.map_->empty()) { + ObjectValues::const_iterator itLast = value_.map_->end(); + --itLast; + return (*itLast).first.index() + 1; + } + return 0; + case objectValue: + return ArrayIndex(value_.map_->size()); + } + JSON_ASSERT_UNREACHABLE; + return 0; // unreachable; +} + +bool Value::empty() const { + if (isNull() || isArray() || isObject()) + return size() == 0u; + else + return false; +} + +bool Value::operator!() const { return isNull(); } + +void Value::clear() { + JSON_ASSERT_MESSAGE(type_ == nullValue || type_ == arrayValue || + type_ == objectValue, + "in Json::Value::clear(): requires complex value"); + start_ = 0; + limit_ = 0; + switch (type_) { + case arrayValue: + case objectValue: + value_.map_->clear(); + break; + default: + break; + } +} + +void Value::resize(ArrayIndex newSize) { + JSON_ASSERT_MESSAGE(type_ == nullValue || type_ == arrayValue, + "in Json::Value::resize(): requires arrayValue"); + if (type_ == nullValue) + *this = Value(arrayValue); + ArrayIndex oldSize = size(); + if (newSize == 0) + clear(); + else if (newSize > oldSize) + (*this)[newSize - 1]; + else { + for (ArrayIndex index = newSize; index < oldSize; ++index) { + value_.map_->erase(index); + } + assert(size() == newSize); + } +} + +Value& Value::operator[](ArrayIndex index) { + JSON_ASSERT_MESSAGE( + type_ == nullValue || type_ == arrayValue, + "in Json::Value::operator[](ArrayIndex): requires arrayValue"); + if (type_ == nullValue) + *this = Value(arrayValue); + CZString key(index); + ObjectValues::iterator it = value_.map_->lower_bound(key); + if (it != value_.map_->end() && (*it).first == key) + return (*it).second; + + ObjectValues::value_type defaultValue(key, nullRef); + it = value_.map_->insert(it, defaultValue); + return (*it).second; +} + +Value& Value::operator[](int index) { + JSON_ASSERT_MESSAGE( + index >= 0, + "in Json::Value::operator[](int index): index cannot be negative"); + return (*this)[ArrayIndex(index)]; +} + +const Value& Value::operator[](ArrayIndex index) 
const { + JSON_ASSERT_MESSAGE( + type_ == nullValue || type_ == arrayValue, + "in Json::Value::operator[](ArrayIndex)const: requires arrayValue"); + if (type_ == nullValue) + return nullRef; + CZString key(index); + ObjectValues::const_iterator it = value_.map_->find(key); + if (it == value_.map_->end()) + return nullRef; + return (*it).second; +} + +const Value& Value::operator[](int index) const { + JSON_ASSERT_MESSAGE( + index >= 0, + "in Json::Value::operator[](int index) const: index cannot be negative"); + return (*this)[ArrayIndex(index)]; +} + +void Value::initBasic(ValueType type, bool allocated) { + type_ = type; + allocated_ = allocated; + comments_ = 0; + start_ = 0; + limit_ = 0; +} + +// Access an object value by name, create a null member if it does not exist. +// @pre Type of '*this' is object or null. +// @param key is null-terminated. +Value& Value::resolveReference(const char* key) { + JSON_ASSERT_MESSAGE( + type_ == nullValue || type_ == objectValue, + "in Json::Value::resolveReference(): requires objectValue"); + if (type_ == nullValue) + *this = Value(objectValue); + CZString actualKey( + key, static_cast(strlen(key)), CZString::noDuplication); // NOTE! + ObjectValues::iterator it = value_.map_->lower_bound(actualKey); + if (it != value_.map_->end() && (*it).first == actualKey) + return (*it).second; + + ObjectValues::value_type defaultValue(actualKey, nullRef); + it = value_.map_->insert(it, defaultValue); + Value& value = (*it).second; + return value; +} + +// @param key is not null-terminated. +Value& Value::resolveReference(char const* key, char const* end) +{ + JSON_ASSERT_MESSAGE( + type_ == nullValue || type_ == objectValue, + "in Json::Value::resolveReference(key, end): requires objectValue"); + if (type_ == nullValue) + *this = Value(objectValue); + CZString actualKey( + key, static_cast(end-key), CZString::duplicateOnCopy); + ObjectValues::iterator it = value_.map_->lower_bound(actualKey); + if (it != value_.map_->end() && (*it).first == actualKey) + return (*it).second; + + ObjectValues::value_type defaultValue(actualKey, nullRef); + it = value_.map_->insert(it, defaultValue); + Value& value = (*it).second; + return value; +} + +Value Value::get(ArrayIndex index, const Value& defaultValue) const { + const Value* value = &((*this)[index]); + return value == &nullRef ? 
defaultValue : *value; +} + +bool Value::isValidIndex(ArrayIndex index) const { return index < size(); } + +Value const* Value::find(char const* key, char const* end) const +{ + JSON_ASSERT_MESSAGE( + type_ == nullValue || type_ == objectValue, + "in Json::Value::find(key, end, found): requires objectValue or nullValue"); + if (type_ == nullValue) return NULL; + CZString actualKey(key, static_cast(end-key), CZString::noDuplication); + ObjectValues::const_iterator it = value_.map_->find(actualKey); + if (it == value_.map_->end()) return NULL; + return &(*it).second; +} +const Value& Value::operator[](const char* key) const +{ + Value const* found = find(key, key + strlen(key)); + if (!found) return nullRef; + return *found; +} +Value const& Value::operator[](std::string const& key) const +{ + Value const* found = find(key.data(), key.data() + key.length()); + if (!found) return nullRef; + return *found; +} + +Value& Value::operator[](const char* key) { + return resolveReference(key, key + strlen(key)); +} + +Value& Value::operator[](const std::string& key) { + return resolveReference(key.data(), key.data() + key.length()); +} + +Value& Value::operator[](const StaticString& key) { + return resolveReference(key.c_str()); +} + +#ifdef JSON_USE_CPPTL +Value& Value::operator[](const CppTL::ConstString& key) { + return resolveReference(key.c_str(), key.end_c_str()); +} +Value const& Value::operator[](CppTL::ConstString const& key) const +{ + Value const* found = find(key.c_str(), key.end_c_str()); + if (!found) return nullRef; + return *found; +} +#endif + +Value& Value::append(const Value& value) { return (*this)[size()] = value; } + +Value Value::get(char const* key, char const* end, Value const& defaultValue) const +{ + Value const* found = find(key, end); + return !found ? 
defaultValue : *found; +} +Value Value::get(char const* key, Value const& defaultValue) const +{ + return get(key, key + strlen(key), defaultValue); +} +Value Value::get(std::string const& key, Value const& defaultValue) const +{ + return get(key.data(), key.data() + key.length(), defaultValue); +} + + +bool Value::removeMember(const char* key, const char* end, Value* removed) +{ + if (type_ != objectValue) { + return false; + } + CZString actualKey(key, static_cast(end-key), CZString::noDuplication); + ObjectValues::iterator it = value_.map_->find(actualKey); + if (it == value_.map_->end()) + return false; + *removed = it->second; + value_.map_->erase(it); + return true; +} +bool Value::removeMember(const char* key, Value* removed) +{ + return removeMember(key, key + strlen(key), removed); +} +bool Value::removeMember(std::string const& key, Value* removed) +{ + return removeMember(key.data(), key.data() + key.length(), removed); +} +Value Value::removeMember(const char* key) +{ + JSON_ASSERT_MESSAGE(type_ == nullValue || type_ == objectValue, + "in Json::Value::removeMember(): requires objectValue"); + if (type_ == nullValue) + return nullRef; + + Value removed; // null + removeMember(key, key + strlen(key), &removed); + return removed; // still null if removeMember() did nothing +} +Value Value::removeMember(const std::string& key) +{ + return removeMember(key.c_str()); +} + +bool Value::removeIndex(ArrayIndex index, Value* removed) { + if (type_ != arrayValue) { + return false; + } + CZString key(index); + ObjectValues::iterator it = value_.map_->find(key); + if (it == value_.map_->end()) { + return false; + } + *removed = it->second; + ArrayIndex oldSize = size(); + // shift left all items left, into the place of the "removed" + for (ArrayIndex i = index; i < (oldSize - 1); ++i){ + CZString key(i); + (*value_.map_)[key] = (*this)[i + 1]; + } + // erase the last one ("leftover") + CZString keyLast(oldSize - 1); + ObjectValues::iterator itLast = value_.map_->find(keyLast); + value_.map_->erase(itLast); + return true; +} + +#ifdef JSON_USE_CPPTL +Value Value::get(const CppTL::ConstString& key, + const Value& defaultValue) const { + return get(key.c_str(), key.end_c_str(), defaultValue); +} +#endif + +bool Value::isMember(char const* key, char const* end) const +{ + Value const* value = find(key, end); + return NULL != value; +} +bool Value::isMember(char const* key) const +{ + return isMember(key, key + strlen(key)); +} +bool Value::isMember(std::string const& key) const +{ + return isMember(key.data(), key.data() + key.length()); +} + +#ifdef JSON_USE_CPPTL +bool Value::isMember(const CppTL::ConstString& key) const { + return isMember(key.c_str(), key.end_c_str()); +} +#endif + +Value::Members Value::getMemberNames() const { + JSON_ASSERT_MESSAGE( + type_ == nullValue || type_ == objectValue, + "in Json::Value::getMemberNames(), value must be objectValue"); + if (type_ == nullValue) + return Value::Members(); + Members members; + members.reserve(value_.map_->size()); + ObjectValues::const_iterator it = value_.map_->begin(); + ObjectValues::const_iterator itEnd = value_.map_->end(); + for (; it != itEnd; ++it) { + members.push_back(std::string((*it).first.data(), + (*it).first.length())); + } + return members; +} +// +//# ifdef JSON_USE_CPPTL +// EnumMemberNames +// Value::enumMemberNames() const +//{ +// if ( type_ == objectValue ) +// { +// return CppTL::Enum::any( CppTL::Enum::transform( +// CppTL::Enum::keys( *(value_.map_), CppTL::Type() ), +// MemberNamesTransform() ) ); +// } +// 
return EnumMemberNames(); +//} +// +// +// EnumValues +// Value::enumValues() const +//{ +// if ( type_ == objectValue || type_ == arrayValue ) +// return CppTL::Enum::anyValues( *(value_.map_), +// CppTL::Type() ); +// return EnumValues(); +//} +// +//# endif + +static bool IsIntegral(double d) { + double integral_part; + return modf(d, &integral_part) == 0.0; +} + +bool Value::isNull() const { return type_ == nullValue; } + +bool Value::isBool() const { return type_ == booleanValue; } + +bool Value::isInt() const { + switch (type_) { + case intValue: + return value_.int_ >= minInt && value_.int_ <= maxInt; + case uintValue: + return value_.uint_ <= UInt(maxInt); + case realValue: + return value_.real_ >= minInt && value_.real_ <= maxInt && + IsIntegral(value_.real_); + default: + break; + } + return false; +} + +bool Value::isUInt() const { + switch (type_) { + case intValue: + return value_.int_ >= 0 && LargestUInt(value_.int_) <= LargestUInt(maxUInt); + case uintValue: + return value_.uint_ <= maxUInt; + case realValue: + return value_.real_ >= 0 && value_.real_ <= maxUInt && + IsIntegral(value_.real_); + default: + break; + } + return false; +} + +bool Value::isInt64() const { +#if defined(JSON_HAS_INT64) + switch (type_) { + case intValue: + return true; + case uintValue: + return value_.uint_ <= UInt64(maxInt64); + case realValue: + // Note that maxInt64 (= 2^63 - 1) is not exactly representable as a + // double, so double(maxInt64) will be rounded up to 2^63. Therefore we + // require the value to be strictly less than the limit. + return value_.real_ >= double(minInt64) && + value_.real_ < double(maxInt64) && IsIntegral(value_.real_); + default: + break; + } +#endif // JSON_HAS_INT64 + return false; +} + +bool Value::isUInt64() const { +#if defined(JSON_HAS_INT64) + switch (type_) { + case intValue: + return value_.int_ >= 0; + case uintValue: + return true; + case realValue: + // Note that maxUInt64 (= 2^64 - 1) is not exactly representable as a + // double, so double(maxUInt64) will be rounded up to 2^64. Therefore we + // require the value to be strictly less than the limit. + return value_.real_ >= 0 && value_.real_ < maxUInt64AsDouble && + IsIntegral(value_.real_); + default: + break; + } +#endif // JSON_HAS_INT64 + return false; +} + +bool Value::isIntegral() const { +#if defined(JSON_HAS_INT64) + return isInt64() || isUInt64(); +#else + return isInt() || isUInt(); +#endif +} + +bool Value::isDouble() const { return type_ == realValue || isIntegral(); } + +bool Value::isNumeric() const { return isIntegral() || isDouble(); } + +bool Value::isString() const { return type_ == stringValue; } + +bool Value::isArray() const { return type_ == arrayValue; } + +bool Value::isObject() const { return type_ == objectValue; } + +void Value::setComment(const char* comment, size_t len, CommentPlacement placement) { + if (!comments_) + comments_ = new CommentInfo[numberOfCommentPlacement]; + if ((len > 0) && (comment[len-1] == '\n')) { + // Always discard trailing newline, to aid indentation. 
+ len -= 1; + } + comments_[placement].setComment(comment, len); +} + +void Value::setComment(const char* comment, CommentPlacement placement) { + setComment(comment, strlen(comment), placement); +} + +void Value::setComment(const std::string& comment, CommentPlacement placement) { + setComment(comment.c_str(), comment.length(), placement); +} + +bool Value::hasComment(CommentPlacement placement) const { + return comments_ != 0 && comments_[placement].comment_ != 0; +} + +std::string Value::getComment(CommentPlacement placement) const { + if (hasComment(placement)) + return comments_[placement].comment_; + return ""; +} + +void Value::setOffsetStart(size_t start) { start_ = start; } + +void Value::setOffsetLimit(size_t limit) { limit_ = limit; } + +size_t Value::getOffsetStart() const { return start_; } + +size_t Value::getOffsetLimit() const { return limit_; } + +std::string Value::toStyledString() const { + StyledWriter writer; + return writer.write(*this); +} + +Value::const_iterator Value::begin() const { + switch (type_) { + case arrayValue: + case objectValue: + if (value_.map_) + return const_iterator(value_.map_->begin()); + break; + default: + break; + } + return const_iterator(); +} + +Value::const_iterator Value::end() const { + switch (type_) { + case arrayValue: + case objectValue: + if (value_.map_) + return const_iterator(value_.map_->end()); + break; + default: + break; + } + return const_iterator(); +} + +Value::iterator Value::begin() { + switch (type_) { + case arrayValue: + case objectValue: + if (value_.map_) + return iterator(value_.map_->begin()); + break; + default: + break; + } + return iterator(); +} + +Value::iterator Value::end() { + switch (type_) { + case arrayValue: + case objectValue: + if (value_.map_) + return iterator(value_.map_->end()); + break; + default: + break; + } + return iterator(); +} + +// class PathArgument +// ////////////////////////////////////////////////////////////////// + +PathArgument::PathArgument() : key_(), index_(), kind_(kindNone) {} + +PathArgument::PathArgument(ArrayIndex index) + : key_(), index_(index), kind_(kindIndex) {} + +PathArgument::PathArgument(const char* key) + : key_(key), index_(), kind_(kindKey) {} + +PathArgument::PathArgument(const std::string& key) + : key_(key.c_str()), index_(), kind_(kindKey) {} + +// class Path +// ////////////////////////////////////////////////////////////////// + +Path::Path(const std::string& path, + const PathArgument& a1, + const PathArgument& a2, + const PathArgument& a3, + const PathArgument& a4, + const PathArgument& a5) { + InArgs in; + in.push_back(&a1); + in.push_back(&a2); + in.push_back(&a3); + in.push_back(&a4); + in.push_back(&a5); + makePath(path, in); +} + +void Path::makePath(const std::string& path, const InArgs& in) { + const char* current = path.c_str(); + const char* end = current + path.length(); + InArgs::const_iterator itInArg = in.begin(); + while (current != end) { + if (*current == '[') { + ++current; + if (*current == '%') + addPathInArg(path, in, itInArg, PathArgument::kindIndex); + else { + ArrayIndex index = 0; + for (; current != end && *current >= '0' && *current <= '9'; ++current) + index = index * 10 + ArrayIndex(*current - '0'); + args_.push_back(index); + } + if (current == end || *current++ != ']') + invalidPath(path, int(current - path.c_str())); + } else if (*current == '%') { + addPathInArg(path, in, itInArg, PathArgument::kindKey); + ++current; + } else if (*current == '.') { + ++current; + } else { + const char* beginName = current; + while 
(current != end && !strchr("[.", *current)) + ++current; + args_.push_back(std::string(beginName, current)); + } + } +} + +void Path::addPathInArg(const std::string& /*path*/, + const InArgs& in, + InArgs::const_iterator& itInArg, + PathArgument::Kind kind) { + if (itInArg == in.end()) { + // Error: missing argument %d + } else if ((*itInArg)->kind_ != kind) { + // Error: bad argument type + } else { + args_.push_back(**itInArg); + } +} + +void Path::invalidPath(const std::string& /*path*/, int /*location*/) { + // Error: invalid path. +} + +const Value& Path::resolve(const Value& root) const { + const Value* node = &root; + for (Args::const_iterator it = args_.begin(); it != args_.end(); ++it) { + const PathArgument& arg = *it; + if (arg.kind_ == PathArgument::kindIndex) { + if (!node->isArray() || !node->isValidIndex(arg.index_)) { + // Error: unable to resolve path (array value expected at position... + } + node = &((*node)[arg.index_]); + } else if (arg.kind_ == PathArgument::kindKey) { + if (!node->isObject()) { + // Error: unable to resolve path (object value expected at position...) + } + node = &((*node)[arg.key_]); + if (node == &Value::nullRef) { + // Error: unable to resolve path (object has no member named '' at + // position...) + } + } + } + return *node; +} + +Value Path::resolve(const Value& root, const Value& defaultValue) const { + const Value* node = &root; + for (Args::const_iterator it = args_.begin(); it != args_.end(); ++it) { + const PathArgument& arg = *it; + if (arg.kind_ == PathArgument::kindIndex) { + if (!node->isArray() || !node->isValidIndex(arg.index_)) + return defaultValue; + node = &((*node)[arg.index_]); + } else if (arg.kind_ == PathArgument::kindKey) { + if (!node->isObject()) + return defaultValue; + node = &((*node)[arg.key_]); + if (node == &Value::nullRef) + return defaultValue; + } + } + return *node; +} + +Value& Path::make(Value& root) const { + Value* node = &root; + for (Args::const_iterator it = args_.begin(); it != args_.end(); ++it) { + const PathArgument& arg = *it; + if (arg.kind_ == PathArgument::kindIndex) { + if (!node->isArray()) { + // Error: node is not an array at position ... + } + node = &((*node)[arg.index_]); + } else if (arg.kind_ == PathArgument::kindKey) { + if (!node->isObject()) { + // Error: node is not an object at position... + } + node = &((*node)[arg.key_]); + } + } + return *node; +} + +} // namespace Json + +// ////////////////////////////////////////////////////////////////////// +// End of content of file: src/lib_json/json_value.cpp +// ////////////////////////////////////////////////////////////////////// + + + + + + +// ////////////////////////////////////////////////////////////////////// +// Beginning of content of file: src/lib_json/json_writer.cpp +// ////////////////////////////////////////////////////////////////////// + +// Copyright 2011 Baptiste Lepilleur +// Distributed under MIT license, or public domain if desired and +// recognized in your jurisdiction. 
+// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
+
+#if !defined(JSON_IS_AMALGAMATION)
+#include <json/writer.h>
+#include "json_tool.h"
+#endif // if !defined(JSON_IS_AMALGAMATION)
+#include <iomanip>
+#include <memory>
+#include <sstream>
+#include <utility>
+#include <set>
+#include <cassert>
+#include <cstring>
+#include <cstdio>
+
+#if defined(_MSC_VER) && _MSC_VER >= 1200 && _MSC_VER < 1800 // Between VC++ 6.0 and VC++ 11.0
+#include <float.h>
+#define isfinite _finite
+#elif defined(__sun) && defined(__SVR4) //Solaris
+#include <ieeefp.h>
+#define isfinite finite
+#else
+#include <cmath>
+#define isfinite std::isfinite
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER < 1500 // VC++ 8.0 and below
+#define snprintf _snprintf
+#elif __cplusplus >= 201103L
+#define snprintf std::snprintf
+#endif
+
+#if defined(__BORLANDC__)
+#include <float.h>
+#define isfinite _finite
+#define snprintf _snprintf
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER >= 1400 // VC++ 8.0
+// Disable warning about strdup being deprecated.
+#pragma warning(disable : 4996)
+#endif
+
+namespace Json {
+
+#if __cplusplus >= 201103L
+typedef std::unique_ptr<StreamWriter> StreamWriterPtr;
+#else
+typedef std::auto_ptr<StreamWriter> StreamWriterPtr;
+#endif
+
+static bool containsControlCharacter(const char* str) {
+  while (*str) {
+    if (isControlCharacter(*(str++)))
+      return true;
+  }
+  return false;
+}
+
+static bool containsControlCharacter0(const char* str, unsigned len) {
+  char const* end = str + len;
+  while (end != str) {
+    if (isControlCharacter(*str) || 0==*str)
+      return true;
+    ++str;
+  }
+  return false;
+}
+
+std::string valueToString(LargestInt value) {
+  UIntToStringBuffer buffer;
+  char* current = buffer + sizeof(buffer);
+  bool isNegative = value < 0;
+  if (isNegative)
+    value = -value;
+  uintToString(LargestUInt(value), current);
+  if (isNegative)
+    *--current = '-';
+  assert(current >= buffer);
+  return current;
+}
+
+std::string valueToString(LargestUInt value) {
+  UIntToStringBuffer buffer;
+  char* current = buffer + sizeof(buffer);
+  uintToString(value, current);
+  assert(current >= buffer);
+  return current;
+}
+
+#if defined(JSON_HAS_INT64)
+
+std::string valueToString(Int value) {
+  return valueToString(LargestInt(value));
+}
+
+std::string valueToString(UInt value) {
+  return valueToString(LargestUInt(value));
+}
+
+#endif // # if defined(JSON_HAS_INT64)
+
+std::string valueToString(double value) {
+  // Allocate a buffer that is more than large enough to store the 16 digits of
+  // precision requested below.
+  char buffer[32];
+  int len = -1;
+
+// Print into the buffer. We need not request the alternative representation
+// that always has a decimal point because JSON doesn't distinguish the
+// concepts of reals and integers.
+#if defined(_MSC_VER) && defined(__STDC_SECURE_LIB__) // Use secure version with
+                                                      // visual studio 2005 to
+                                                      // avoid warning.
+#if defined(WINCE)
+  len = _snprintf(buffer, sizeof(buffer), "%.17g", value);
+#else
+  len = sprintf_s(buffer, sizeof(buffer), "%.17g", value);
+#endif
+#else
+  if (isfinite(value)) {
+    len = snprintf(buffer, sizeof(buffer), "%.17g", value);
+  } else {
+    // IEEE standard states that NaN values will not compare to themselves
+    if (value != value) {
+      len = snprintf(buffer, sizeof(buffer), "null");
+    } else if (value < 0) {
+      len = snprintf(buffer, sizeof(buffer), "-1e+9999");
+    } else {
+      len = snprintf(buffer, sizeof(buffer), "1e+9999");
+    }
+    // For those, we do not need to call fixNumLoc, but it is fast.
+  }
+#endif
+  assert(len >= 0);
+  fixNumericLocale(buffer, buffer + len);
+  return buffer;
+}
+
+std::string valueToString(bool value) { return value ?
"true" : "false"; } + +std::string valueToQuotedString(const char* value) { + if (value == NULL) + return ""; + // Not sure how to handle unicode... + if (strpbrk(value, "\"\\\b\f\n\r\t") == NULL && + !containsControlCharacter(value)) + return std::string("\"") + value + "\""; + // We have to walk value and escape any special characters. + // Appending to std::string is not efficient, but this should be rare. + // (Note: forward slashes are *not* rare, but I am not escaping them.) + std::string::size_type maxsize = + strlen(value) * 2 + 3; // allescaped+quotes+NULL + std::string result; + result.reserve(maxsize); // to avoid lots of mallocs + result += "\""; + for (const char* c = value; *c != 0; ++c) { + switch (*c) { + case '\"': + result += "\\\""; + break; + case '\\': + result += "\\\\"; + break; + case '\b': + result += "\\b"; + break; + case '\f': + result += "\\f"; + break; + case '\n': + result += "\\n"; + break; + case '\r': + result += "\\r"; + break; + case '\t': + result += "\\t"; + break; + // case '/': + // Even though \/ is considered a legal escape in JSON, a bare + // slash is also legal, so I see no reason to escape it. + // (I hope I am not misunderstanding something. + // blep notes: actually escaping \/ may be useful in javascript to avoid (*c); + result += oss.str(); + } else { + result += *c; + } + break; + } + } + result += "\""; + return result; +} + +// https://github.com/upcaste/upcaste/blob/master/src/upcore/src/cstring/strnpbrk.cpp +static char const* strnpbrk(char const* s, char const* accept, size_t n) { + assert((s || !n) && accept); + + char const* const end = s + n; + for (char const* cur = s; cur < end; ++cur) { + int const c = *cur; + for (char const* a = accept; *a; ++a) { + if (*a == c) { + return cur; + } + } + } + return NULL; +} +static std::string valueToQuotedStringN(const char* value, unsigned length) { + if (value == NULL) + return ""; + // Not sure how to handle unicode... + if (strnpbrk(value, "\"\\\b\f\n\r\t", length) == NULL && + !containsControlCharacter0(value, length)) + return std::string("\"") + value + "\""; + // We have to walk value and escape any special characters. + // Appending to std::string is not efficient, but this should be rare. + // (Note: forward slashes are *not* rare, but I am not escaping them.) + std::string::size_type maxsize = + length * 2 + 3; // allescaped+quotes+NULL + std::string result; + result.reserve(maxsize); // to avoid lots of mallocs + result += "\""; + char const* end = value + length; + for (const char* c = value; c != end; ++c) { + switch (*c) { + case '\"': + result += "\\\""; + break; + case '\\': + result += "\\\\"; + break; + case '\b': + result += "\\b"; + break; + case '\f': + result += "\\f"; + break; + case '\n': + result += "\\n"; + break; + case '\r': + result += "\\r"; + break; + case '\t': + result += "\\t"; + break; + // case '/': + // Even though \/ is considered a legal escape in JSON, a bare + // slash is also legal, so I see no reason to escape it. + // (I hope I am not misunderstanding something.) 
+ // blep notes: actually escaping \/ may be useful in javascript to avoid (*c); + result += oss.str(); + } else { + result += *c; + } + break; + } + } + result += "\""; + return result; +} + +// Class Writer +// ////////////////////////////////////////////////////////////////// +Writer::~Writer() {} + +// Class FastWriter +// ////////////////////////////////////////////////////////////////// + +FastWriter::FastWriter() + : yamlCompatiblityEnabled_(false), dropNullPlaceholders_(false), + omitEndingLineFeed_(false) {} + +void FastWriter::enableYAMLCompatibility() { yamlCompatiblityEnabled_ = true; } + +void FastWriter::dropNullPlaceholders() { dropNullPlaceholders_ = true; } + +void FastWriter::omitEndingLineFeed() { omitEndingLineFeed_ = true; } + +std::string FastWriter::write(const Value& root) { + document_ = ""; + writeValue(root); + if (!omitEndingLineFeed_) + document_ += "\n"; + return document_; +} + +void FastWriter::writeValue(const Value& value) { + switch (value.type()) { + case nullValue: + if (!dropNullPlaceholders_) + document_ += "null"; + break; + case intValue: + document_ += valueToString(value.asLargestInt()); + break; + case uintValue: + document_ += valueToString(value.asLargestUInt()); + break; + case realValue: + document_ += valueToString(value.asDouble()); + break; + case stringValue: + { + // Is NULL possible for value.string_? + char const* str; + char const* end; + bool ok = value.getString(&str, &end); + if (ok) document_ += valueToQuotedStringN(str, static_cast(end-str)); + break; + } + case booleanValue: + document_ += valueToString(value.asBool()); + break; + case arrayValue: { + document_ += '['; + int size = value.size(); + for (int index = 0; index < size; ++index) { + if (index > 0) + document_ += ','; + writeValue(value[index]); + } + document_ += ']'; + } break; + case objectValue: { + Value::Members members(value.getMemberNames()); + document_ += '{'; + for (Value::Members::iterator it = members.begin(); it != members.end(); + ++it) { + const std::string& name = *it; + if (it != members.begin()) + document_ += ','; + document_ += valueToQuotedStringN(name.data(), name.length()); + document_ += yamlCompatiblityEnabled_ ? ": " : ":"; + writeValue(value[name]); + } + document_ += '}'; + } break; + } +} + +// Class StyledWriter +// ////////////////////////////////////////////////////////////////// + +StyledWriter::StyledWriter() + : rightMargin_(74), indentSize_(3), addChildValues_() {} + +std::string StyledWriter::write(const Value& root) { + document_ = ""; + addChildValues_ = false; + indentString_ = ""; + writeCommentBeforeValue(root); + writeValue(root); + writeCommentAfterValueOnSameLine(root); + document_ += "\n"; + return document_; +} + +void StyledWriter::writeValue(const Value& value) { + switch (value.type()) { + case nullValue: + pushValue("null"); + break; + case intValue: + pushValue(valueToString(value.asLargestInt())); + break; + case uintValue: + pushValue(valueToString(value.asLargestUInt())); + break; + case realValue: + pushValue(valueToString(value.asDouble())); + break; + case stringValue: + { + // Is NULL possible for value.string_? 
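+      // getString() returns false when value_.string_ is NULL, so the
+      // pushValue("") fallback below covers that case.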
+ char const* str; + char const* end; + bool ok = value.getString(&str, &end); + if (ok) pushValue(valueToQuotedStringN(str, static_cast(end-str))); + else pushValue(""); + break; + } + case booleanValue: + pushValue(valueToString(value.asBool())); + break; + case arrayValue: + writeArrayValue(value); + break; + case objectValue: { + Value::Members members(value.getMemberNames()); + if (members.empty()) + pushValue("{}"); + else { + writeWithIndent("{"); + indent(); + Value::Members::iterator it = members.begin(); + for (;;) { + const std::string& name = *it; + const Value& childValue = value[name]; + writeCommentBeforeValue(childValue); + writeWithIndent(valueToQuotedString(name.c_str())); + document_ += " : "; + writeValue(childValue); + if (++it == members.end()) { + writeCommentAfterValueOnSameLine(childValue); + break; + } + document_ += ','; + writeCommentAfterValueOnSameLine(childValue); + } + unindent(); + writeWithIndent("}"); + } + } break; + } +} + +void StyledWriter::writeArrayValue(const Value& value) { + unsigned size = value.size(); + if (size == 0) + pushValue("[]"); + else { + bool isArrayMultiLine = isMultineArray(value); + if (isArrayMultiLine) { + writeWithIndent("["); + indent(); + bool hasChildValue = !childValues_.empty(); + unsigned index = 0; + for (;;) { + const Value& childValue = value[index]; + writeCommentBeforeValue(childValue); + if (hasChildValue) + writeWithIndent(childValues_[index]); + else { + writeIndent(); + writeValue(childValue); + } + if (++index == size) { + writeCommentAfterValueOnSameLine(childValue); + break; + } + document_ += ','; + writeCommentAfterValueOnSameLine(childValue); + } + unindent(); + writeWithIndent("]"); + } else // output on a single line + { + assert(childValues_.size() == size); + document_ += "[ "; + for (unsigned index = 0; index < size; ++index) { + if (index > 0) + document_ += ", "; + document_ += childValues_[index]; + } + document_ += " ]"; + } + } +} + +bool StyledWriter::isMultineArray(const Value& value) { + int size = value.size(); + bool isMultiLine = size * 3 >= rightMargin_; + childValues_.clear(); + for (int index = 0; index < size && !isMultiLine; ++index) { + const Value& childValue = value[index]; + isMultiLine = + isMultiLine || ((childValue.isArray() || childValue.isObject()) && + childValue.size() > 0); + } + if (!isMultiLine) // check if line length > max line length + { + childValues_.reserve(size); + addChildValues_ = true; + int lineLength = 4 + (size - 1) * 2; // '[ ' + ', '*n + ' ]' + for (int index = 0; index < size; ++index) { + if (hasCommentForValue(value[index])) { + isMultiLine = true; + } + writeValue(value[index]); + lineLength += int(childValues_[index].length()); + } + addChildValues_ = false; + isMultiLine = isMultiLine || lineLength >= rightMargin_; + } + return isMultiLine; +} + +void StyledWriter::pushValue(const std::string& value) { + if (addChildValues_) + childValues_.push_back(value); + else + document_ += value; +} + +void StyledWriter::writeIndent() { + if (!document_.empty()) { + char last = document_[document_.length() - 1]; + if (last == ' ') // already indented + return; + if (last != '\n') // Comments may add new-line + document_ += '\n'; + } + document_ += indentString_; +} + +void StyledWriter::writeWithIndent(const std::string& value) { + writeIndent(); + document_ += value; +} + +void StyledWriter::indent() { indentString_ += std::string(indentSize_, ' '); } + +void StyledWriter::unindent() { + assert(int(indentString_.size()) >= indentSize_); + 
indentString_.resize(indentString_.size() - indentSize_); +} + +void StyledWriter::writeCommentBeforeValue(const Value& root) { + if (!root.hasComment(commentBefore)) + return; + + document_ += "\n"; + writeIndent(); + const std::string& comment = root.getComment(commentBefore); + std::string::const_iterator iter = comment.begin(); + while (iter != comment.end()) { + document_ += *iter; + if (*iter == '\n' && + (iter != comment.end() && *(iter + 1) == '/')) + writeIndent(); + ++iter; + } + + // Comments are stripped of trailing newlines, so add one here + document_ += "\n"; +} + +void StyledWriter::writeCommentAfterValueOnSameLine(const Value& root) { + if (root.hasComment(commentAfterOnSameLine)) + document_ += " " + root.getComment(commentAfterOnSameLine); + + if (root.hasComment(commentAfter)) { + document_ += "\n"; + document_ += root.getComment(commentAfter); + document_ += "\n"; + } +} + +bool StyledWriter::hasCommentForValue(const Value& value) { + return value.hasComment(commentBefore) || + value.hasComment(commentAfterOnSameLine) || + value.hasComment(commentAfter); +} + +// Class StyledStreamWriter +// ////////////////////////////////////////////////////////////////// + +StyledStreamWriter::StyledStreamWriter(std::string indentation) + : document_(NULL), rightMargin_(74), indentation_(indentation), + addChildValues_() {} + +void StyledStreamWriter::write(std::ostream& out, const Value& root) { + document_ = &out; + addChildValues_ = false; + indentString_ = ""; + indented_ = true; + writeCommentBeforeValue(root); + if (!indented_) writeIndent(); + indented_ = true; + writeValue(root); + writeCommentAfterValueOnSameLine(root); + *document_ << "\n"; + document_ = NULL; // Forget the stream, for safety. +} + +void StyledStreamWriter::writeValue(const Value& value) { + switch (value.type()) { + case nullValue: + pushValue("null"); + break; + case intValue: + pushValue(valueToString(value.asLargestInt())); + break; + case uintValue: + pushValue(valueToString(value.asLargestUInt())); + break; + case realValue: + pushValue(valueToString(value.asDouble())); + break; + case stringValue: + { + // Is NULL possible for value.string_? 
+ char const* str; + char const* end; + bool ok = value.getString(&str, &end); + if (ok) pushValue(valueToQuotedStringN(str, static_cast(end-str))); + else pushValue(""); + break; + } + case booleanValue: + pushValue(valueToString(value.asBool())); + break; + case arrayValue: + writeArrayValue(value); + break; + case objectValue: { + Value::Members members(value.getMemberNames()); + if (members.empty()) + pushValue("{}"); + else { + writeWithIndent("{"); + indent(); + Value::Members::iterator it = members.begin(); + for (;;) { + const std::string& name = *it; + const Value& childValue = value[name]; + writeCommentBeforeValue(childValue); + writeWithIndent(valueToQuotedString(name.c_str())); + *document_ << " : "; + writeValue(childValue); + if (++it == members.end()) { + writeCommentAfterValueOnSameLine(childValue); + break; + } + *document_ << ","; + writeCommentAfterValueOnSameLine(childValue); + } + unindent(); + writeWithIndent("}"); + } + } break; + } +} + +void StyledStreamWriter::writeArrayValue(const Value& value) { + unsigned size = value.size(); + if (size == 0) + pushValue("[]"); + else { + bool isArrayMultiLine = isMultineArray(value); + if (isArrayMultiLine) { + writeWithIndent("["); + indent(); + bool hasChildValue = !childValues_.empty(); + unsigned index = 0; + for (;;) { + const Value& childValue = value[index]; + writeCommentBeforeValue(childValue); + if (hasChildValue) + writeWithIndent(childValues_[index]); + else { + if (!indented_) writeIndent(); + indented_ = true; + writeValue(childValue); + indented_ = false; + } + if (++index == size) { + writeCommentAfterValueOnSameLine(childValue); + break; + } + *document_ << ","; + writeCommentAfterValueOnSameLine(childValue); + } + unindent(); + writeWithIndent("]"); + } else // output on a single line + { + assert(childValues_.size() == size); + *document_ << "[ "; + for (unsigned index = 0; index < size; ++index) { + if (index > 0) + *document_ << ", "; + *document_ << childValues_[index]; + } + *document_ << " ]"; + } + } +} + +bool StyledStreamWriter::isMultineArray(const Value& value) { + int size = value.size(); + bool isMultiLine = size * 3 >= rightMargin_; + childValues_.clear(); + for (int index = 0; index < size && !isMultiLine; ++index) { + const Value& childValue = value[index]; + isMultiLine = + isMultiLine || ((childValue.isArray() || childValue.isObject()) && + childValue.size() > 0); + } + if (!isMultiLine) // check if line length > max line length + { + childValues_.reserve(size); + addChildValues_ = true; + int lineLength = 4 + (size - 1) * 2; // '[ ' + ', '*n + ' ]' + for (int index = 0; index < size; ++index) { + if (hasCommentForValue(value[index])) { + isMultiLine = true; + } + writeValue(value[index]); + lineLength += int(childValues_[index].length()); + } + addChildValues_ = false; + isMultiLine = isMultiLine || lineLength >= rightMargin_; + } + return isMultiLine; +} + +void StyledStreamWriter::pushValue(const std::string& value) { + if (addChildValues_) + childValues_.push_back(value); + else + *document_ << value; +} + +void StyledStreamWriter::writeIndent() { + // blep intended this to look at the so-far-written string + // to determine whether we are already indented, but + // with a stream we cannot do that. So we rely on some saved state. + // The caller checks indented_. 
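+  // Unlike StyledWriter::writeIndent(), this version always emits a newline;
+  // callers consult indented_ first so the newline is not written twice.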
+ *document_ << '\n' << indentString_; +} + +void StyledStreamWriter::writeWithIndent(const std::string& value) { + if (!indented_) writeIndent(); + *document_ << value; + indented_ = false; +} + +void StyledStreamWriter::indent() { indentString_ += indentation_; } + +void StyledStreamWriter::unindent() { + assert(indentString_.size() >= indentation_.size()); + indentString_.resize(indentString_.size() - indentation_.size()); +} + +void StyledStreamWriter::writeCommentBeforeValue(const Value& root) { + if (!root.hasComment(commentBefore)) + return; + + if (!indented_) writeIndent(); + const std::string& comment = root.getComment(commentBefore); + std::string::const_iterator iter = comment.begin(); + while (iter != comment.end()) { + *document_ << *iter; + if (*iter == '\n' && + (iter != comment.end() && *(iter + 1) == '/')) + // writeIndent(); // would include newline + *document_ << indentString_; + ++iter; + } + indented_ = false; +} + +void StyledStreamWriter::writeCommentAfterValueOnSameLine(const Value& root) { + if (root.hasComment(commentAfterOnSameLine)) + *document_ << ' ' << root.getComment(commentAfterOnSameLine); + + if (root.hasComment(commentAfter)) { + writeIndent(); + *document_ << root.getComment(commentAfter); + } + indented_ = false; +} + +bool StyledStreamWriter::hasCommentForValue(const Value& value) { + return value.hasComment(commentBefore) || + value.hasComment(commentAfterOnSameLine) || + value.hasComment(commentAfter); +} + +////////////////////////// +// BuiltStyledStreamWriter + +/// Scoped enums are not available until C++11. +struct CommentStyle { + /// Decide whether to write comments. + enum Enum { + None, ///< Drop all comments. + Most, ///< Recover odd behavior of previous versions (not implemented yet). + All ///< Keep all comments. 
+ }; +}; + +struct BuiltStyledStreamWriter : public StreamWriter +{ + BuiltStyledStreamWriter( + std::string const& indentation, + CommentStyle::Enum cs, + std::string const& colonSymbol, + std::string const& nullSymbol, + std::string const& endingLineFeedSymbol); + virtual int write(Value const& root, std::ostream* sout); +private: + void writeValue(Value const& value); + void writeArrayValue(Value const& value); + bool isMultineArray(Value const& value); + void pushValue(std::string const& value); + void writeIndent(); + void writeWithIndent(std::string const& value); + void indent(); + void unindent(); + void writeCommentBeforeValue(Value const& root); + void writeCommentAfterValueOnSameLine(Value const& root); + static bool hasCommentForValue(const Value& value); + + typedef std::vector ChildValues; + + ChildValues childValues_; + std::string indentString_; + int rightMargin_; + std::string indentation_; + CommentStyle::Enum cs_; + std::string colonSymbol_; + std::string nullSymbol_; + std::string endingLineFeedSymbol_; + bool addChildValues_ : 1; + bool indented_ : 1; +}; +BuiltStyledStreamWriter::BuiltStyledStreamWriter( + std::string const& indentation, + CommentStyle::Enum cs, + std::string const& colonSymbol, + std::string const& nullSymbol, + std::string const& endingLineFeedSymbol) + : rightMargin_(74) + , indentation_(indentation) + , cs_(cs) + , colonSymbol_(colonSymbol) + , nullSymbol_(nullSymbol) + , endingLineFeedSymbol_(endingLineFeedSymbol) + , addChildValues_(false) + , indented_(false) +{ +} +int BuiltStyledStreamWriter::write(Value const& root, std::ostream* sout) +{ + sout_ = sout; + addChildValues_ = false; + indented_ = true; + indentString_ = ""; + writeCommentBeforeValue(root); + if (!indented_) writeIndent(); + indented_ = true; + writeValue(root); + writeCommentAfterValueOnSameLine(root); + *sout_ << endingLineFeedSymbol_; + sout_ = NULL; + return 0; +} +void BuiltStyledStreamWriter::writeValue(Value const& value) { + switch (value.type()) { + case nullValue: + pushValue(nullSymbol_); + break; + case intValue: + pushValue(valueToString(value.asLargestInt())); + break; + case uintValue: + pushValue(valueToString(value.asLargestUInt())); + break; + case realValue: + pushValue(valueToString(value.asDouble())); + break; + case stringValue: + { + // Is NULL is possible for value.string_? 
+ char const* str; + char const* end; + bool ok = value.getString(&str, &end); + if (ok) pushValue(valueToQuotedStringN(str, static_cast(end-str))); + else pushValue(""); + break; + } + case booleanValue: + pushValue(valueToString(value.asBool())); + break; + case arrayValue: + writeArrayValue(value); + break; + case objectValue: { + Value::Members members(value.getMemberNames()); + if (members.empty()) + pushValue("{}"); + else { + writeWithIndent("{"); + indent(); + Value::Members::iterator it = members.begin(); + for (;;) { + std::string const& name = *it; + Value const& childValue = value[name]; + writeCommentBeforeValue(childValue); + writeWithIndent(valueToQuotedStringN(name.data(), name.length())); + *sout_ << colonSymbol_; + writeValue(childValue); + if (++it == members.end()) { + writeCommentAfterValueOnSameLine(childValue); + break; + } + *sout_ << ","; + writeCommentAfterValueOnSameLine(childValue); + } + unindent(); + writeWithIndent("}"); + } + } break; + } +} + +void BuiltStyledStreamWriter::writeArrayValue(Value const& value) { + unsigned size = value.size(); + if (size == 0) + pushValue("[]"); + else { + bool isMultiLine = (cs_ == CommentStyle::All) || isMultineArray(value); + if (isMultiLine) { + writeWithIndent("["); + indent(); + bool hasChildValue = !childValues_.empty(); + unsigned index = 0; + for (;;) { + Value const& childValue = value[index]; + writeCommentBeforeValue(childValue); + if (hasChildValue) + writeWithIndent(childValues_[index]); + else { + if (!indented_) writeIndent(); + indented_ = true; + writeValue(childValue); + indented_ = false; + } + if (++index == size) { + writeCommentAfterValueOnSameLine(childValue); + break; + } + *sout_ << ","; + writeCommentAfterValueOnSameLine(childValue); + } + unindent(); + writeWithIndent("]"); + } else // output on a single line + { + assert(childValues_.size() == size); + *sout_ << "["; + if (!indentation_.empty()) *sout_ << " "; + for (unsigned index = 0; index < size; ++index) { + if (index > 0) + *sout_ << ", "; + *sout_ << childValues_[index]; + } + if (!indentation_.empty()) *sout_ << " "; + *sout_ << "]"; + } + } +} + +bool BuiltStyledStreamWriter::isMultineArray(Value const& value) { + int size = value.size(); + bool isMultiLine = size * 3 >= rightMargin_; + childValues_.clear(); + for (int index = 0; index < size && !isMultiLine; ++index) { + Value const& childValue = value[index]; + isMultiLine = + isMultiLine || ((childValue.isArray() || childValue.isObject()) && + childValue.size() > 0); + } + if (!isMultiLine) // check if line length > max line length + { + childValues_.reserve(size); + addChildValues_ = true; + int lineLength = 4 + (size - 1) * 2; // '[ ' + ', '*n + ' ]' + for (int index = 0; index < size; ++index) { + if (hasCommentForValue(value[index])) { + isMultiLine = true; + } + writeValue(value[index]); + lineLength += int(childValues_[index].length()); + } + addChildValues_ = false; + isMultiLine = isMultiLine || lineLength >= rightMargin_; + } + return isMultiLine; +} + +void BuiltStyledStreamWriter::pushValue(std::string const& value) { + if (addChildValues_) + childValues_.push_back(value); + else + *sout_ << value; +} + +void BuiltStyledStreamWriter::writeIndent() { + // blep intended this to look at the so-far-written string + // to determine whether we are already indented, but + // with a stream we cannot do that. So we rely on some saved state. + // The caller checks indented_. + + if (!indentation_.empty()) { + // In this case, drop newlines too. 
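+ // (An empty indentation_ means compact, single-line output, so neither a newline
+ // nor an indent is emitted in that case.)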
+ *sout_ << '\n' << indentString_; + } +} + +void BuiltStyledStreamWriter::writeWithIndent(std::string const& value) { + if (!indented_) writeIndent(); + *sout_ << value; + indented_ = false; +} + +void BuiltStyledStreamWriter::indent() { indentString_ += indentation_; } + +void BuiltStyledStreamWriter::unindent() { + assert(indentString_.size() >= indentation_.size()); + indentString_.resize(indentString_.size() - indentation_.size()); +} + +void BuiltStyledStreamWriter::writeCommentBeforeValue(Value const& root) { + if (cs_ == CommentStyle::None) return; + if (!root.hasComment(commentBefore)) + return; + + if (!indented_) writeIndent(); + const std::string& comment = root.getComment(commentBefore); + std::string::const_iterator iter = comment.begin(); + while (iter != comment.end()) { + *sout_ << *iter; + if (*iter == '\n' && + (iter != comment.end() && *(iter + 1) == '/')) + // writeIndent(); // would write extra newline + *sout_ << indentString_; + ++iter; + } + indented_ = false; +} + +void BuiltStyledStreamWriter::writeCommentAfterValueOnSameLine(Value const& root) { + if (cs_ == CommentStyle::None) return; + if (root.hasComment(commentAfterOnSameLine)) + *sout_ << " " + root.getComment(commentAfterOnSameLine); + + if (root.hasComment(commentAfter)) { + writeIndent(); + *sout_ << root.getComment(commentAfter); + } +} + +// static +bool BuiltStyledStreamWriter::hasCommentForValue(const Value& value) { + return value.hasComment(commentBefore) || + value.hasComment(commentAfterOnSameLine) || + value.hasComment(commentAfter); +} + +/////////////// +// StreamWriter + +StreamWriter::StreamWriter() + : sout_(NULL) +{ +} +StreamWriter::~StreamWriter() +{ +} +StreamWriter::Factory::~Factory() +{} +StreamWriterBuilder::StreamWriterBuilder() +{ + setDefaults(&settings_); +} +StreamWriterBuilder::~StreamWriterBuilder() +{} +StreamWriter* StreamWriterBuilder::newStreamWriter() const +{ + std::string indentation = settings_["indentation"].asString(); + std::string cs_str = settings_["commentStyle"].asString(); + bool eyc = settings_["enableYAMLCompatibility"].asBool(); + bool dnp = settings_["dropNullPlaceholders"].asBool(); + CommentStyle::Enum cs = CommentStyle::All; + if (cs_str == "All") { + cs = CommentStyle::All; + } else if (cs_str == "None") { + cs = CommentStyle::None; + } else { + throwRuntimeError("commentStyle must be 'All' or 'None'"); + } + std::string colonSymbol = " : "; + if (eyc) { + colonSymbol = ": "; + } else if (indentation.empty()) { + colonSymbol = ":"; + } + std::string nullSymbol = "null"; + if (dnp) { + nullSymbol = ""; + } + std::string endingLineFeedSymbol = ""; + return new BuiltStyledStreamWriter( + indentation, cs, + colonSymbol, nullSymbol, endingLineFeedSymbol); +} +static void getValidWriterKeys(std::set* valid_keys) +{ + valid_keys->clear(); + valid_keys->insert("indentation"); + valid_keys->insert("commentStyle"); + valid_keys->insert("enableYAMLCompatibility"); + valid_keys->insert("dropNullPlaceholders"); +} +bool StreamWriterBuilder::validate(Json::Value* invalid) const +{ + Json::Value my_invalid; + if (!invalid) invalid = &my_invalid; // so we do not need to test for NULL + Json::Value& inv = *invalid; + std::set valid_keys; + getValidWriterKeys(&valid_keys); + Value::Members keys = settings_.getMemberNames(); + size_t n = keys.size(); + for (size_t i = 0; i < n; ++i) { + std::string const& key = keys[i]; + if (valid_keys.find(key) == valid_keys.end()) { + inv[key] = settings_[key]; + } + } + return 0u == inv.size(); +} +Value& 
StreamWriterBuilder::operator[](std::string key) +{ + return settings_[key]; +} +// static +void StreamWriterBuilder::setDefaults(Json::Value* settings) +{ + //! [StreamWriterBuilderDefaults] + (*settings)["commentStyle"] = "All"; + (*settings)["indentation"] = "\t"; + (*settings)["enableYAMLCompatibility"] = false; + (*settings)["dropNullPlaceholders"] = false; + //! [StreamWriterBuilderDefaults] +} + +std::string writeString(StreamWriter::Factory const& builder, Value const& root) { + std::ostringstream sout; + StreamWriterPtr const writer(builder.newStreamWriter()); + writer->write(root, &sout); + return sout.str(); +} + +std::ostream& operator<<(std::ostream& sout, Value const& root) { + StreamWriterBuilder builder; + StreamWriterPtr const writer(builder.newStreamWriter()); + writer->write(root, &sout); + return sout; +} + +} // namespace Json + +// ////////////////////////////////////////////////////////////////////// +// End of content of file: src/lib_json/json_writer.cpp +// ////////////////////////////////////////////////////////////////////// + + + + + diff --git a/applications/join/relation_io.hpp b/applications/join/relation_io.hpp index 5021a1819..ed475a74f 100644 --- a/applications/join/relation_io.hpp +++ b/applications/join/relation_io.hpp @@ -3,8 +3,11 @@ #include #include #include +#include #include #include +#include +#include "json/json.h" //#include #include @@ -125,6 +128,154 @@ tuple_graph readEdges( std::string fn, int64_t numTuples ) { } +std::string get_split_name(std::string base, int part) { + const int digits = 5; + assert(part <= 99999); + + std::stringstream ss; + ss << base << "-" << std::setw(digits) << std::setfill('0') << part; + return ss.str(); +} + +size_t get_lines( std::string fname ) { +#if 0 + // POPEN and MPI don't mix well + char cmd[256]; + sprintf(cmd, "wc -l %s | awk '{print $1}'", fname.c_str()); + FILE* pipe = popen(cmd, "r"); + if (!pipe) return -1; + char buffer[128]; + std::string result = ""; + while(!feof(pipe)) { + if(fgets(buffer, 128, pipe) != NULL) + result += buffer; + } + pclose(pipe); + return std::stoi(result); +#endif + size_t count = 0; + std::ifstream inp(fname); + std::string line; + while (std::getline(inp, line)) { + ++count; + } + return count; +} + + +size_t get_total_lines( std::string basename ) { +#if 0 + // POPEN and MPI don't mix well + char cmd[128]; + sprintf(cmd, "wc -l %s-* | tail -n 1 | awk '{print $1}'", basename.c_str()); + FILE* pipe = popen(cmd, "r"); + CHECK( pipe != NULL); + if (!pipe) return -1; + char buffer[128]; + std::string result = ""; + while(!feof(pipe)) { + if(fgets(buffer, 128, pipe) != NULL) + result += buffer; + } + CHECK( pclose(pipe) != -1 ); + return std::stoi(result); +#endif + int i = 0; + int64_t sum = 0; + while (true) { + auto fname = get_split_name(basename, i); + std::ifstream inp(fname); + // if file doesn't exist then stop + if (!inp.good()) break; + inp.close(); + sum += get_lines(fname); + ++i; + } + return sum; +} + +// Wraps character array in a non-array type (non-pointer converted type) so that +// we can copy strings between cores +struct CharArray { + char arr[180]; + CharArray() { } // no-arg constructor required for various object copying codes + CharArray(std::string s) { + CHECK(s.size() < 180) << "filenames limited to 180 bytes"; + sprintf(arr, "%s", s.c_str()); + } +}; + +template +size_t readSplits( std::string basename, GlobalAddress * buf_addr ) { + + uint64_t part = 0; + auto part_addr = make_global(&part); + bool done = false; + auto done_addr = 
make_global(&done); + uint64_t offset_counter = 0; + auto offset_counter_addr = make_global( &offset_counter ); + + auto ntuples = get_total_lines(basename); + CHECK(ntuples >= 0); + auto tuples = Grappa::global_alloc(ntuples); + + // choose to new here simply to save stack space + auto basename_ptr = make_global(new CharArray(basename)); + + on_all_cores( [part_addr,done_addr,offset_counter_addr,tuples,basename_ptr] { + auto fname_arr = delegate::read(basename_ptr); + auto basename_copy = std::string(fname_arr.arr); + + VLOG(5) << "readSplits reporting in"; + while (!delegate::read(done_addr)) { + auto my_part = delegate::fetch_and_add( part_addr, 1 ); + auto fname = get_split_name(basename_copy, my_part); + + VLOG(5) << "split " << fname; + std::ifstream inp(fname); + if (!inp.good()) { + delegate::call(done_addr.core(), [=] { + *(done_addr.pointer()) = true; + }); + } else { + // two pass; count lines in part then reserve and read + auto nlines = get_lines(fname); + + auto offset = delegate::fetch_and_add( offset_counter_addr, nlines ); + auto suboffset = 0; + + std::string line; + while (std::getline(inp, line)) { + // get first attribute, which is a json string + std::stringstream inss(line); + Json::Value root; + inss >> root; // ignore other json objects + + VLOG(5) << root; + + // json to csv to use fromIStream + std::stringstream ascii_s; + for ( Json::ValueIterator itr = root.begin(); itr != root.end(); itr++ ) { + ascii_s << *itr << ","; + } + + VLOG(5) << ascii_s.str(); + + auto val = T::fromIStream(ascii_s); + //Grappa::delegate::write(tuples+offset+suboffset, val); + Grappa::delegate::write(tuples+offset+suboffset, val); + ++suboffset; + } + } + } + }); +// Grappa::impl::local_gce.wait(); + delete basename_ptr.pointer(); + + *buf_addr = tuples; + return ntuples; +} + // assumes that for object T, the address of T is the address of its fields template size_t readTuplesUnordered( std::string fn, GlobalAddress * buf_addr ) { @@ -152,15 +303,11 @@ size_t readTuplesUnordered( std::string fn, GlobalAddress * buf_addr ) { size_t offset_counter = 0; auto offset_counter_addr = make_global( &offset_counter, Grappa::mycore() ); - // we will broadcast the file name as bytes - CHECK( data_path.size() <= 2040 ); - char data_path_char[2048]; - sprintf(data_path_char, "%s", data_path.c_str()); + auto data_path_ptr = make_global(new CharArray(data_path)); on_all_cores( [=] { - VLOG(5) << "opening addr next"; - VLOG(5) << "opening addr " << &data_path_char; - VLOG(5) << "opening " << data_path_char; + auto data_path_arr = delegate::read(data_path_ptr); + auto fname = std::string(data_path_arr.arr); // find my array split auto local_start = tuples.localize(); @@ -171,8 +318,8 @@ size_t readTuplesUnordered( std::string fn, GlobalAddress * buf_addr ) { int64_t offset = Grappa::delegate::fetch_and_add( offset_counter_addr, local_count ); VLOG(2) << "file offset " << offset; - std::ifstream data_file(data_path_char, std::ios_base::in | std::ios_base::binary); - CHECK( data_file.is_open() ) << data_path_char << " failed to open"; + std::ifstream data_file(fname, std::ios_base::in | std::ios_base::binary); + CHECK( data_file.is_open() ) << fname << " failed to open"; VLOG(5) << "seeking"; data_file.seekg( offset * row_size_bytes ); VLOG(5) << "reading"; @@ -193,6 +340,8 @@ size_t readTuplesUnordered( std::string fn, GlobalAddress * buf_addr ) { VLOG(4) << "local first row: " << *local_start; }); + delete data_path_ptr.pointer(); + *buf_addr = tuples; return ntuples; } @@ -210,28 +359,38 @@ Relation 
readTuplesUnordered( std::string fn ) { return r; } +// convenient version for Relation type +template +Relation readSplits( std::string base ) { + VLOG(5) << "called readSplits"; + GlobalAddress tuples; + + T sample; + CHECK( reinterpret_cast(&sample.f0) == reinterpret_cast(&sample) ) << "IO assumes f0 is the first field, but it is not for T"; + + auto ntuples = readSplits( base, &tuples ); + Relation r = { tuples, ntuples }; + return r; +} + // assumes that for object T, the address of T is the address of its fields template void writeTuplesUnordered(std::vector * vec, std::string fn ) { std::string data_path = FLAGS_relations+"/"+fn; - // we will broadcast the file name as bytes - CHECK( data_path.size() <= 2040 ); - char data_path_char[2048]; - sprintf(data_path_char, "%s", data_path.c_str()); - - std::ofstream for_trunc(data_path_char, std::ios_base::out | std::ios_base::trunc | std::ios_base::binary); + std::ofstream for_trunc(data_path, std::ios_base::out | std::ios_base::trunc | std::ios_base::binary); //no writes for_trunc.close(); - + + // we will broadcast the file name as bytes + CharArray data_path_arr(data_path); + // sequentiall open for append and write for (int i=0; ibegin(); it < vec->end(); it++) { From 082293d16226811773ab86f0827bb6981484bf16 Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Tue, 9 Jun 2015 16:23:45 -0700 Subject: [PATCH 21/50] make splits use hdfs style name --- applications/join/CMakeLists.txt | 2 +- applications/join/Relation_io_tests.cpp | 2 +- applications/join/relation_io.hpp | 5 ++++- .../join/{test-part-00000 => splits_test/part-00000} | 0 .../join/{test-part-00001 => splits_test/part-00001} | 0 5 files changed, 6 insertions(+), 3 deletions(-) rename applications/join/{test-part-00000 => splits_test/part-00000} (100%) rename applications/join/{test-part-00001 => splits_test/part-00001} (100%) diff --git a/applications/join/CMakeLists.txt b/applications/join/CMakeLists.txt index 203b35245..4f4c9a9bc 100644 --- a/applications/join/CMakeLists.txt +++ b/applications/join/CMakeLists.txt @@ -159,5 +159,5 @@ endmacro() # add test for IO add_check(Relation_io_tests.cpp 2 1 pass) -file(COPY test-part-00000 test-part-00001 DESTINATION .) +file(COPY splits_test DESTINATION .) 
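As a quick sketch of the HDFS-style naming this patch switches to (the helper name split_name_example and the sample arguments are hypothetical; the format mirrors the patched get_split_name in the relation_io.hpp hunk below):

    #include <iomanip>
    #include <sstream>
    #include <string>

    // "<base>/part-NNNNN", zero-padded to five digits, matching get_split_name.
    std::string split_name_example(const std::string& base, int part) {
      std::ostringstream ss;
      ss << base << "/part-" << std::setw(5) << std::setfill('0') << part;
      return ss.str();
    }

    // split_name_example("splits_test", 0) yields "splits_test/part-00000",
    // which is why the test data moves to splits_test/part-00000 and part-00001.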
diff --git a/applications/join/Relation_io_tests.cpp b/applications/join/Relation_io_tests.cpp index d0962a5c1..487f2611c 100644 --- a/applications/join/Relation_io_tests.cpp +++ b/applications/join/Relation_io_tests.cpp @@ -296,7 +296,7 @@ BOOST_AUTO_TEST_CASE( test1 ) { } - results = readSplits( "test-part" ); + results = readSplits( "splits_test" ); BOOST_CHECK_EQUAL(results.numtuples, 6); forall( results.data, results.numtuples, [=](MaterializedTupleRef_V1_0_1_2& t) { std::cout << t << std::endl; diff --git a/applications/join/relation_io.hpp b/applications/join/relation_io.hpp index ed475a74f..f6d5befb6 100644 --- a/applications/join/relation_io.hpp +++ b/applications/join/relation_io.hpp @@ -133,7 +133,7 @@ std::string get_split_name(std::string base, int part) { assert(part <= 99999); std::stringstream ss; - ss << base << "-" << std::setw(digits) << std::setfill('0') << part; + ss << base << "/part-" << std::setw(digits) << std::setfill('0') << part; return ss.str(); } @@ -246,6 +246,9 @@ size_t readSplits( std::string basename, GlobalAddress * buf_addr ) { std::string line; while (std::getline(inp, line)) { + // done if not a json object + if (line.length() < 4) break; + // get first attribute, which is a json string std::stringstream inss(line); Json::Value root; diff --git a/applications/join/test-part-00000 b/applications/join/splits_test/part-00000 similarity index 100% rename from applications/join/test-part-00000 rename to applications/join/splits_test/part-00000 diff --git a/applications/join/test-part-00001 b/applications/join/splits_test/part-00001 similarity index 100% rename from applications/join/test-part-00001 rename to applications/join/splits_test/part-00001 From 6cee305cbcdf4f5d8309931f0471c633fd8c6c68 Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Tue, 9 Jun 2015 16:24:26 -0700 Subject: [PATCH 22/50] truncate strings to MAX_STR_LEN :( --- applications/join/relation_io.hpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/applications/join/relation_io.hpp b/applications/join/relation_io.hpp index f6d5befb6..969d0b67e 100644 --- a/applications/join/relation_io.hpp +++ b/applications/join/relation_io.hpp @@ -259,12 +259,14 @@ size_t readSplits( std::string basename, GlobalAddress * buf_addr ) { // json to csv to use fromIStream std::stringstream ascii_s; for ( Json::ValueIterator itr = root.begin(); itr != root.end(); itr++ ) { - ascii_s << *itr << ","; + char truncated[MAX_STR_LEN-1]; + strncpy(truncated, itr->asString().c_str(), MAX_STR_LEN-2); + ascii_s << truncated << ","; } VLOG(5) << ascii_s.str(); - auto val = T::fromIStream(ascii_s); + auto val = T::fromIStream(ascii_s, ','); //Grappa::delegate::write(tuples+offset+suboffset, val); Grappa::delegate::write(tuples+offset+suboffset, val); ++suboffset; From 5a6f0687ed75f562fb7cd63c2aaeca522d7164db Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Tue, 9 Jun 2015 16:26:21 -0700 Subject: [PATCH 23/50] actually use OOP --- applications/join/relation_io.hpp | 265 ++++++++++++++++-------------- 1 file changed, 140 insertions(+), 125 deletions(-) diff --git a/applications/join/relation_io.hpp b/applications/join/relation_io.hpp index ed475a74f..df9a69f18 100644 --- a/applications/join/relation_io.hpp +++ b/applications/join/relation_io.hpp @@ -206,23 +206,74 @@ struct CharArray { }; template -size_t readSplits( std::string basename, GlobalAddress * buf_addr ) { - - uint64_t part = 0; - auto part_addr = make_global(&part); - bool done = false; - auto done_addr = make_global(&done); - uint64_t 
offset_counter = 0; - auto offset_counter_addr = make_global( &offset_counter ); +class RelationFileReader { + +public: + Relation read( std::string base ) { + GlobalAddress tuples; - auto ntuples = get_total_lines(basename); - CHECK(ntuples >= 0); - auto tuples = Grappa::global_alloc(ntuples); + T sample; + CHECK( reinterpret_cast(&sample.f0) == reinterpret_cast(&sample) ) << "IO assumes f0 is the first field, but it is not for T"; - // choose to new here simply to save stack space - auto basename_ptr = make_global(new CharArray(basename)); + auto ntuples = read( fn, &tuples ); + Relation r = { tuples, ntuples }; + return r; + } +protected: + virtual size_t read( std::string basename, GlobalAddress* buf_addr ) = 0; +}; + +template +class RowParser { +public: + virtual T parseRow(const std::string& line) = 0; +}; - on_all_cores( [part_addr,done_addr,offset_counter_addr,tuples,basename_ptr] { +template +class JSONRowParser : public RowParser { + T parseRow(const std::string& line) { + // get first attribute, which is a json string + std::stringstream inss(line); + Json::Value root; + inss >> root; // ignore other json objects + + VLOG(5) << root; + + // json to csv to use fromIStream + std::stringstream ascii_s; + for ( Json::ValueIterator itr = root.begin(); itr != root.end(); itr++ ) { + ascii_s << *itr << ","; + } + + VLOG(5) << ascii_s.str(); + + return T::fromIStream(ascii_s); + } +}; + +template +class SplitsRelationFileReader : public RelationFileReader { +private: + Parser parser; + +protected: + template + size_t read( std::string basename, GlobalAddress * buf_addr ) { + uint64_t part = 0; + auto part_addr = make_global(&part); + bool done = false; + auto done_addr = make_global(&done); + uint64_t offset_counter = 0; + auto offset_counter_addr = make_global( &offset_counter ); + + auto ntuples = get_total_lines(basename); + CHECK(ntuples >= 0); + auto tuples = Grappa::global_alloc(ntuples); + + // choose to new here simply to save stack space + auto basename_ptr = make_global(new CharArray(basename)); + + on_all_cores( [part_addr,done_addr,offset_counter_addr,tuples,basename_ptr] { auto fname_arr = delegate::read(basename_ptr); auto basename_copy = std::string(fname_arr.arr); @@ -246,132 +297,96 @@ size_t readSplits( std::string basename, GlobalAddress * buf_addr ) { std::string line; while (std::getline(inp, line)) { - // get first attribute, which is a json string - std::stringstream inss(line); - Json::Value root; - inss >> root; // ignore other json objects - - VLOG(5) << root; - - // json to csv to use fromIStream - std::stringstream ascii_s; - for ( Json::ValueIterator itr = root.begin(); itr != root.end(); itr++ ) { - ascii_s << *itr << ","; - } - - VLOG(5) << ascii_s.str(); - - auto val = T::fromIStream(ascii_s); + auto val = parser.parseRow(line); //Grappa::delegate::write(tuples+offset+suboffset, val); Grappa::delegate::write(tuples+offset+suboffset, val); - ++suboffset; + ++suboffset; } } } - }); + }); // Grappa::impl::local_gce.wait(); - delete basename_ptr.pointer(); - - *buf_addr = tuples; - return ntuples; -} - -// assumes that for object T, the address of T is the address of its fields -template -size_t readTuplesUnordered( std::string fn, GlobalAddress * buf_addr ) { - /* - std::string metadata_path = FLAGS_relations+"/"+fn+"."+metadata; //TODO replace such metadatafiles with a real catalog - std::ifstream metadata_file(metadata_path, std::ifstream::in); - CHECK( metadata_file.is_open() ) << metadata_path << " failed to open"; - int64_t numcols; - metadata_file >> 
numcols; - */ - - // binary; TODO: factor out to allow other formats like fixed-line length ascii + delete basename_ptr.pointer(); -// we get just the size of the fields (since T is a padded data type) - size_t row_size_bytes = T::fieldsSize(); - VLOG(2) << "row_size_bytes=" << row_size_bytes; - std::string data_path = FLAGS_relations+"/"+fn; - size_t file_size = fs::file_size( data_path ); - size_t ntuples = file_size / row_size_bytes; - CHECK( ntuples * row_size_bytes == file_size ) << "File " << data_path << " is ill-formatted; perhaps not all rows have same columns? file size = " << file_size << " row_size_bytes = " << row_size_bytes; - VLOG(1) << fn << " has " << ntuples << " rows"; - - auto tuples = Grappa::global_alloc(ntuples); - - size_t offset_counter = 0; - auto offset_counter_addr = make_global( &offset_counter, Grappa::mycore() ); + *buf_addr = tuples; + return ntuples; + } +}; - auto data_path_ptr = make_global(new CharArray(data_path)); - on_all_cores( [=] { - auto data_path_arr = delegate::read(data_path_ptr); - auto fname = std::string(data_path_arr.arr); - // find my array split - auto local_start = tuples.localize(); - auto local_end = (tuples+ntuples).localize(); - size_t local_count = local_end - local_start; +// assumes that for object T, the address of T is the address of its fields +template +class BinaryRelationFileReader : public RelationFileReader { +protected: + size_t read( std::string fn, GlobalAddress * buf_addr ) { + /* + std::string metadata_path = FLAGS_relations+"/"+fn+"."+metadata; //TODO replace such metadatafiles with a real catalog + std::ifstream metadata_file(metadata_path, std::ifstream::in); + CHECK( metadata_file.is_open() ) << metadata_path << " failed to open"; + int64_t numcols; + metadata_file >> numcols; + */ + + // binary; TODO: factor out to allow other formats like fixed-line length ascii - // reserve a file split - int64_t offset = Grappa::delegate::fetch_and_add( offset_counter_addr, local_count ); - VLOG(2) << "file offset " << offset; - - std::ifstream data_file(fname, std::ios_base::in | std::ios_base::binary); - CHECK( data_file.is_open() ) << fname << " failed to open"; - VLOG(5) << "seeking"; - data_file.seekg( offset * row_size_bytes ); - VLOG(5) << "reading"; - data_file.read( (char*) local_start, local_count * row_size_bytes ); - data_file.close(); - - // expand packed data into T's if necessary - if (row_size_bytes < sizeof(T)) { - VLOG(5) << "inflating"; - char * byte_ptr = reinterpret_cast(local_start); - // go backwards so we never overwrite - for( int64_t i = local_count - 1; i >= 0; --i ) { - char * data = byte_ptr + i * row_size_bytes; - memcpy(&local_start[i], data, row_size_bytes); +// we get just the size of the fields (since T is a padded data type) + size_t row_size_bytes = T::fieldsSize(); + VLOG(2) << "row_size_bytes=" << row_size_bytes; + std::string data_path = FLAGS_relations+"/"+fn; + size_t file_size = fs::file_size( data_path ); + size_t ntuples = file_size / row_size_bytes; + CHECK( ntuples * row_size_bytes == file_size ) << "File " << data_path << " is ill-formatted; perhaps not all rows have same columns? 
file size = " << file_size << " row_size_bytes = " << row_size_bytes; + VLOG(1) << fn << " has " << ntuples << " rows"; + + auto tuples = Grappa::global_alloc(ntuples); + + size_t offset_counter = 0; + auto offset_counter_addr = make_global( &offset_counter, Grappa::mycore() ); + + auto data_path_ptr = make_global(new CharArray(data_path)); + + on_all_cores( [=] { + auto data_path_arr = delegate::read(data_path_ptr); + auto fname = std::string(data_path_arr.arr); + + // find my array split + auto local_start = tuples.localize(); + auto local_end = (tuples+ntuples).localize(); + size_t local_count = local_end - local_start; + + // reserve a file split + int64_t offset = Grappa::delegate::fetch_and_add( offset_counter_addr, local_count ); + VLOG(2) << "file offset " << offset; + + std::ifstream data_file(fname, std::ios_base::in | std::ios_base::binary); + CHECK( data_file.is_open() ) << fname << " failed to open"; + VLOG(5) << "seeking"; + data_file.seekg( offset * row_size_bytes ); + VLOG(5) << "reading"; + data_file.read( (char*) local_start, local_count * row_size_bytes ); + data_file.close(); + + // expand packed data into T's if necessary + if (row_size_bytes < sizeof(T)) { + VLOG(5) << "inflating"; + char * byte_ptr = reinterpret_cast(local_start); + // go backwards so we never overwrite + for( int64_t i = local_count - 1; i >= 0; --i ) { + char * data = byte_ptr + i * row_size_bytes; + memcpy(&local_start[i], data, row_size_bytes); + } } - } - VLOG(4) << "local first row: " << *local_start; - }); + VLOG(4) << "local first row: " << *local_start; + }); - delete data_path_ptr.pointer(); + delete data_path_ptr.pointer(); - *buf_addr = tuples; - return ntuples; -} - -// convenient version for Relation type -template -Relation readTuplesUnordered( std::string fn ) { - GlobalAddress tuples; - - T sample; - CHECK( reinterpret_cast(&sample.f0) == reinterpret_cast(&sample) ) << "IO assumes f0 is the first field, but it is not for T"; - - auto ntuples = readTuplesUnordered( fn, &tuples ); - Relation r = { tuples, ntuples }; - return r; -} - -// convenient version for Relation type -template -Relation readSplits( std::string base ) { - VLOG(5) << "called readSplits"; - GlobalAddress tuples; - - T sample; - CHECK( reinterpret_cast(&sample.f0) == reinterpret_cast(&sample) ) << "IO assumes f0 is the first field, but it is not for T"; - - auto ntuples = readSplits( base, &tuples ); - Relation r = { tuples, ntuples }; - return r; -} + *buf_addr = tuples; + return ntuples; + } +}; // assumes that for object T, the address of T is the address of its fields template From 8ab0a16921c779993e90740babe2a6b226711446 Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Tue, 9 Jun 2015 17:13:49 -0700 Subject: [PATCH 24/50] make relation io test work with refactoring --- applications/join/Relation_io_tests.cpp | 19 +++++++------- applications/join/relation_io.hpp | 34 ++++++++++++------------- 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/applications/join/Relation_io_tests.cpp b/applications/join/Relation_io_tests.cpp index 487f2611c..8af011461 100644 --- a/applications/join/Relation_io_tests.cpp +++ b/applications/join/Relation_io_tests.cpp @@ -107,7 +107,7 @@ class MaterializedTupleRef_V1_0_1_2 { //} // use the tuple schema to interpret the input stream - static MaterializedTupleRef_V1_0_1_2 fromIStream(std::istream& ss) { + static MaterializedTupleRef_V1_0_1_2 fromIStream(std::istream& ss, char d=' ') { MaterializedTupleRef_V1_0_1_2 _ret; @@ -115,14 +115,14 @@ class 
MaterializedTupleRef_V1_0_1_2 { ss >> _ret.f0; // throw away comma std::string _temp; - std::getline(ss, _temp, ','); + std::getline(ss, _temp, d); } { std::string _temp; - std::getline(ss, _temp, ','); + std::getline(ss, _temp, d); _ret.f1 = to_array(_temp); } @@ -130,7 +130,7 @@ class MaterializedTupleRef_V1_0_1_2 { { std::string _temp; - std::getline(ss, _temp, ','); + std::getline(ss, _temp, d); _ret.f2 = to_array(_temp); } @@ -225,8 +225,8 @@ BOOST_AUTO_TEST_CASE( test1 ) { writeTuplesUnordered( &more_data, write_file ); // try read - Relation results = - readTuplesUnordered( write_file ); + BinaryRelationFileReader reader1; + Relation results = reader1.read( write_file ); BOOST_CHECK_EQUAL( 2, results.numtuples ); auto r0 = Grappa::delegate::read(results.data); @@ -268,8 +268,8 @@ BOOST_AUTO_TEST_CASE( test1 ) { writeTuplesUnordered( &more_data, write_file ); // verify write - results = - readTuplesUnordered( write_file ); + BinaryRelationFileReader reader2; + results = reader2.read( write_file ); BOOST_CHECK_EQUAL( 4, results.numtuples ); @@ -296,7 +296,8 @@ BOOST_AUTO_TEST_CASE( test1 ) { } - results = readSplits( "splits_test" ); + SplitsRelationFileReader, MaterializedTupleRef_V1_0_1_2> reader3; + results = reader3.read( "splits_test" ); BOOST_CHECK_EQUAL(results.numtuples, 6); forall( results.data, results.numtuples, [=](MaterializedTupleRef_V1_0_1_2& t) { std::cout << t << std::endl; diff --git a/applications/join/relation_io.hpp b/applications/join/relation_io.hpp index 5a8b8f9ec..92daf4981 100644 --- a/applications/join/relation_io.hpp +++ b/applications/join/relation_io.hpp @@ -19,6 +19,8 @@ namespace fs = boost::filesystem; #include "Tuple.hpp" #include "relation.hpp" +#include "strings.h" + #include "grappa/graph.hpp" DECLARE_string(relations); @@ -128,7 +130,7 @@ tuple_graph readEdges( std::string fn, int64_t numTuples ) { } -std::string get_split_name(std::string base, int part) { +std::string get_split_name(const std::string& base, int part) { const int digits = 5; assert(part <= 99999); @@ -137,7 +139,7 @@ std::string get_split_name(std::string base, int part) { return ss.str(); } -size_t get_lines( std::string fname ) { +size_t get_lines( const std::string& fname ) { #if 0 // POPEN and MPI don't mix well char cmd[256]; @@ -163,7 +165,7 @@ size_t get_lines( std::string fname ) { } -size_t get_total_lines( std::string basename ) { +size_t get_total_lines( const std::string& basename ) { #if 0 // POPEN and MPI don't mix well char cmd[128]; @@ -209,30 +211,31 @@ template class RelationFileReader { public: - Relation read( std::string base ) { + Relation read( const std::string& base ) { GlobalAddress tuples; T sample; CHECK( reinterpret_cast(&sample.f0) == reinterpret_cast(&sample) ) << "IO assumes f0 is the first field, but it is not for T"; - auto ntuples = read( fn, &tuples ); + auto ntuples = this->_read( base, &tuples ); Relation r = { tuples, ntuples }; return r; } protected: - virtual size_t read( std::string basename, GlobalAddress* buf_addr ) = 0; + virtual size_t _read( const std::string& basename, GlobalAddress* buf_addr ) = 0; }; template class RowParser { public: virtual T parseRow(const std::string& line) = 0; - virtual bool EOF(const std::string& line) = 0; + virtual bool eof(const std::string& line) = 0; }; template -class JSONRowParser : public RowParser { - bool EOF(const std::string& line) { +class JSONRowParser : public RowParser { + public: + bool eof(const std::string& line) { return line.length() < 4; } @@ -254,18 +257,14 @@ class JSONRowParser : 
public RowParser { VLOG(5) << ascii_s.str(); - return = T::fromIStream(ascii_s, ','); + return T::fromIStream(ascii_s, ','); } }; template class SplitsRelationFileReader : public RelationFileReader { -private: - Parser parser; - protected: - template - size_t read( std::string basename, GlobalAddress * buf_addr ) { + size_t _read( const std::string& basename, GlobalAddress * buf_addr ) { uint64_t part = 0; auto part_addr = make_global(&part); bool done = false; @@ -303,9 +302,10 @@ class SplitsRelationFileReader : public RelationFileReader { auto suboffset = 0; std::string line; + Parser parser; while (std::getline(inp, line)) { // check other EOF conditions, like empty line - if (parser.EOF(line)) break; + if (parser.eof(line)) break; auto val = parser.parseRow(line); @@ -330,7 +330,7 @@ class SplitsRelationFileReader : public RelationFileReader { template class BinaryRelationFileReader : public RelationFileReader { protected: - size_t read( std::string fn, GlobalAddress * buf_addr ) { + size_t _read( const std::string& fn, GlobalAddress * buf_addr ) { /* std::string metadata_path = FLAGS_relations+"/"+fn+"."+metadata; //TODO replace such metadatafiles with a real catalog std::ifstream metadata_file(metadata_path, std::ifstream::in); From 40f632f0eec8736bb3936953b9dcf22ed17967c0 Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Tue, 16 Jun 2015 16:37:06 -0700 Subject: [PATCH 25/50] add a schema to json parser --- applications/join/Relation_io_tests.cpp | 5 +++-- applications/join/relation_io.hpp | 14 ++++++++++++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/applications/join/Relation_io_tests.cpp b/applications/join/Relation_io_tests.cpp index 8af011461..e9204550b 100644 --- a/applications/join/Relation_io_tests.cpp +++ b/applications/join/Relation_io_tests.cpp @@ -37,6 +37,8 @@ using namespace Grappa; BOOST_AUTO_TEST_SUITE( Relation_io_tests ); +std::vector schema = {"a","b","c"}; + class MaterializedTupleRef_V1_0_1_2 { // Invariant: data stored in _fields is always in the representation // specified by _scheme. 
@@ -295,8 +297,7 @@ BOOST_AUTO_TEST_CASE( test1 ) { BOOST_CHECK( one.f2 == r3.f2 ); } - - SplitsRelationFileReader, MaterializedTupleRef_V1_0_1_2> reader3; + SplitsRelationFileReader, MaterializedTupleRef_V1_0_1_2> reader3; results = reader3.read( "splits_test" ); BOOST_CHECK_EQUAL(results.numtuples, 6); forall( results.data, results.numtuples, [=](MaterializedTupleRef_V1_0_1_2& t) { diff --git a/applications/join/relation_io.hpp b/applications/join/relation_io.hpp index 92daf4981..f77052de9 100644 --- a/applications/join/relation_io.hpp +++ b/applications/join/relation_io.hpp @@ -232,9 +232,9 @@ class RowParser { virtual bool eof(const std::string& line) = 0; }; -template +template * Schema> class JSONRowParser : public RowParser { - public: +private: bool eof(const std::string& line) { return line.length() < 4; } @@ -249,11 +249,21 @@ class JSONRowParser : public RowParser { // json to csv to use fromIStream std::stringstream ascii_s; + + // this way is broken because it doesn't regain order + /* for ( Json::ValueIterator itr = root.begin(); itr != root.end(); itr++ ) { char truncated[MAX_STR_LEN-1]; strncpy(truncated, itr->asString().c_str(), MAX_STR_LEN-2); ascii_s << truncated << ","; } + */ + + for (auto name : *Schema) { + char truncated[MAX_STR_LEN-1]; + strncpy(truncated, root[name].asString().c_str(), MAX_STR_LEN-2); + ascii_s << truncated << ","; + } VLOG(5) << ascii_s.str(); From 34329250b4ac918975d0a3e352e8286f2d2ecabc Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Tue, 16 Jun 2015 16:39:17 -0700 Subject: [PATCH 26/50] this should be public like its parent class --- applications/join/relation_io.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/applications/join/relation_io.hpp b/applications/join/relation_io.hpp index f77052de9..7eb1bcd1b 100644 --- a/applications/join/relation_io.hpp +++ b/applications/join/relation_io.hpp @@ -234,7 +234,7 @@ class RowParser { template * Schema> class JSONRowParser : public RowParser { -private: +public: bool eof(const std::string& line) { return line.length() < 4; } From a57027165c94b76cb2096627bed0a88b81dcba02 Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Wed, 24 Jun 2015 01:45:17 -0700 Subject: [PATCH 27/50] add io tests for string+double tuple and fix memcpy overlap bug --- applications/join/Relation_io_tests.cpp | 239 ++++++++++++++++++++++-- applications/join/relation_io.hpp | 2 +- 2 files changed, 223 insertions(+), 18 deletions(-) diff --git a/applications/join/Relation_io_tests.cpp b/applications/join/Relation_io_tests.cpp index e9204550b..f6d21065b 100644 --- a/applications/join/Relation_io_tests.cpp +++ b/applications/join/Relation_io_tests.cpp @@ -38,6 +38,184 @@ using namespace Grappa; BOOST_AUTO_TEST_SUITE( Relation_io_tests ); std::vector schema = {"a","b","c"}; + +class MaterializedTupleRef_V3_0_1 { + // Invariant: data stored in _fields is always in the representation + // specified by _scheme. 
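+ // Two-field test tuple (fixed-width string f0, double f1) added to exercise binary
+ // round-trip IO on rows that mix string and floating-point fields.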
+ + public: + + std::array f0; + + double f1; + + + static constexpr int numFields() { + return 2; + } + + static size_t fieldsSize() { + const MaterializedTupleRef_V3_0_1 _t; + return + + ((char*)&_t.f1) + sizeof(_t.f1) - ((char*)&_t); + } + + MaterializedTupleRef_V3_0_1 () { + // no-op + } + + //template + //MaterializedTupleRef_V3_0_1 (const OT& other) { + // std::memcpy(this, &other, sizeof(MaterializedTupleRef_V3_0_1)); + //} + MaterializedTupleRef_V3_0_1 ( + const std::array& a0 + , + + + const double& a1 + + + ) { + + f0 = a0; + + f1 = a1; + + + } + + + + + + MaterializedTupleRef_V3_0_1(const std::tuple< + + std::array + , + + double + + + >& o) { + + f0 = std::get<0>(o); + + f1 = std::get<1>(o); + + + } + + std::tuple< + + std::array + , + + double + + + > to_tuple() { + + std::tuple< + + std::array + , + + double + + + > r; + + std::get<0>(r) = f0; + + std::get<1>(r) = f1; + + + return r; + } + + // shamelessly terrible disambiguation: one solution is named factory methods + //MaterializedTupleRef_V3_0_1 (std::vector vals, bool ignore1, bool ignore2) { + // + // f0 = vals[0]; + // + // f1 = vals[1]; + // + // f2 = vals[2]; + // + //} + + // use the tuple schema to interpret the input stream + static MaterializedTupleRef_V3_0_1 fromIStream(std::istream& ss, char delim=' ') { + MaterializedTupleRef_V3_0_1 _ret; + + + + { + std::string _temp; + std::getline(ss, _temp, delim); + _ret.f0 = to_array(_temp); + } + + + + { + // use operator>> to parse into proper numeric type + ss >> _ret.f1; + //throw away the next delimiter + std::string _temp; + std::getline(ss, _temp, delim); + } + + + + return _ret; + } + + void toOStream(std::ostream& os) const { + + + VLOG(1) << "writing " << this->fieldsSize(); + os.write((char*)this, this->fieldsSize()); + + + } + + //template + //MaterializedTupleRef_V3_0_1 (const Tuple& v0, const T& from) { + // constexpr size_t v0_size = std::tuple_size::value; + // constexpr int from_size = T::numFields(); + // static_assert(MaterializedTupleRef_V3_0_1::numFields() == (v0_size + from_size), "constructor only works on same number of total fields"); + // TupleUtils::assign<0, decltype(_scheme)>(_fields, v0); + // std::memcpy(((char*)&_fields)+v0_size*sizeof(int64_t), &(from._fields), from_size*sizeof(int64_t)); + //} + + //template + //MaterializedTupleRef_V3_0_1 (const Tuple& v0) { + // static_assert(MaterializedTupleRef_V3_0_1::numFields() == (std::tuple_size::value), "constructor only works on same number of total fields"); + // TupleUtils::assign<0, decltype(_scheme)>(_fields, v0); + //} + + inline std::ostream& dump(std::ostream& o) const { + o << "Materialized("; + + + o << "" << f0 << ","; + + o << "" << f1 << ","; + + o << ")"; + return o; + } + + friend inline std::ostream& operator<< (std::ostream& o, const MaterializedTupleRef_V3_0_1& t) { + return t.dump(o); + } + + + } GRAPPA_BLOCK_ALIGNED; + class MaterializedTupleRef_V1_0_1_2 { // Invariant: data stored in _fields is always in the representation @@ -157,15 +335,6 @@ class MaterializedTupleRef_V1_0_1_2 { } - void toOStreamAscii(std::ostream& os) const { - os - - << f0 << " " - - << f1 << " " - - << f2 << std::endl; - } //template //MaterializedTupleRef_V1_0_1_2 (const Tuple& v0, const T& from) { @@ -182,28 +351,32 @@ class MaterializedTupleRef_V1_0_1_2 { // TupleUtils::assign<0, decltype(_scheme)>(_fields, v0); //} - std::ostream& dump(std::ostream& o) const { + inline std::ostream& dump(std::ostream& o) const { o << "Materialized("; o << f0 << ","; - o << f1 << ","; + o << std::string(f1.data()) 
<< ","; - o << f2 << ","; + o << std::string(f2.data()) << ","; o << ")"; return o; } + + friend inline std::ostream& operator<< (std::ostream& o, const MaterializedTupleRef_V1_0_1_2& t) { + return t.dump(o); + } } GRAPPA_BLOCK_ALIGNED; - std::ostream& operator<< (std::ostream& o, const MaterializedTupleRef_V1_0_1_2& t) { - return t.dump(o); - } + + std::vector more_data; +std::vector dt_results; BOOST_AUTO_TEST_CASE( test1 ) { Grappa::init( GRAPPA_TEST_ARGS ); @@ -303,11 +476,43 @@ BOOST_AUTO_TEST_CASE( test1 ) { forall( results.data, results.numtuples, [=](MaterializedTupleRef_V1_0_1_2& t) { std::cout << t << std::endl; }); - - }); + // test with doubles + MaterializedTupleRef_V3_0_1 dt1(to_array(std::string("compris indic")),-0.35686); + MaterializedTupleRef_V3_0_1 dt2(to_array(std::string("distanc computerimpl")),1.73678); + MaterializedTupleRef_V3_0_1 dt3(to_array(std::string("interv determin")),-0.0503026); + MaterializedTupleRef_V3_0_1 dt4(to_array(std::string("select search")),0.74823); + MaterializedTupleRef_V3_0_1 dt5(to_array(std::string("super gain")),1.997); + MaterializedTupleRef_V3_0_1 dt6(to_array(std::string("cat pancake")),-0.1154); + MaterializedTupleRef_V3_0_1 dt7(to_array(std::string("specialized device")),0.00131); + dt_results.push_back(dt1); + dt_results.push_back(dt2); + dt_results.push_back(dt3); + dt_results.push_back(dt4); + dt_results.push_back(dt5); + dt_results.push_back(dt6); + dt_results.push_back(dt7); + std::string write_file2("write2.bin"); + writeTuplesUnordered( &dt_results, write_file2 ); + + BinaryRelationFileReader reader_d; + Relation results_d = reader_d.read( write_file2 ); + + BOOST_CHECK_EQUAL( 7, results_d.numtuples ); + + forall( results_d.data, results_d.numtuples, [=](MaterializedTupleRef_V3_0_1& t) { + std::cout << t << std::endl; + BOOST_CHECK( (t.f0 == dt1.f0 && t.f1 == dt1.f1) + || (t.f0 == dt2.f0 && t.f1 == dt2.f1) + || (t.f0 == dt3.f0 && t.f1 == dt3.f1) + || (t.f0 == dt4.f0 && t.f1 == dt4.f1) + || (t.f0 == dt5.f0 && t.f1 == dt5.f1) + || (t.f0 == dt6.f0 && t.f1 == dt6.f1) + || (t.f0 == dt7.f0 && t.f1 == dt7.f1) ); + }); +}); // end grappa::run() diff --git a/applications/join/relation_io.hpp b/applications/join/relation_io.hpp index 7eb1bcd1b..89834376e 100644 --- a/applications/join/relation_io.hpp +++ b/applications/join/relation_io.hpp @@ -395,7 +395,7 @@ class BinaryRelationFileReader : public RelationFileReader { // go backwards so we never overwrite for( int64_t i = local_count - 1; i >= 0; --i ) { char * data = byte_ptr + i * row_size_bytes; - memcpy(&local_start[i], data, row_size_bytes); + memmove(&local_start[i], data, row_size_bytes); } } From 69e07f80c6ba5f390b172b8c329f65f3cbb438be Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Thu, 25 Jun 2015 18:18:22 -0700 Subject: [PATCH 28/50] block distribution --- applications/join/CMakeLists.txt | 2 + applications/join/Relation_io_tests.cpp | 27 ++++++++ applications/join/block_distribution.cpp | 34 +++++++++ applications/join/block_distribution.hpp | 30 ++++++++ applications/join/relation_io.hpp | 88 ++++++++++++++++++++---- 5 files changed, 166 insertions(+), 15 deletions(-) create mode 100644 applications/join/block_distribution.cpp create mode 100644 applications/join/block_distribution.hpp diff --git a/applications/join/CMakeLists.txt b/applications/join/CMakeLists.txt index 4f4c9a9bc..0d78985e9 100644 --- a/applications/join/CMakeLists.txt +++ b/applications/join/CMakeLists.txt @@ -34,6 +34,8 @@ set(QUERYLIB_SOURCES set(QUERYIO_SOURCES relation_io.hpp relation_io.cpp + 
block_distribution.hpp + block_distribution.cpp Tuple.hpp Tuple.cpp relation.hpp diff --git a/applications/join/Relation_io_tests.cpp b/applications/join/Relation_io_tests.cpp index f6d21065b..a58be9372 100644 --- a/applications/join/Relation_io_tests.cpp +++ b/applications/join/Relation_io_tests.cpp @@ -512,6 +512,33 @@ BOOST_AUTO_TEST_CASE( test1 ) { || (t.f0 == dt6.f0 && t.f1 == dt6.f1) || (t.f0 == dt7.f0 && t.f1 == dt7.f1) ); }); + + + // test symmetric array version + BinaryRelationFileReader, + SymmetricArrayRepresentation> reader_d2; + Relation> results_d2 = reader_d2.read( write_file2 ); + + BOOST_CHECK_EQUAL( 7, results_d2.numtuples ); + on_all_cores( [=] { + forall_here( 0, results_d2.data->vector.size(), [=](int64_t start, int64_t iters) { + for (int j=start; jvector[j]; + std::cout << t << std::endl; + BOOST_CHECK( (t.f0 == dt1.f0 && t.f1 == dt1.f1) + || (t.f0 == dt2.f0 && t.f1 == dt2.f1) + || (t.f0 == dt3.f0 && t.f1 == dt3.f1) + || (t.f0 == dt4.f0 && t.f1 == dt4.f1) + || (t.f0 == dt5.f0 && t.f1 == dt5.f1) + || (t.f0 == dt6.f0 && t.f1 == dt6.f1) + || (t.f0 == dt7.f0 && t.f1 == dt7.f1) ); + } + }); + }); + + + }); // end grappa::run() diff --git a/applications/join/block_distribution.cpp b/applications/join/block_distribution.cpp new file mode 100644 index 000000000..e98b81f3b --- /dev/null +++ b/applications/join/block_distribution.cpp @@ -0,0 +1,34 @@ +#include "block_distribution.hpp" + + NaturalNumberRange::NaturalNumberRange(uint64_t leftInclusive, uint64_t rightExclusive) +: leftInclusive(leftInclusive) + , rightExclusive(rightExclusive) { } + + + + BlockDistribution::BlockDistribution(uint64_t numBlocks, uint64_t numElements) { + this->blockSizeMin = numElements/numBlocks; + this->remainder = numElements%numBlocks; + } + + NaturalNumberRange BlockDistribution::getRangeForBlock(uint64_t blockId) { + // first remainder blocks get +1 elements + if (blockId < remainder) { + uint64_t size = blockSizeMin + 1; + uint64_t left = (blockSizeMin+1)*blockId; + return NaturalNumberRange(left, left+size); + } else { + // after remainder, blocks get +0 elements + uint64_t size = blockSizeMin; + uint64_t left = (blockSizeMin+1)*remainder+blockSizeMin*(blockId-remainder); + return NaturalNumberRange(left, left+size); + } + } + +uint64_t BlockDistribution::getBlockIdForIndex(uint64_t index) { + if (index/(blockSizeMin+1) < remainder) { + return index/(blockSizeMin+1); + } else { + return (index-remainder) / blockSizeMin; + } + } diff --git a/applications/join/block_distribution.hpp b/applications/join/block_distribution.hpp new file mode 100644 index 000000000..0d8a1cdb7 --- /dev/null +++ b/applications/join/block_distribution.hpp @@ -0,0 +1,30 @@ +#include + +class NaturalNumberRange { + public: + const uint64_t leftInclusive; + const uint64_t rightExclusive; + + NaturalNumberRange(uint64_t leftInclusive, uint64_t rightExclusive); +}; + +class BlockDistribution { + private: + uint64_t blockSizeMin; + uint64_t remainder; + +public: + + /** + * Create a block distribution of elements. + * Each block contains contiguous elements. 
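+ * For example, 10 elements across 3 blocks gives the ranges [0,4), [4,7), [7,10);
+ * the first (numElements % numBlocks) blocks receive one extra element.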
+ * + * @param numBlocks number of blocks to distribute across + * @param numElements number of elements + */ + BlockDistribution(uint64_t numBlocks, uint64_t numElements); + + NaturalNumberRange getRangeForBlock(uint64_t blockId); + + uint64_t getBlockIdForIndex(uint64_t index); +}; diff --git a/applications/join/relation_io.hpp b/applications/join/relation_io.hpp index 89834376e..50237ea28 100644 --- a/applications/join/relation_io.hpp +++ b/applications/join/relation_io.hpp @@ -18,6 +18,7 @@ namespace fs = boost::filesystem; #include #include "Tuple.hpp" #include "relation.hpp" +#include "block_distribution.hpp" #include "strings.h" @@ -207,22 +208,22 @@ struct CharArray { } }; -template +template class RelationFileReader { public: - Relation read( const std::string& base ) { - GlobalAddress tuples; + Relation read( const std::string& base ) { + GlobalAddress tuples; - T sample; - CHECK( reinterpret_cast(&sample.f0) == reinterpret_cast(&sample) ) << "IO assumes f0 is the first field, but it is not for T"; + //T sample; + //CHECK( reinterpret_cast(&sample.f0) == reinterpret_cast(&sample) ) << "IO assumes f0 is the first field, but it is not for T"; auto ntuples = this->_read( base, &tuples ); - Relation r = { tuples, ntuples }; + Relation r = { tuples, ntuples }; return r; } protected: - virtual size_t _read( const std::string& basename, GlobalAddress* buf_addr ) = 0; + virtual size_t _read( const std::string& basename, GlobalAddress* buf_addr ) = 0; }; template @@ -334,13 +335,65 @@ class SplitsRelationFileReader : public RelationFileReader { } }; +template +class ArrayRepresentation { + public: + virtual size_t row_size_bytes() const = 0; + virtual GlobalAddress allocate(size_t ntuples) = 0; + virtual std::tuple start_end(const GlobalAddress& tuples, size_t ntuples) const = 0; +}; +template +class GlobalArrayRepresentation : public ArrayRepresentation { + public: + size_t row_size_bytes() const { + return T::fieldsSize(); + } + + GlobalAddress allocate(size_t ntuples) { + return Grappa::global_alloc(ntuples); + } + + std::tuple start_end(const GlobalAddress& tuples, size_t ntuples) const { + return std::make_tuple(tuples.localize(), (tuples+ntuples).localize()); + } +}; -// assumes that for object T, the address of T is the address of its fields template -class BinaryRelationFileReader : public RelationFileReader { -protected: - size_t _read( const std::string& fn, GlobalAddress * buf_addr ) { +struct aligned_vector { + std::vector vector; +} GRAPPA_BLOCK_ALIGNED; + +template +class SymmetricArrayRepresentation : public ArrayRepresentation, T> { + public: + size_t row_size_bytes() const { + return T::fieldsSize(); + } + + GlobalAddress> allocate(size_t ntuples) { + return Grappa::symmetric_global_alloc>(); + } + + std::tuple start_end(const GlobalAddress>& tuples, size_t ntuples) const { + BlockDistribution dist(Grappa::cores(), ntuples); + auto range = dist.getRangeForBlock(Grappa::mycore()); + auto num_local_elements = range.rightExclusive - range.leftInclusive; + // this resize will make the returned start/end correct and different from previous calls + tuples->vector.resize(num_local_elements); + + auto local_start = &((tuples->vector)[0]); + VLOG(4) << "range " << range.leftInclusive << " " << range.rightExclusive << " start: " << local_start; + return std::make_tuple(local_start, local_start + num_local_elements); + } +}; + + +// assumes that for object T, the address of T is the address of its fields +template > +class BinaryRelationFileReader : public RelationFileReader { + 
protected: + size_t _read( const std::string& fn, GlobalAddress * buf_addr ) { /* std::string metadata_path = FLAGS_relations+"/"+fn+"."+metadata; //TODO replace such metadatafiles with a real catalog std::ifstream metadata_file(metadata_path, std::ifstream::in); @@ -349,10 +402,12 @@ class BinaryRelationFileReader : public RelationFileReader { metadata_file >> numcols; */ + ArrRep array_repr; + // binary; TODO: factor out to allow other formats like fixed-line length ascii // we get just the size of the fields (since T is a padded data type) - size_t row_size_bytes = T::fieldsSize(); + size_t row_size_bytes = array_repr.row_size_bytes(); VLOG(2) << "row_size_bytes=" << row_size_bytes; std::string data_path = FLAGS_relations+"/"+fn; size_t file_size = fs::file_size( data_path ); @@ -360,7 +415,7 @@ class BinaryRelationFileReader : public RelationFileReader { CHECK( ntuples * row_size_bytes == file_size ) << "File " << data_path << " is ill-formatted; perhaps not all rows have same columns? file size = " << file_size << " row_size_bytes = " << row_size_bytes; VLOG(1) << fn << " has " << ntuples << " rows"; - auto tuples = Grappa::global_alloc(ntuples); + GlobalAddress tuples = array_repr.allocate(ntuples); size_t offset_counter = 0; auto offset_counter_addr = make_global( &offset_counter, Grappa::mycore() ); @@ -372,9 +427,12 @@ class BinaryRelationFileReader : public RelationFileReader { auto fname = std::string(data_path_arr.arr); // find my array split - auto local_start = tuples.localize(); - auto local_end = (tuples+ntuples).localize(); + auto local_start_end = array_repr.start_end(tuples, ntuples); + auto local_start = std::get<0>(local_start_end); + auto local_end = std::get<1>(local_start_end); + size_t local_count = local_end - local_start; + VLOG(3) << "start/end: " << local_start << " " << local_end << " count " << local_count; // reserve a file split int64_t offset = Grappa::delegate::fetch_and_add( offset_counter_addr, local_count ); From 8feaf5c3bde2e940040aa15ac70eeddc2900ea8e Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Mon, 29 Jun 2015 10:30:56 -0700 Subject: [PATCH 29/50] enforce block size for global arrays of tuples --- applications/join/relation_io.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/applications/join/relation_io.hpp b/applications/join/relation_io.hpp index 50237ea28..9c26cbb50 100644 --- a/applications/join/relation_io.hpp +++ b/applications/join/relation_io.hpp @@ -285,6 +285,7 @@ class SplitsRelationFileReader : public RelationFileReader { auto ntuples = get_total_lines(basename); CHECK(ntuples >= 0); + CHECK( sizeof(T) <= BLOCK_SIZE ); auto tuples = Grappa::global_alloc(ntuples); // choose to new here simply to save stack space @@ -351,6 +352,7 @@ class GlobalArrayRepresentation : public ArrayRepresentation { } GlobalAddress allocate(size_t ntuples) { + CHECK( sizeof(T) <= BLOCK_SIZE ); return Grappa::global_alloc(ntuples); } @@ -584,6 +586,7 @@ GlobalAddress readTuples( std::string fn, int64_t numTuples ) { // shared by the local tasks reading the file int64_t fin = 0; + CHECK( sizeof(T) <= BLOCK_SIZE ); auto tuples = Grappa::global_alloc(numTuples); // token delimiter From 004f6324fbb67c2f11d5789dd0e1392272c7bbb2 Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Wed, 1 Jul 2015 13:08:30 -0700 Subject: [PATCH 30/50] strings.h to_array truncates now, so don't do it in file read io --- applications/join/relation_io.hpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/applications/join/relation_io.hpp 
b/applications/join/relation_io.hpp index 9c26cbb50..3858d5b9c 100644 --- a/applications/join/relation_io.hpp +++ b/applications/join/relation_io.hpp @@ -260,10 +260,9 @@ class JSONRowParser : public RowParser { } */ + // Create csv row using Schema for (auto name : *Schema) { - char truncated[MAX_STR_LEN-1]; - strncpy(truncated, root[name].asString().c_str(), MAX_STR_LEN-2); - ascii_s << truncated << ","; + ascii_s << root[name].asString() << ","; } VLOG(5) << ascii_s.str(); From f0fa9e2f7a500eafe436c694db7a21d7a8668b96 Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Fri, 31 Jul 2015 08:06:49 -0700 Subject: [PATCH 31/50] collection of parallel iterators --- applications/join/Iterators.hpp | 42 +++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 applications/join/Iterators.hpp diff --git a/applications/join/Iterators.hpp b/applications/join/Iterators.hpp new file mode 100644 index 000000000..a6aad7cbc --- /dev/null +++ b/applications/join/Iterators.hpp @@ -0,0 +1,42 @@ +#include +#include "relation_io.hpp" // for aligned_vector only + + +// Iterate over a symmetric global vector, that is each partition has +// a local vector pointed to by the address symmetric_array +template +void forall(GlobalAddress> symmetric_array, F loop_body) { + auto origin = mycore(); + GCE->enroll(cores()); + on_all_cores([=] { + auto num_elements = symmetric_array->vector.size(); + forall_here( 0, num_elements, [=](int64_t start, int64_t iters) { + for (int64_t j=start; jvector[j]; + loop_body(el); + } + }); + GCE->send_completion(origin); + GCE->wait(); + }); +} + + + + +template +void forall_enum(GlobalAddress> symmetric_array, F loop_body) { + auto origin = mycore(); + GCE->enroll(cores()); + on_all_cores([=] { + auto num_elements = symmetric_array->vector.size(); + forall_here( 0, num_elements, [=](int64_t start, int64_t iters) { + for (int64_t j=start; jvector[j]; + loop_body(j, el); + } + }); + GCE->send_completion(origin); + GCE->wait(); + }); +} From 8efbf42fa7383f62d0e4970302e2f2f618d4ce65 Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Mon, 3 Aug 2015 11:33:00 -0700 Subject: [PATCH 32/50] boost dir --- configure | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/configure b/configure index a1984ed14..12d869a7d 100755 --- a/configure +++ b/configure @@ -14,7 +14,9 @@ case `hostname` when /n[\d+]/ opt.boost = "/sampa/share/gcc-4.7.2/src/boost_1_51_0" when /pal/ - opt.boost = "~nels707/boost153-install" + opt.boost = "~nels707/boost153-installx" +when /bigdatann.ib/ + opt.boost = "~nels707/boost153-installx" end OptionParser.new{|p| From db5d14e28a9634fb9e11179715352deb806bc07d Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Tue, 11 Aug 2015 11:27:02 -0700 Subject: [PATCH 33/50] add iterator based join and all the others --- applications/join/DHT_symmetric.hpp | 6 + applications/join/DoubleDHT.hpp | 79 +++++++- applications/join/Operators.hpp | 277 ++++++++++++++++++++++++++++ 3 files changed, 360 insertions(+), 2 deletions(-) create mode 100644 applications/join/Operators.hpp diff --git a/applications/join/DHT_symmetric.hpp b/applications/join/DHT_symmetric.hpp index 1c956a5f4..d9bc5122f 100644 --- a/applications/join/DHT_symmetric.hpp +++ b/applications/join/DHT_symmetric.hpp @@ -64,6 +64,7 @@ class DHT_symmetric { Grappa::delegate::call(index, [key, val, target]() { // inserts initial value only if the key is not yet present std::pair entry(key, Init()); + auto res = target->local_map->insert(entry); auto resIt = res.first; //auto resNew = 
res.second; // perform the update in place @@ -99,6 +100,11 @@ class DHT_symmetric { // TODO GCE->wait(); // block until all tasks are done } + std::unordered_map * get_local_map() { + return local_map; + } + + } GRAPPA_BLOCK_ALIGNED; diff --git a/applications/join/DoubleDHT.hpp b/applications/join/DoubleDHT.hpp index e161e7564..979c6b037 100644 --- a/applications/join/DoubleDHT.hpp +++ b/applications/join/DoubleDHT.hpp @@ -6,9 +6,11 @@ #include #include #include +#include #include #include +#include //GRAPPA_DECLARE_METRIC(MaxMetric, max_cell_length); GRAPPA_DECLARE_METRIC(SimpleMetric, hash_tables_size); @@ -18,7 +20,7 @@ GRAPPA_DECLARE_METRIC(SummarizingMetric, hash_tables_lookup_steps); #define DDHT_TYPE(type) typename DoubleDHT::type #define _DDHT_TYPE(type) DoubleDHT::type -enum class Direction { LEFT, RIGHT }; +//enum class Direction { LEFT, RIGHT }; // Hash table for joins // * allows multiple copies of a Key @@ -240,6 +242,27 @@ class DoubleDHT { insert_lookup_iter_left( key, val, f ); } + template + void insert_left(K key, VL val) { + auto index = computeIndex( key ); + GlobalAddress< PairCell > target = base + index; + + Grappa::delegate::call( target.core(), [key, val, target]() { + insert_local_left( key, target.pointer(), val ); + }); + } + +template + void insert_right(K key, VR val) { + auto index = computeIndex( key ); + GlobalAddress< PairCell > target = base + index; + + Grappa::delegate::call( target.core(), [key, val, target]() { + insert_local_right( key, target.pointer(), val ); + }); + } + + template< typename CF, Grappa::GlobalCompletionEvent * GCE = &Grappa::impl::local_gce, bool Unique=false > void insert_lookup_iter_right ( K key, VR val, CF f ) { auto index = computeIndex( key ); @@ -298,7 +321,59 @@ class DoubleDHT { } */ - +/* + class LocalMatchesIterator { + private: + PairCell * const start; + PairCell * const end; + PairCell * p; + decltype(p->entriesLeft->begin()) left_iter; + decltype(p->entriesRight->begin()) right_iter; + decltype(Entry().vs->begin()) left_vs_iter; + decltype(Entry().vs->begin()) right_vs_iter; + public: + LocalMatchesIterator(PairCell * start, PairCell * end) : start(base.localize()), end((base+capacity).localize()), p(start) { } + + bool next(std::pair& r) { + if (right_vs_iter != + std::list> * entriesLeft; + std::list> * entriesRight; + + + LocalMatchesIterator() +*/ + + Grappa::FullEmpty>> * matches() { + // Use Grappa's coroutines for generator pattern + + auto c = new Grappa::FullEmpty>>(); + Grappa::spawn([c,this] { + PairCell * p = base.localize(); + PairCell * end = (base+capacity).localize(); + + VLOG(3) << "has " << (end-p) << " paircells"; + while ( p != end ) { + if (p->entriesLeft != NULL && p->entriesRight != NULL) { + for (auto& l : *(p->entriesLeft)) { + for (auto& r : *(p->entriesRight)) { + if (l.key == r.key) { + for (auto& le : *(l.vs)) { + for (auto& re : *(r.vs)) { + // yield + c->writeEF(std::make_pair(true, std::make_pair(le, re))); + } + } + } + } + } + } + ++p; + } + // end + c->writeEF(std::make_pair(false, std::pair())); + }); + return c; + } }; diff --git a/applications/join/Operators.hpp b/applications/join/Operators.hpp new file mode 100644 index 000000000..ccc897400 --- /dev/null +++ b/applications/join/Operators.hpp @@ -0,0 +1,277 @@ +#pragma once +#include +#include + +#include "radish_utils.h" +#include "stats.h" +#include "strings.h" +#include "relation.hpp" +#include "DHT_symmetric.hpp" +#include "DoubleDHT.hpp" +#include + + +template +class Operator { + public: + virtual bool next(P& outTuple) = 
0; + virtual void close() = 0; +}; + +template <typename P, typename C> +class BasePipelined : public Operator<P>
{ + protected: + Operator * input; // operator produces what we consume + public: + BasePipelined(Operator* input) : input(input) { } + + void close() { + this->input->close(); + } +}; + +template +class Apply : public BasePipelined{ + public: + using BasePipelined::BasePipelined; + bool next(P& t_000) { + C t_004; + if (this->input->next(t_004)) { + apply(t_000, t_004); + return true; + } else { + return false; + } + } + + protected: + // subclass is generated and implements + // this method + virtual void apply(P& p, C& c) = 0; +}; + +template +class Store : public BasePipelined { + private: + std::vector * _res; + public: + Store(Operator* input, std::vector * res) + : BasePipelined(input) + , _res(res) { } + bool next(int& ignore) { + C t_000; + if (this->input->next(t_000)) { + _res->push_back(t_000); + VLOG(2) << t_000; + return true; + } else { + return false; + } + } +}; + +template +class AggregateSink : public BasePipelined { + private: + typedef hash_tuple::hash Hash; + + protected: + GlobalAddress> group_hash; + + public: + AggregateSink(Operator* input, GlobalAddress< + DHT_symmetric> group_hash_000) + : BasePipelined(input) + , group_hash(group_hash_000) { } + + bool next(int& ignore) { + C t_002; + if (this->input->next(t_002)) { + VLOG(4) << "update with tuple " << t_002; + group_hash->template update(mktuple(t_002), t_002); + return true; + } else { + return false; + } + } + + protected: + // subclass is generated and implements + // this method + virtual K mktuple(C& val) = 0; +}; + +template +class AggregateSource : public Operator
{ + private: + typedef C V; + typedef hash_tuple::hash Hash; + + GlobalAddress> group_hash; + + protected: + typedef decltype(group_hash->get_local_map()->begin()) iter_type; + iter_type iter; + + // subclass is generated and implements + // this method + virtual void mktuple(P& out) = 0; + public: + AggregateSource( + GlobalAddress< + DHT_symmetric> group_hash_000) { + group_hash = group_hash_000; + iter = group_hash->get_local_map()->begin(); + VLOG(3) << "local size: " << group_hash->get_local_map()->size(); + } + + bool next(P& t_010) {//P=MaterializedTupleRef_V8_10 + if (iter != group_hash->get_local_map()->end()) { + VLOG(3) << "got a tuple"; + this->mktuple(t_010); + ++iter; + return true; + } else { + return false; + } + } + + void close() { + } +}; + + +template +class Scan : public Operator
{ + private: + uint64_t index; + uint64_t size; + Relation> rel; + public: + Scan(Relation> rel) { + this->index = 0; + this->rel = rel; + this->size = rel.data->vector.size(); + } + bool next(P& t_003) { + VLOG(3) << "index(" << index << ") vector[index++]; + VLOG(3) << t_003; + return true; + } else { + return false; + } + } + + void close() { + } +}; + + +template +class Select : public BasePipelined { + public: + using BasePipelined::BasePipelined; + bool next(P& t_100) { + C t_003; + while (this->input->next(t_003)) { + if (predicate(t_003)) { + t_100 = t_003; + return true; + } + } + return false; + } + + protected: + // subclass is generated and implements + // this method + virtual bool predicate(C& t) = 0; + +}; + +template +class HashJoinSinkLeft : public BasePipelined { + public: + typedef DoubleDHT dht_t; + dht_t * double_hash; + + HashJoinSinkLeft(dht_t * hash_000, Operator * left) + : BasePipelined(left) + , double_hash(hash_000) { } + + bool next(int& ignore) { + CL t_000; + if (this->input->next(t_000)) { + VLOG(3) << t_000; + double_hash->insert_left(mktuple(t_000), t_000); + return true; + } else { + return false; + } + } + + protected: + virtual K mktuple(CL& t) = 0; +}; + +template +class HashJoinSinkRight : public BasePipelined { + public: + typedef DoubleDHT dht_t; + dht_t * double_hash; + + HashJoinSinkRight(dht_t * hash_000, Operator* right) + : BasePipelined(right) + , double_hash(hash_000) { } + + bool next(int& ignore) { + CR t_000; + if (this->input->next(t_000)) { + VLOG(3) << t_000; + double_hash->insert_right(mktuple(t_000), t_000); + return true; + } else { + return false; + } + } + + protected: + virtual K mktuple(CR& t) = 0; +}; + +template +class HashJoinSource : public Operator
{ + public: + typedef DoubleDHT dht_t; + private: + dht_t * double_hash; + Grappa::FullEmpty>> * match_iter; + + public: + + HashJoinSource(dht_t * hash_000) + : double_hash(hash_000) + , match_iter(NULL) { } + + bool next(P& t) { + // init + if (match_iter == NULL) { match_iter = double_hash->matches(); } + + auto entry = this->match_iter->readFE(); + if (entry.first) { + join_coarse_result_count++; + t = this->mktuple(entry.second.first, entry.second.second); + return true; + } else { + return false; + } + } + + void close() { + } + + protected: + virtual P mktuple(CL& tl, CR& tr) = 0; +}; From 507ce93b50005354f45e7b18f6b7693693436b28 Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Thu, 13 Aug 2015 20:05:19 -0700 Subject: [PATCH 34/50] fix aggregate operator: shouldn't expose iterator to the mktuple implementation --- applications/join/Operators.hpp | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/applications/join/Operators.hpp b/applications/join/Operators.hpp index ccc897400..b9c296f96 100644 --- a/applications/join/Operators.hpp +++ b/applications/join/Operators.hpp @@ -108,14 +108,15 @@ class AggregateSource : public Operator
{ typedef hash_tuple::hash Hash; GlobalAddress> group_hash; - - protected: - typedef decltype(group_hash->get_local_map()->begin()) iter_type; + +typedef decltype(group_hash->get_local_map()->begin()) iter_type; iter_type iter; + protected: +typedef typename std::iterator_traits::value_type map_output_t; // subclass is generated and implements // this method - virtual void mktuple(P& out) = 0; + virtual void mktuple(P& out, map_output_t& inp) = 0; public: AggregateSource( GlobalAddress< @@ -128,7 +129,8 @@ class AggregateSource : public Operator
{ bool next(P& t_010) {//P=MaterializedTupleRef_V8_10 if (iter != group_hash->get_local_map()->end()) { VLOG(3) << "got a tuple"; - this->mktuple(t_010); + auto V6 = *(this->iter); + this->mktuple(t_010, V6); ++iter; return true; } else { @@ -275,3 +277,18 @@ class HashJoinSource : public Operator
{ protected: virtual P mktuple(CL& tl, CR& tr) = 0; }; + +using namespace Grappa; +void iterate(Operator ** fragment, GlobalCompletionEvent * gce) { + auto origin = mycore(); + gce->enroll(cores()); + + on_all_cores([=] { + int dummy; + auto fp = *fragment; + while (fp->next(dummy)); + fp->close(); + gce->send_completion(origin); + gce->wait(); + }); +} From da44ca9a91cb74fb3d3ad43251e7a57bf39076c2 Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Mon, 17 Aug 2015 17:45:34 -0700 Subject: [PATCH 35/50] add version of DHT that takes update function as a member --- applications/join/CMakeLists.txt | 1 + applications/join/DHT_symmetric_generic.hpp | 106 ++++++++++++++++++++ applications/join/Operators.hpp | 76 ++++++++++++-- 3 files changed, 173 insertions(+), 10 deletions(-) create mode 100644 applications/join/DHT_symmetric_generic.hpp diff --git a/applications/join/CMakeLists.txt b/applications/join/CMakeLists.txt index 0d78985e9..1fb828c68 100644 --- a/applications/join/CMakeLists.txt +++ b/applications/join/CMakeLists.txt @@ -25,6 +25,7 @@ set(QUERYLIB_SOURCES MapReduce.hpp Aggregates.hpp DHT_symmetric.hpp + DHT_symmetric_generic.hpp ) #FIXME: these MapReduce Hash joins belong in the above sources #HashJoin.hpp diff --git a/applications/join/DHT_symmetric_generic.hpp b/applications/join/DHT_symmetric_generic.hpp new file mode 100644 index 000000000..d7327fe37 --- /dev/null +++ b/applications/join/DHT_symmetric_generic.hpp @@ -0,0 +1,106 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + + +//GRAPPA_DECLARE_METRIC(MaxMetric, max_cell_length); +GRAPPA_DECLARE_METRIC(SimpleMetric, hash_tables_size); +GRAPPA_DECLARE_METRIC(SummarizingMetric, hash_tables_lookup_steps); + + +// for naming the types scoped in DHT_symmetric_generic +#define DHT_symmetric_generic_TYPE(type) typename DHT_symmetric_generic::type +#define DHT_symmetric_generic_T DHT_symmetric_generic + +// Hash table for joins +// * allows multiple copies of a Key +// * lookups return all Key matches +template +class DHT_symmetric_generic { + public: + typedef V (*update_f)(const V& oldval, const UV& incVal); + typedef V (*init_f)(void); + + private: + // private members + GlobalAddress< DHT_symmetric_generic_T > self; + std::unordered_map * local_map; + size_t partitions; + + update_f UpF; + init_f Init; + + size_t computeIndex( K key ) { + return Hash()(key) % partitions; + } + + // for creating local DHT_symmetric_generic + DHT_symmetric_generic( GlobalAddress self, update_f upf, init_f initf ) + : self(self) + , UpF(upf) + , Init(initf) + , partitions(Grappa::cores()) + , local_map(new std::unordered_map()) + {} + + public: + // for static construction + DHT_symmetric_generic( ) {} + + static GlobalAddress create_DHT_symmetric( update_f upf, init_f initf ) { + auto object = Grappa::symmetric_global_alloc(); + + Grappa::on_all_cores( [object, upf, initf] { + new(object.pointer()) DHT_symmetric_generic_T(object, upf, initf); + }); + + return object; + } + + template< GlobalCompletionEvent * GCE, SyncMode S = SyncMode::Async > + void update( K key, UV val ) { + auto index = computeIndex( key ); + auto target = this->self; + + Grappa::delegate::call(index, [key, val, target]() { + // inserts initial value only if the key is not yet present + std::pair entry(key, target->Init()); + + auto res = target->local_map->insert(entry); auto resIt = res.first; //auto resNew = res.second; + + // perform the update in place + resIt->second = target->UpF(resIt->second, val); + }); + } + + template < 
GlobalCompletionEvent * GCE, typename CF > + void forall_entries( CF f ) { + auto target = this->self; + Grappa::on_all_cores([target, f] { + // TODO: cannot use forall_here because unordered_map->begin() is a forward iterator (std::advance is O(n)) + // TODO: for now the serial loop is only performant if the continuation code is also in CPS + // TODO: best solution is a forall_here where loop decomposition is just linear continuation instead of divide and conquer + auto m = target->local_map; + for (auto it = m->begin(); it != m->end(); it++) { + // continuation takes a mapping + f(*it); + } + }); + // TODO GCE->wait(); // block until all tasks are done + } + + std::unordered_map * get_local_map() { + return local_map; + } + + + +} GRAPPA_BLOCK_ALIGNED; + diff --git a/applications/join/Operators.hpp b/applications/join/Operators.hpp index b9c296f96..f08c0fd37 100644 --- a/applications/join/Operators.hpp +++ b/applications/join/Operators.hpp @@ -7,6 +7,7 @@ #include "strings.h" #include "relation.hpp" #include "DHT_symmetric.hpp" +#include "DHT_symmetric_generic.hpp" #include "DoubleDHT.hpp" #include @@ -70,17 +71,73 @@ class Store : public BasePipelined { } }; -template -class AggregateSink : public BasePipelined { +template +class ZeroKeyAggregateSink : public BasePipelined { + public: + typedef V (*update_f)(const V&, const C&); + typedef V (*init_f)(void); private: - typedef hash_tuple::hash Hash; + GlobalAddress _val; + update_f _update; + public: + + ZeroKeyAggregateSink(GlobalAddress v, update_f update, init_f init) + : _val(v) + , _update(update) + +{ + *(_val.localize()) = init(); + } + + bool next(int& ignore) { + C t_002; + if (this->input->next(t_002)) { + auto _val_local = _val.localize(); + *_val_local = this->_update(*_val_local, t_002); + return true; + } else { + return false; + } + } +}; + +// making combine still template because reduce needs it +template +class ZeroKeyAggregateSource : public Operator
{ + private: + GlobalAddress _val; + bool _done; + + public: + ZeroKeyAggregateSource(GlobalAddress v) : _val(v), _done(Grappa::mycore() != 0) { } + + bool next(P& t) { + if (!_done) { + V temp = reduce(_val); + mktuple(t, temp); + _done = true; + return true; + } else { + return false; + } + } + protected: - GlobalAddress> group_hash; + void mktuple(P& dest, V& src) = 0; + +}; + +template +class AggregateSink : public BasePipelined { + private: + typedef hash_tuple::hash Hash; + GlobalAddress> group_hash; + public: AggregateSink(Operator* input, GlobalAddress< - DHT_symmetric> group_hash_000) + DHT_symmetric_generic> group_hash_000) : BasePipelined(input) , group_hash(group_hash_000) { } @@ -88,7 +145,7 @@ class AggregateSink : public BasePipelined { C t_002; if (this->input->next(t_002)) { VLOG(4) << "update with tuple " << t_002; - group_hash->template update(mktuple(t_002), t_002); + group_hash->template update(mktuple(t_002), t_002); return true; } else { return false; @@ -101,13 +158,12 @@ class AggregateSink : public BasePipelined { virtual K mktuple(C& val) = 0; }; -template +template class AggregateSource : public Operator
{ private: - typedef C V; typedef hash_tuple::hash Hash; - GlobalAddress> group_hash; + GlobalAddress> group_hash; typedef decltype(group_hash->get_local_map()->begin()) iter_type; iter_type iter; @@ -120,7 +176,7 @@ typedef typename std::iterator_traits::value_type map_output_t; public: AggregateSource( GlobalAddress< - DHT_symmetric> group_hash_000) { + DHT_symmetric_generic> group_hash_000) { group_hash = group_hash_000; iter = group_hash->get_local_map()->begin(); VLOG(3) << "local size: " << group_hash->get_local_map()->size(); From 5f342512647e3d26a8d933b1be1ac6551f4c45bb Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Wed, 19 Aug 2015 11:56:55 -0700 Subject: [PATCH 36/50] fix bug: need to specify appropriate GCE --- applications/join/DoubleDHT.hpp | 4 ++-- applications/join/Operators.hpp | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/applications/join/DoubleDHT.hpp b/applications/join/DoubleDHT.hpp index 979c6b037..12e4e0e1c 100644 --- a/applications/join/DoubleDHT.hpp +++ b/applications/join/DoubleDHT.hpp @@ -242,7 +242,7 @@ class DoubleDHT { insert_lookup_iter_left( key, val, f ); } - template + template void insert_left(K key, VL val) { auto index = computeIndex( key ); GlobalAddress< PairCell > target = base + index; @@ -252,7 +252,7 @@ class DoubleDHT { }); } -template +template void insert_right(K key, VR val) { auto index = computeIndex( key ); GlobalAddress< PairCell > target = base + index; diff --git a/applications/join/Operators.hpp b/applications/join/Operators.hpp index f08c0fd37..3d6181869 100644 --- a/applications/join/Operators.hpp +++ b/applications/join/Operators.hpp @@ -249,7 +249,7 @@ class Select : public BasePipelined { }; -template +template class HashJoinSinkLeft : public BasePipelined { public: typedef DoubleDHT dht_t; @@ -263,7 +263,7 @@ class HashJoinSinkLeft : public BasePipelined { CL t_000; if (this->input->next(t_000)) { VLOG(3) << t_000; - double_hash->insert_left(mktuple(t_000), t_000); + double_hash->template insert_left(mktuple(t_000), t_000); return true; } else { return false; @@ -274,7 +274,7 @@ class HashJoinSinkLeft : public BasePipelined { virtual K mktuple(CL& t) = 0; }; -template +template class HashJoinSinkRight : public BasePipelined { public: typedef DoubleDHT dht_t; @@ -288,7 +288,7 @@ class HashJoinSinkRight : public BasePipelined { CR t_000; if (this->input->next(t_000)) { VLOG(3) << t_000; - double_hash->insert_right(mktuple(t_000), t_000); + double_hash->template insert_right(mktuple(t_000), t_000); return true; } else { return false; From d2fb1642d634351c91b8e81b9f00bbb5a1dc5854 Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Wed, 19 Aug 2015 11:57:24 -0700 Subject: [PATCH 37/50] add operators.hpp to sources --- applications/join/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/applications/join/CMakeLists.txt b/applications/join/CMakeLists.txt index 1fb828c68..5f3e37efe 100644 --- a/applications/join/CMakeLists.txt +++ b/applications/join/CMakeLists.txt @@ -26,6 +26,7 @@ set(QUERYLIB_SOURCES Aggregates.hpp DHT_symmetric.hpp DHT_symmetric_generic.hpp + Operators.hpp ) #FIXME: these MapReduce Hash joins belong in the above sources #HashJoin.hpp From 5c70dfe7bb786bb031c36edbdded107c8d75afc5 Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Thu, 27 Aug 2015 14:36:31 -0700 Subject: [PATCH 38/50] fix bug with zero key aggie --- applications/join/Operators.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/applications/join/Operators.hpp 
b/applications/join/Operators.hpp index 3d6181869..0c309eff3 100644 --- a/applications/join/Operators.hpp +++ b/applications/join/Operators.hpp @@ -82,8 +82,9 @@ class ZeroKeyAggregateSink : public BasePipelined { public: - ZeroKeyAggregateSink(GlobalAddress v, update_f update, init_f init) - : _val(v) + ZeroKeyAggregateSink(Operator * input, GlobalAddress v, update_f update, init_f init) + : BasePipelined(input) + , _val(v) , _update(update) { From 11baafe62211407ed93d81ba20f684d6663a463b Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Fri, 28 Aug 2015 11:55:41 -0700 Subject: [PATCH 39/50] fixes for zero key aggie --- applications/join/Operators.hpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/applications/join/Operators.hpp b/applications/join/Operators.hpp index 0c309eff3..603a76ac7 100644 --- a/applications/join/Operators.hpp +++ b/applications/join/Operators.hpp @@ -123,9 +123,12 @@ class ZeroKeyAggregateSource : public Operator
{ return false; } } + + void close() { + } protected: - void mktuple(P& dest, V& src) = 0; + virtual void mktuple(P& dest, V& src) = 0; }; From c4d5e6e9327a3e4442c0ccefb9e291bb89f97075 Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Tue, 1 Sep 2015 13:08:38 -0700 Subject: [PATCH 40/50] add broadcast cross product --- applications/join/Operators.hpp | 48 +++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/applications/join/Operators.hpp b/applications/join/Operators.hpp index 603a76ac7..b15bfcd59 100644 --- a/applications/join/Operators.hpp +++ b/applications/join/Operators.hpp @@ -71,6 +71,54 @@ class Store : public BasePipelined { } }; +template +class BroadcastTupleSink : public BasePipelined { +private: + C * _global_value; +public: + BroadcastTupleSink(Operator * input, C * value) + : BasePipelined(input), + , _global_value(value) { } + + bool next(int& ignore) { + C c; + if (this->input->next(c)) { + auto address = _global_value; + on_all_cores([=] { + *address = c; + }); + return true; + } else { + return false; + } + } +}; + +template +class BroadcastTupleStream : public BasePipelined { +private: + CR * _global_value; +public: + BroadcastTupleStream(Operator * input, CR * value) + : BasePipelined(input), + , _global_value(value) { } + + bool next(P& p) { + CL cl; + if (this->input->next(cl)) { + p = mktuple(cl, *_global_value); + return true; + } else { + return false; + } + +protected: + virtual void mktuple(P& p, CL& cl, CR& cr) = 0; +}; + + + + template class ZeroKeyAggregateSink : public BasePipelined { public: From 29c0b5bb8b511e59b0049a60ea61835379744278 Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Tue, 1 Sep 2015 13:41:27 -0700 Subject: [PATCH 41/50] type-o! --- applications/join/Operators.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/applications/join/Operators.hpp b/applications/join/Operators.hpp index b15bfcd59..dc115f891 100644 --- a/applications/join/Operators.hpp +++ b/applications/join/Operators.hpp @@ -77,7 +77,7 @@ class BroadcastTupleSink : public BasePipelined { C * _global_value; public: BroadcastTupleSink(Operator * input, C * value) - : BasePipelined(input), + : BasePipelined(input) , _global_value(value) { } bool next(int& ignore) { @@ -100,7 +100,7 @@ class BroadcastTupleStream : public BasePipelined { CR * _global_value; public: BroadcastTupleStream(Operator * input, CR * value) - : BasePipelined(input), + : BasePipelined(input) , _global_value(value) { } bool next(P& p) { @@ -111,6 +111,7 @@ class BroadcastTupleStream : public BasePipelined { } else { return false; } + } protected: virtual void mktuple(P& p, CL& cl, CR& cr) = 0; From 281aab97c444d51cfbcdd38e62858f9f21f2e0be Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Tue, 1 Sep 2015 13:43:03 -0700 Subject: [PATCH 42/50] change signatuire of mktuple in broadcast --- applications/join/Operators.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/applications/join/Operators.hpp b/applications/join/Operators.hpp index dc115f891..77db5a4b5 100644 --- a/applications/join/Operators.hpp +++ b/applications/join/Operators.hpp @@ -384,7 +384,7 @@ class HashJoinSource : public Operator
{ } protected: - virtual P mktuple(CL& tl, CR& tr) = 0; + virtual void mktuple(P& p, CL& tl, CR& tr) = 0; }; using namespace Grappa; From 4ab2c9b1b1032c4540ea884097d572fd8d28dd26 Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Tue, 1 Sep 2015 13:46:20 -0700 Subject: [PATCH 43/50] fix the actual fix --- applications/join/Operators.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/applications/join/Operators.hpp b/applications/join/Operators.hpp index 77db5a4b5..aa06e3a11 100644 --- a/applications/join/Operators.hpp +++ b/applications/join/Operators.hpp @@ -106,7 +106,7 @@ class BroadcastTupleStream : public BasePipelined { bool next(P& p) { CL cl; if (this->input->next(cl)) { - p = mktuple(cl, *_global_value); + mktuple(p, cl, *_global_value); return true; } else { return false; @@ -384,7 +384,7 @@ class HashJoinSource : public Operator
{ } protected: - virtual void mktuple(P& p, CL& tl, CR& tr) = 0; + virtual P mktuple(CL& tl, CR& tr) = 0; }; using namespace Grappa; From 3c60cc79b3026a0bb824336d1116160cad29907e Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Tue, 8 Sep 2015 15:35:35 -0700 Subject: [PATCH 44/50] forgot to count hash_tables_size --- applications/join/MatchesDHT.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/applications/join/MatchesDHT.hpp b/applications/join/MatchesDHT.hpp index af6d6cfc5..32c4bebdc 100644 --- a/applications/join/MatchesDHT.hpp +++ b/applications/join/MatchesDHT.hpp @@ -314,6 +314,7 @@ class MatchesDHT { Entry newe( key ); newe.vs->push_back( val ); entries->push_back( newe ); + hash_tables_size+=1; return; }); From ed0cb12753b2f12062e15a8b9bba4012193e74d3 Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Tue, 8 Sep 2015 16:14:46 -0700 Subject: [PATCH 45/50] adding an important metric for perf comparison --- applications/join/CMakeLists.txt | 1 + applications/join/DHT_symmetric.cpp | 4 +++ applications/join/DHT_symmetric.hpp | 7 ++--- applications/join/DHT_symmetric_generic.hpp | 6 ++-- applications/join/MatchesDHT.hpp | 35 ++++++++++++++++++++- 5 files changed, 44 insertions(+), 9 deletions(-) create mode 100644 applications/join/DHT_symmetric.cpp diff --git a/applications/join/CMakeLists.txt b/applications/join/CMakeLists.txt index 5f3e37efe..ec824d932 100644 --- a/applications/join/CMakeLists.txt +++ b/applications/join/CMakeLists.txt @@ -25,6 +25,7 @@ set(QUERYLIB_SOURCES MapReduce.hpp Aggregates.hpp DHT_symmetric.hpp + DHT_symmetric.cpp DHT_symmetric_generic.hpp Operators.hpp ) diff --git a/applications/join/DHT_symmetric.cpp b/applications/join/DHT_symmetric.cpp new file mode 100644 index 000000000..eda34eaaf --- /dev/null +++ b/applications/join/DHT_symmetric.cpp @@ -0,0 +1,4 @@ +//#include "DHT_symmetric.hpp" +#include "Metrics.hpp" + +GRAPPA_DEFINE_METRIC(SimpleMetric, dht_inserts, 0); diff --git a/applications/join/DHT_symmetric.hpp b/applications/join/DHT_symmetric.hpp index d9bc5122f..dfc0421b0 100644 --- a/applications/join/DHT_symmetric.hpp +++ b/applications/join/DHT_symmetric.hpp @@ -10,10 +10,7 @@ #include -//GRAPPA_DECLARE_METRIC(MaxMetric, max_cell_length); -GRAPPA_DECLARE_METRIC(SimpleMetric, hash_tables_size); -GRAPPA_DECLARE_METRIC(SummarizingMetric, hash_tables_lookup_steps); - +GRAPPA_DECLARE_METRIC(SimpleMetric, dht_inserts); // for naming the types scoped in DHT_symmetric #define DHT_symmetric_TYPE(type) typename DHT_symmetric::type @@ -69,6 +66,7 @@ class DHT_symmetric { // perform the update in place resIt->second = UpF(resIt->second, val); + dht_inserts++; }); } @@ -81,6 +79,7 @@ class DHT_symmetric { // inserts initial value only if the key is not yet present std::pair entry(key, val); target->local_map->insert(entry); + dht_inserts++; }); } diff --git a/applications/join/DHT_symmetric_generic.hpp b/applications/join/DHT_symmetric_generic.hpp index d7327fe37..cb28fb4fb 100644 --- a/applications/join/DHT_symmetric_generic.hpp +++ b/applications/join/DHT_symmetric_generic.hpp @@ -10,10 +10,7 @@ #include -//GRAPPA_DECLARE_METRIC(MaxMetric, max_cell_length); -GRAPPA_DECLARE_METRIC(SimpleMetric, hash_tables_size); -GRAPPA_DECLARE_METRIC(SummarizingMetric, hash_tables_lookup_steps); - +GRAPPA_DECLARE_METRIC(SimpleMetric, dht_inserts); // for naming the types scoped in DHT_symmetric_generic #define DHT_symmetric_generic_TYPE(type) typename DHT_symmetric_generic::type @@ -77,6 +74,7 @@ class DHT_symmetric_generic { // perform the update in place 
resIt->second = target->UpF(resIt->second, val); + dht_inserts++; }); } diff --git a/applications/join/MatchesDHT.hpp b/applications/join/MatchesDHT.hpp index 32c4bebdc..9fbf2fc61 100644 --- a/applications/join/MatchesDHT.hpp +++ b/applications/join/MatchesDHT.hpp @@ -23,6 +23,33 @@ GRAPPA_DECLARE_METRIC(SimpleMetric, hash_local_inserts); GRAPPA_DECLARE_METRIC(SimpleMetric, hash_called_lookups); GRAPPA_DECLARE_METRIC(SimpleMetric, hash_called_inserts); +///////////////////////////////////////////// +namespace aux{ +template struct seq{}; + +template +struct gen_seq : gen_seq{}; + +template +struct gen_seq<0, Is...> : seq{}; + +template +void print_tuple(std::basic_ostream& os, Tuple const& t, seq){ + using swallow = int[]; + (void)swallow{0, (void(os << (Is == 0? "" : ", ") << std::get(t)), 0)...}; +} +} // aux:: + +template +auto operator<<(std::basic_ostream& os, std::tuple const& t) + -> std::basic_ostream& +{ + os << "("; + aux::print_tuple(os, t, aux::gen_seq()); + return os << ")"; +} +///////////////////////////////////////////////// + // for naming the types scoped in MatchesDHT #define MDHT_TYPE(type) typename MatchesDHT::type @@ -59,6 +86,7 @@ class MatchesDHT { size_t capacity; size_t computeIndex( K key ) { + VLOG(2) << "hash table(" << base << ") -- Hash(" << key << "<<<"<<*(reinterpret_cast(&std::get<0>(key)))<<">>>=" << Hash()(key); return Hash()(key) & (capacity - 1); } @@ -188,6 +216,7 @@ class MatchesDHT { void lookup_iter ( K key, CF f ) { auto index = computeIndex( key ); GlobalAddress< Cell > target = base + index; + VLOG(2) << "hash(" << key << ")=" << index << ", " << target; // FIXME: remove 'this' capture when using gcc4.8, this is just a bug in 4.7 //TODO optimization where only need to do remotePrivateTask instead of call_async @@ -200,7 +229,9 @@ class MatchesDHT { Grappa::spawnRemote( target.core(), [key, target, f, this]() { hash_called_lookups++; Entry e; - if (lookup_local( key, target.pointer(), &e)) { + auto stuff = lookup_local( key, target.pointer(), &e); + VLOG(2) << "found " << key << "? 
" << stuff << " data key=" << e.key; + if (stuff) { auto resultsptr = e.vs; Grappa::forall_here(0, e.vs->size(), [f,resultsptr](int64_t start, int64_t iters) { for (int64_t i=start; i target = base + index; + VLOG(2) << "hash(" << key << ")=" << index << ", " << target; if (target.core() == Grappa::mycore()) { hash_local_inserts++; @@ -303,6 +335,7 @@ class MatchesDHT { Entry e = *i; if ( e.key == key ) { // key found so add to matches + VLOG(2) << "really inserting " << key; e.vs->push_back( val ); hash_tables_size+=1; return; From a3dfc5040aee8a5c47231abfbcebf1aebc73c57d Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Wed, 9 Sep 2015 11:02:02 -0700 Subject: [PATCH 46/50] track misses in cells --- applications/join/CMakeLists.txt | 1 + applications/join/DoubleDHT.cpp | 5 +++++ applications/join/DoubleDHT.hpp | 13 +++++++++++-- 3 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 applications/join/DoubleDHT.cpp diff --git a/applications/join/CMakeLists.txt b/applications/join/CMakeLists.txt index ec824d932..96bd35040 100644 --- a/applications/join/CMakeLists.txt +++ b/applications/join/CMakeLists.txt @@ -7,6 +7,7 @@ set(QUERYLIB_SOURCES MatchesDHT.hpp MatchesDHT.cpp DoubleDHT.hpp + DoubleDHT.cpp Hypercube.hpp Hypercube.cpp local_graph.cpp diff --git a/applications/join/DoubleDHT.cpp b/applications/join/DoubleDHT.cpp new file mode 100644 index 000000000..0a690c4d8 --- /dev/null +++ b/applications/join/DoubleDHT.cpp @@ -0,0 +1,5 @@ +#include + +GRAPPA_DEFINE_METRIC(SimpleMetric, hash_matches_iterator_cell_single_misses, 0); +GRAPPA_DEFINE_METRIC(SimpleMetric, hash_matches_iterator_cell_hits, 0); +GRAPPA_DEFINE_METRIC(SimpleMetric, hash_matches_iterator_cell_both_misses, 0); diff --git a/applications/join/DoubleDHT.hpp b/applications/join/DoubleDHT.hpp index 12e4e0e1c..3ede91a99 100644 --- a/applications/join/DoubleDHT.hpp +++ b/applications/join/DoubleDHT.hpp @@ -16,6 +16,10 @@ GRAPPA_DECLARE_METRIC(SimpleMetric, hash_tables_size); GRAPPA_DECLARE_METRIC(SummarizingMetric, hash_tables_lookup_steps); +GRAPPA_DECLARE_METRIC(SimpleMetric, hash_matches_iterator_cell_single_misses); +GRAPPA_DECLARE_METRIC(SimpleMetric, hash_matches_iterator_cell_both_misses); +GRAPPA_DECLARE_METRIC(SimpleMetric, hash_matches_iterator_cell_hits); + // for naming the types scoped in DoubleDHT #define DDHT_TYPE(type) typename DoubleDHT::type #define _DDHT_TYPE(type) DoubleDHT::type @@ -353,7 +357,12 @@ template VLOG(3) << "has " << (end-p) << " paircells"; while ( p != end ) { - if (p->entriesLeft != NULL && p->entriesRight != NULL) { + auto left_full = (p->entriesLeft != NULL); + auto right_full = (p->entriesRight != NULL); + hash_matches_iterator_cell_single_misses += (left_full != right_full) ? 1 : 0; + hash_matches_iterator_cell_both_misses += (!left_full && !right_full) ? 
1 : 0; + if (left_full && right_full) { + hash_matches_iterator_cell_hits++; for (auto& l : *(p->entriesLeft)) { for (auto& r : *(p->entriesRight)) { if (l.key == r.key) { @@ -366,7 +375,7 @@ template } } } - } + } ++p; } // end From bdeb17aa750ec6c714911fe9d61d0c805ca23e09 Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Wed, 23 Sep 2015 11:55:00 -0700 Subject: [PATCH 47/50] add partition local updates --- applications/join/DHT_symmetric.cpp | 1 + applications/join/DHT_symmetric.hpp | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/applications/join/DHT_symmetric.cpp b/applications/join/DHT_symmetric.cpp index eda34eaaf..a39561e6d 100644 --- a/applications/join/DHT_symmetric.cpp +++ b/applications/join/DHT_symmetric.cpp @@ -2,3 +2,4 @@ #include "Metrics.hpp" GRAPPA_DEFINE_METRIC(SimpleMetric, dht_inserts, 0); +GRAPPA_DEFINE_METRIC(SimpleMetric, dht_partition_inserts, 0); diff --git a/applications/join/DHT_symmetric.hpp b/applications/join/DHT_symmetric.hpp index dfc0421b0..1a6e900d4 100644 --- a/applications/join/DHT_symmetric.hpp +++ b/applications/join/DHT_symmetric.hpp @@ -11,6 +11,7 @@ GRAPPA_DECLARE_METRIC(SimpleMetric, dht_inserts); +GRAPPA_DECLARE_METRIC(SimpleMetric, dht_partition_inserts); // for naming the types scoped in DHT_symmetric #define DHT_symmetric_TYPE(type) typename DHT_symmetric::type @@ -53,6 +54,17 @@ class DHT_symmetric { return object; } + template< typename UV, V (*UpF)(const V& oldval, const UV& incVal), V (*Init)(void) > + void update_partition( K key, UV val ) { + std::pair entry(key, Init()); + + auto res = this->local_map->insert(entry); auto resIt = res.first; //auto resNew = res.second; + + // perform the update in place + resIt->second = UpF(resIt->second, val); + dht_partition_inserts++; + } + template< GlobalCompletionEvent * GCE, typename UV, V (*UpF)(const V& oldval, const UV& incVal), V (*Init)(void), SyncMode S = SyncMode::Async > void update( K key, UV val ) { auto index = computeIndex( key ); From 1fcf85988ab8b73745e8a5a688bb231c1326bd01 Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Wed, 23 Sep 2015 12:07:17 -0700 Subject: [PATCH 48/50] emulate a forall_here in forall_entries --- applications/join/DHT_symmetric.hpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/applications/join/DHT_symmetric.hpp b/applications/join/DHT_symmetric.hpp index 1a6e900d4..ec866a93b 100644 --- a/applications/join/DHT_symmetric.hpp +++ b/applications/join/DHT_symmetric.hpp @@ -102,10 +102,19 @@ class DHT_symmetric { // TODO: cannot use forall_here because unordered_map->begin() is a forward iterator (std::advance is O(n)) // TODO: for now the serial loop is only performant if the continuation code is also in CPS // TODO: best solution is a forall_here where loop decomposition is just linear continuation instead of divide and conquer + + int64_t iter_count = 0; auto m = target->local_map; - for (auto it = m->begin(); it != m->end(); it++) { + for (auto it = m->begin(); it != m->end(); it++, iter_count++) { // continuation takes a mapping f(*it); + + // get the same effect as a forall_here that would have linear decomposition: + // specifically that there is at least one yield per FLAGS_loop_theshold iterations. 
+ if (iter_count == FLAGS_loop_threshold) { + iter_count = 0; + Grappa::yield(); + } } }); // TODO GCE->wait(); // block until all tasks are done From 44c74320c097fa8696de48805232ab3d672a318a Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Mon, 5 Oct 2015 11:12:25 -0700 Subject: [PATCH 49/50] iterators with local groupby optimization --- applications/join/DHT_symmetric_generic.hpp | 15 +++++++++++ applications/join/Operators.hpp | 28 +++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/applications/join/DHT_symmetric_generic.hpp b/applications/join/DHT_symmetric_generic.hpp index cb28fb4fb..f5aa7ddc1 100644 --- a/applications/join/DHT_symmetric_generic.hpp +++ b/applications/join/DHT_symmetric_generic.hpp @@ -16,6 +16,11 @@ GRAPPA_DECLARE_METRIC(SimpleMetric, dht_inserts); #define DHT_symmetric_generic_TYPE(type) typename DHT_symmetric_generic::type #define DHT_symmetric_generic_T DHT_symmetric_generic +// TODO: The functionality of this class is covered by DHT_symmetric already. +// The only difference is that update_f/init_f are template parameters +// versus instance parameters. We can just add these instance parameters +// to DHT_symmetric and have two versions of update() method. + // Hash table for joins // * allows multiple copies of a Key // * lookups return all Key matches @@ -61,6 +66,16 @@ class DHT_symmetric_generic { return object; } + void update_partition(K key, UV val) { + std::pair entry(key, Init()); + + auto res = this->local_map->insert(entry); auto resIt = res.first; //auto resNew = res.second; + + // perform the update in place + resIt->second = this->UpF(resIt->second, val); + dht_partition_inserts++; + } + template< GlobalCompletionEvent * GCE, SyncMode S = SyncMode::Async > void update( K key, UV val ) { auto index = computeIndex( key ); diff --git a/applications/join/Operators.hpp b/applications/join/Operators.hpp index aa06e3a11..546bff86e 100644 --- a/applications/join/Operators.hpp +++ b/applications/join/Operators.hpp @@ -181,6 +181,34 @@ class ZeroKeyAggregateSource : public Operator
{ }; +template +class AggregatePartitionSink : public BasePipelined { + private: + typedef hash_tuple::hash Hash; + GlobalAddress> group_hash; + + public: + AggregateSink(Operator* input, GlobalAddress< + DHT_symmetric_generic> group_hash_000) + : BasePipelined(input) + , group_hash(group_hash_000) { } + + bool next(int& ignore) { + C t_002; + if (this->input->next(t_002)) { + VLOG(4) << "update with tuple " << t_002; + group_hash->update_partition(mktuple(t_002), t_002); + return true; + } else { + return false; + } + } + + protected: + // subclass is generated and implements + // this method + virtual K mktuple(C& val) = 0; +}; template class AggregateSink : public BasePipelined { From 33ea389a4cc3d7c40268e80b8c66927f002abd91 Mon Sep 17 00:00:00 2001 From: Brandon Myers Date: Mon, 5 Oct 2015 11:20:53 -0700 Subject: [PATCH 50/50] fix typo in iterator groupby --- applications/join/Operators.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/applications/join/Operators.hpp b/applications/join/Operators.hpp index 546bff86e..0f634b663 100644 --- a/applications/join/Operators.hpp +++ b/applications/join/Operators.hpp @@ -188,7 +188,7 @@ class AggregatePartitionSink : public BasePipelined { GlobalAddress> group_hash; public: - AggregateSink(Operator* input, GlobalAddress< + AggregatePartitionSink(Operator* input, GlobalAddress< DHT_symmetric_generic> group_hash_000) : BasePipelined(input) , group_hash(group_hash_000) { }
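
A minimal usage sketch of the operator pipeline introduced in patches 33-50, for orientation: the Scan/Select/Store operators compose into a per-core fragment that iterate() pulls to exhaustion on every core. Because the template argument lists were lost from this copy of the patches, the parameterizations used below (Operator<int>, Scan<T>, Select<P, C>, Store<C>, Relation<aligned_vector<T>>) are assumptions inferred from the surrounding code, and MyTuple, EvenFilter, query_gce, local_results, and run_query are made-up names for illustration only. The sketch assumes it is invoked from the root task inside Grappa::run after initialization.

    #include <Grappa.hpp>
    #include "Operators.hpp"

    struct MyTuple { int64_t f0; int64_t f1; };            // hypothetical row type

    // Select subclasses supply the predicate() hook.
    class EvenFilter : public Select<MyTuple, MyTuple> {   // assumes Select<P, C>
     public:
      explicit EvenFilter(Operator<MyTuple>* input)
        : Select<MyTuple, MyTuple>(input) {}
     protected:
      bool predicate(MyTuple& t) { return (t.f0 % 2) == 0; }
    };

    Grappa::GlobalCompletionEvent query_gce;
    Operator<int>* fragment_root;                          // per-core pipeline root
    std::vector<MyTuple> local_results;                    // per-core output buffer

    void run_query(Relation<aligned_vector<MyTuple>> rel) {
      // Build one operator tree per core; Scan walks the core-local partition
      // of the symmetric aligned_vector produced by the reader.
      Grappa::on_all_cores([rel] {
        auto* scan   = new Scan<MyTuple>(rel);             // leaked in this sketch
        auto* filter = new EvenFilter(scan);
        fragment_root = new Store<MyTuple>(filter, &local_results);
      });
      // Pull every fragment to exhaustion on all cores, then wait for completion.
      iterate(&fragment_root, &query_gce);
    }

In a real query the Store/std::vector pair would typically be replaced by one of the sink operators added in the later patches (HashJoinSinkLeft/Right, AggregateSink, or AggregatePartitionSink), with the corresponding source operator feeding the next fragment.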