forked from abseil/abseil-cpp
-
Notifications
You must be signed in to change notification settings - Fork 1
/
cord.h
1566 lines (1356 loc) · 55.4 KB
/
cord.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: cord.h
// -----------------------------------------------------------------------------
//
// This file defines the `absl::Cord` data structure and operations on that data
// structure. A Cord is a string-like sequence of characters optimized for
// specific use cases. Unlike a `std::string`, which stores an array of
// contiguous characters, Cord data is stored in a structure consisting of
// separate, reference-counted "chunks." (Currently, this implementation is a
// tree structure, though that implementation may change.)
//
// Because a Cord consists of these chunks, data can be added to or removed from
// a Cord during its lifetime. Chunks may also be shared between Cords. Unlike a
// `std::string`, a Cord can therefore accommodate data that changes over its
// lifetime, though it's not quite "mutable"; it can change only in the
// attachment, detachment, or rearrangement of chunks of its constituent data.
//
// A Cord provides some benefit over `std::string` under the following (albeit
// narrow) circumstances:
//
// * Cord data is designed to grow and shrink over a Cord's lifetime. Cord
// provides efficient insertions and deletions at the start and end of the
// character sequences, avoiding copies in those cases. Static data should
// generally be stored as strings.
// * External memory consisting of string-like data can be directly added to
// a Cord without requiring copies or allocations.
// * Cord data may be shared and copied cheaply. Cord provides a copy-on-write
// implementation and cheap sub-Cord operations. Copying a Cord is an O(1)
// operation.
//
// As a consequence of the above, Cord data is generally large. Small data
// should generally use strings, as construction of a Cord requires some
// overhead. Small Cords (<= 15 bytes) are represented inline, but most small
// Cords are expected to grow over their lifetimes.
//
// Note that because a Cord is made up of separate chunked data, random access
// to character data within a Cord is slower than within a `std::string`.
//
// Thread Safety
//
// Cord has the same thread-safety properties as many other types like
// std::string, std::vector<>, int, etc -- it is thread-compatible. In
// particular, if threads do not call non-const methods, then it is safe to call
// const methods without synchronization. Copying a Cord produces a new instance
// that can be used concurrently with the original in arbitrary ways.
#ifndef ABSL_STRINGS_CORD_H_
#define ABSL_STRINGS_CORD_H_
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iosfwd>
#include <iterator>
#include <string>
#include <type_traits>
#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/internal/endian.h"
#include "absl/base/internal/per_thread_tls.h"
#include "absl/base/macros.h"
#include "absl/base/port.h"
#include "absl/container/inlined_vector.h"
#include "absl/functional/function_ref.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/cord_analysis.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_btree_reader.h"
#include "absl/strings/internal/cord_rep_crc.h"
#include "absl/strings/internal/cord_rep_ring.h"
#include "absl/strings/internal/cordz_functions.h"
#include "absl/strings/internal/cordz_info.h"
#include "absl/strings/internal/cordz_statistics.h"
#include "absl/strings/internal/cordz_update_scope.h"
#include "absl/strings/internal/cordz_update_tracker.h"
#include "absl/strings/internal/resize_uninitialized.h"
#include "absl/strings/internal/string_constant.h"
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
class Cord;
class CordTestPeer;
template <typename Releaser>
Cord MakeCordFromExternal(absl::string_view, Releaser&&);
void CopyCordToString(const Cord& src, std::string* dst);
// Cord memory accounting modes
// Accounting method accepted by `Cord::EstimatedMemoryUsage()`. Both modes
// report approximations; they differ only in how shared memory is attributed.
enum class CordMemoryAccounting {
// Counts the *approximate* number of bytes held in full or in part by this
// Cord (which may not remain the same between invocations). Cords that share
// memory could each be "charged" independently for the same shared memory.
kTotal,
// Counts the *approximate* number of bytes held in full or in part by this
// Cord weighted by the sharing ratio of that data. For example, if some data
// edge is shared by 4 different Cords, then each cord is attributed 1/4th of
// the total memory usage as a 'fair share' of the total memory usage.
kFairShare,
};
// Cord
//
// A Cord is a sequence of characters, designed to be more efficient than a
// `std::string` in certain circumstances: namely, large string data that needs
// to change over its lifetime or be shared, especially when such data is shared
// across API boundaries.
//
// A Cord stores its character data in a structure that allows efficient prepend
// and append operations. This makes a Cord useful for large string data sent
// over in a wire format that may need to be prepended or appended at some point
// during the data exchange (e.g. HTTP, protocol buffers). For example, a
// Cord is useful for storing an HTTP request, and prepending an HTTP header to
// such a request.
//
// Cords should not be used for storing general string data, however. They
// require overhead to construct and are slower than strings for random access.
//
// The Cord API provides the following common API operations:
//
// * Create or assign Cords out of existing string data, memory, or other Cords
// * Append and prepend data to an existing Cord
// * Create new Sub-Cords from existing Cord data
// * Swap Cord data and compare Cord equality
// * Write out Cord data by constructing a `std::string`
//
// Additionally, the API provides iterator utilities to iterate through Cord
// data via chunks or character bytes.
//
class Cord {
private:
template <typename T>
using EnableIfString =
absl::enable_if_t<std::is_same<T, std::string>::value, int>;
public:
// Cord::Cord() Constructors.
// Creates an empty Cord.
constexpr Cord() noexcept;
// Creates a Cord from an existing Cord. Cord is copyable and efficiently
// movable. The moved-from state is valid but unspecified.
Cord(const Cord& src);
Cord(Cord&& src) noexcept;
Cord& operator=(const Cord& x);
Cord& operator=(Cord&& x) noexcept;
// Creates a Cord from a `src` string. This constructor is marked explicit to
// prevent implicit Cord constructions from arguments convertible to an
// `absl::string_view`.
explicit Cord(absl::string_view src);
Cord& operator=(absl::string_view src);
// Creates a Cord from a `std::string&&` rvalue. These constructors are
// templated to avoid ambiguities for types that are convertible to both
// `absl::string_view` and `std::string`, such as `const char*`.
template <typename T, EnableIfString<T> = 0>
explicit Cord(T&& src);
template <typename T, EnableIfString<T> = 0>
Cord& operator=(T&& src);
// Cord::~Cord()
//
// Destructs the Cord.
~Cord() {
  // Nothing to do unless the contents are tree-backed; only that case takes
  // the out-of-line slow path.
  if (!contents_.is_tree()) return;
  DestroyCordSlow();
}
// MakeCordFromExternal()
//
// Creates a Cord that takes ownership of external string memory. The
// contents of `data` are not copied to the Cord; instead, the external
// memory is added to the Cord and reference-counted. This data may not be
// changed for the life of the Cord, though it may be prepended or appended
// to.
//
// `MakeCordFromExternal()` takes a callable "releaser" that is invoked when
// the reference count for `data` reaches zero. As noted above, this data must
// remain live until the releaser is invoked. The callable releaser also must:
//
// * be move constructible
// * support `void operator()(absl::string_view) const` or `void operator()`
//
// Example:
//
// Cord MakeCord(BlockPool* pool) {
// Block* block = pool->NewBlock();
// FillBlock(block);
// return absl::MakeCordFromExternal(
// block->ToStringView(),
// [pool, block](absl::string_view v) {
// pool->FreeBlock(block, v);
// });
// }
//
// WARNING: Because a Cord can be reference-counted, it's likely a bug if your
// releaser doesn't do anything. For example, consider the following:
//
// void Foo(const char* buffer, int len) {
// auto c = absl::MakeCordFromExternal(absl::string_view(buffer, len),
// [](absl::string_view) {});
//
// // BUG: If Bar() copies its cord for any reason, including keeping a
// // substring of it, the lifetime of buffer might be extended beyond
// // when Foo() returns.
// Bar(c);
// }
template <typename Releaser>
friend Cord MakeCordFromExternal(absl::string_view data, Releaser&& releaser);
// Cord::Clear()
//
// Releases the Cord data. Any nodes that share data with other Cords, if
// applicable, will have their reference counts reduced by 1.
ABSL_ATTRIBUTE_REINITIALIZES void Clear();
// Cord::Append()
//
// Appends data to the Cord, which may come from another Cord or other string
// data.
void Append(const Cord& src);
void Append(Cord&& src);
void Append(absl::string_view src);
template <typename T, EnableIfString<T> = 0>
void Append(T&& src);
// Cord::Prepend()
//
// Prepends data to the Cord, which may come from another Cord or other string
// data.
void Prepend(const Cord& src);
void Prepend(absl::string_view src);
template <typename T, EnableIfString<T> = 0>
void Prepend(T&& src);
// Cord::RemovePrefix()
//
// Removes the first `n` bytes of a Cord.
void RemovePrefix(size_t n);
// Cord::RemoveSuffix()
//
// NOTE(review): undocumented in this header section. By symmetry with
// `RemovePrefix()` this presumably removes the last `n` bytes of a Cord --
// confirm against the definition.
void RemoveSuffix(size_t n);
// Cord::Subcord()
//
// Returns a new Cord representing the subrange [pos, pos + new_size) of
// *this. If pos >= size(), the result is empty(). If
// (pos + new_size) >= size(), the result is the subrange [pos, size()).
Cord Subcord(size_t pos, size_t new_size) const;
// Cord::swap()
//
// Swaps the contents of the Cord with `other`.
void swap(Cord& other) noexcept;
// swap()
//
// Swaps the contents of two Cords.
friend void swap(Cord& x, Cord& y) noexcept {
  // Forward to the member function so `swap(a, b)` and `a.swap(b)` agree.
  x.swap(y);
}
// Cord::size()
//
// Returns the size of the Cord.
size_t size() const;
// Cord::empty()
//
// Determines whether the given Cord is empty, returning `true` if so.
bool empty() const;
// Cord::EstimatedMemoryUsage()
//
// Returns the *approximate* number of bytes held by this cord.
// See CordMemoryAccounting for more information on accounting method used.
size_t EstimatedMemoryUsage(CordMemoryAccounting accounting_method =
CordMemoryAccounting::kTotal) const;
// Cord::Compare()
//
// Compares 'this' Cord with rhs. This function and its relatives treat Cords
// as sequences of unsigned bytes. The comparison is a straightforward
// lexicographic comparison. `Cord::Compare()` returns values as follows:
//
// -1 'this' Cord is smaller
// 0 two Cords are equal
// 1 'this' Cord is larger
int Compare(absl::string_view rhs) const;
int Compare(const Cord& rhs) const;
// Cord::StartsWith()
//
// Determines whether the Cord starts with the passed string data `rhs`.
bool StartsWith(const Cord& rhs) const;
bool StartsWith(absl::string_view rhs) const;
// Cord::EndsWith()
//
// Determines whether the Cord ends with the passed string data `rhs`.
bool EndsWith(absl::string_view rhs) const;
bool EndsWith(const Cord& rhs) const;
// Cord::operator std::string()
//
// Converts a Cord into a `std::string()`. This operator is marked explicit to
// prevent unintended Cord usage in functions that take a string.
explicit operator std::string() const;
// CopyCordToString()
//
// Copies the contents of a `src` Cord into a `*dst` string.
//
// This function optimizes the case of reusing the destination string since it
// can reuse previously allocated capacity. However, this function does not
// guarantee that pointers previously returned by `dst->data()` remain valid
// even if `*dst` had enough capacity to hold `src`. If `*dst` is a new
// object, prefer to simply use the conversion operator to `std::string`.
friend void CopyCordToString(const Cord& src, std::string* dst);
class CharIterator;
//----------------------------------------------------------------------------
// Cord::ChunkIterator
//----------------------------------------------------------------------------
//
// A `Cord::ChunkIterator` allows iteration over the constituent chunks of its
// Cord. Such iteration allows you to perform non-const operations on the data
// of a Cord without modifying it.
//
// Generally, you do not instantiate a `Cord::ChunkIterator` directly;
// instead, you create one implicitly through use of the `Cord::Chunks()`
// member function.
//
// The `Cord::ChunkIterator` has the following properties:
//
// * The iterator is invalidated after any non-const operation on the
// Cord object over which it iterates.
// * The `string_view` returned by dereferencing a valid, non-`end()`
// iterator is guaranteed to be non-empty.
// * Two `ChunkIterator` objects can be compared equal if and only if they
// remain valid and iterate over the same Cord.
// * The iterator in this case is a proxy iterator; the `string_view`
// returned by the iterator does not live inside the Cord, and its
// lifetime is limited to the lifetime of the iterator itself. To help
// prevent lifetime issues, `ChunkIterator::reference` is not a true
// reference type and is equivalent to `value_type`.
// * The iterator keeps state that can grow for Cords that contain many
// nodes and are imbalanced due to sharing. Prefer to pass this type by
// const reference instead of by value.
// Input iterator over the chunks of a Cord. Dereferencing yields an
// `absl::string_view` by value: `reference` is an alias of `value_type`, not
// a true reference type.
class ChunkIterator {
public:
// Standard iterator traits.
using iterator_category = std::input_iterator_tag;
using value_type = absl::string_view;
using difference_type = ptrdiff_t;
using pointer = const value_type*;
using reference = value_type;
// A default-constructed iterator has an empty `current_chunk_`, a null
// `current_leaf_` and zero `bytes_remaining_` (see the member initializers
// below).
ChunkIterator() = default;
ChunkIterator& operator++();
ChunkIterator operator++(int);
bool operator==(const ChunkIterator& other) const;
bool operator!=(const ChunkIterator& other) const;
reference operator*() const;
pointer operator->() const;
// `CharIterator` builds its wrapped `ChunkIterator` through the private
// `const Cord*` constructor below; `Cord` is befriended as well.
friend class Cord;
friend class CharIterator;
private:
// Short local names for the internal representation types.
using CordRep = absl::cord_internal::CordRep;
using CordRepBtree = absl::cord_internal::CordRepBtree;
using CordRepBtreeReader = absl::cord_internal::CordRepBtreeReader;
// Stack of right children of concat nodes that we have to visit.
// Keep this at the end of the structure to avoid cache-thrashing.
// TODO(jgm): Benchmark to see if there's a more optimal value than 47 for
// the inlined vector size (47 exists for backward compatibility).
using Stack = absl::InlinedVector<absl::cord_internal::CordRep*, 47>;
// Constructs a `begin()` iterator from `tree`. `tree` must not be null.
explicit ChunkIterator(cord_internal::CordRep* tree);
// Constructs a `begin()` iterator from `cord`.
explicit ChunkIterator(const Cord* cord);
// Initializes this instance from a tree. Invoked by constructors.
void InitTree(cord_internal::CordRep* tree);
// Removes `n` bytes from `current_chunk_`. Expects `n` to be smaller than
// `current_chunk_.size()`.
void RemoveChunkPrefix(size_t n);
// NOTE(review): the next two helpers are undocumented. Their names suggest
// they back `Cord::AdvanceAndRead()` and `Cord::Advance()` respectively --
// confirm against the definitions.
Cord AdvanceAndReadBytes(size_t n);
void AdvanceBytes(size_t n);
// Stack specific operator++
ChunkIterator& AdvanceStack();
// Btree specific operator++
ChunkIterator& AdvanceBtree();
// NOTE(review): undocumented; presumably the btree-specific counterpart of
// `AdvanceBytes()` -- confirm against the definition.
void AdvanceBytesBtree(size_t n);
// Iterates `n` bytes, where `n` is expected to be greater than or equal to
// `current_chunk_.size()`.
void AdvanceBytesSlowPath(size_t n);
// A view into bytes of the current `CordRep`. It may only be a view to a
// suffix of bytes if this is being used by `CharIterator`.
absl::string_view current_chunk_;
// The current leaf, or `nullptr` if the iterator points to short data.
// If the current chunk is a substring node, current_leaf_ points to the
// underlying flat or external node.
absl::cord_internal::CordRep* current_leaf_ = nullptr;
// The number of bytes left in the `Cord` over which we are iterating.
size_t bytes_remaining_ = 0;
// Cord reader for cord btrees. Empty if not traversing a btree.
CordRepBtreeReader btree_reader_;
// See 'Stack' alias definition.
Stack stack_of_right_children_;
};
// Cord::ChunkIterator::chunk_begin()
//
// Returns an iterator to the first chunk of the `Cord`.
//
// Generally, prefer using `Cord::Chunks()` within a range-based for loop for
// iterating over the chunks of a Cord. This method may be useful for getting
// a `ChunkIterator` where range-based for-loops are not useful.
//
// Example:
//
// absl::Cord::ChunkIterator FindAsChunk(const absl::Cord& c,
// absl::string_view s) {
// return std::find(c.chunk_begin(), c.chunk_end(), s);
// }
ChunkIterator chunk_begin() const;
// Cord::ChunkIterator::chunk_end()
//
// Returns an iterator one increment past the last chunk of the `Cord`.
//
// Generally, prefer using `Cord::Chunks()` within a range-based for loop for
// iterating over the chunks of a Cord. This method may be useful for getting
// a `ChunkIterator` where range-based for-loops may not be available.
ChunkIterator chunk_end() const;
//----------------------------------------------------------------------------
// Cord::ChunkIterator::ChunkRange
//----------------------------------------------------------------------------
//
// `ChunkRange` is a helper class for iterating over the chunks of the `Cord`,
// producing an iterator which can be used within a range-based for loop.
// Construction of a `ChunkRange` will return an iterator pointing to the
// first chunk of the Cord. Generally, do not construct a `ChunkRange`
// directly; instead, prefer to use the `Cord::Chunks()` method.
//
// Implementation note: `ChunkRange` is simply a convenience wrapper over
// `Cord::chunk_begin()` and `Cord::chunk_end()`.
// Range object exposing `begin()`/`end()` chunk iterators for the Cord it was
// constructed from. Holds only a pointer to that Cord.
class ChunkRange {
public:
// Fulfill minimum c++ container requirements [container.requirements]
// These (partial) container type definitions allow ChunkRange to be used
// in various utilities expecting a subset of [container.requirements].
// For example, the below enables using `::testing::ElementsAre(...)`
using value_type = absl::string_view;
using reference = value_type&;
using const_reference = const value_type&;
using iterator = ChunkIterator;
using const_iterator = ChunkIterator;
// Stores `cord` without copying the Cord it points to.
// NOTE(review): the raw pointer implies `*cord` must outlive this range --
// confirm.
explicit ChunkRange(const Cord* cord) : cord_(cord) {}
ChunkIterator begin() const;
ChunkIterator end() const;
private:
// The Cord being iterated; not owned.
const Cord* cord_;
};
// Cord::Chunks()
//
// Returns a `Cord::ChunkIterator::ChunkRange` for iterating over the chunks
// of a `Cord` with a range-based for-loop. For most iteration tasks on a
// Cord, use `Cord::Chunks()` to retrieve this iterator.
//
// Example:
//
// void ProcessChunks(const Cord& cord) {
// for (absl::string_view chunk : cord.Chunks()) { ... }
// }
//
// Note that the ordinary caveats of temporary lifetime extension apply:
//
// void Process() {
// for (absl::string_view chunk : CordFactory().Chunks()) {
// // The temporary Cord returned by CordFactory has been destroyed!
// }
// }
ChunkRange Chunks() const;
//----------------------------------------------------------------------------
// Cord::CharIterator
//----------------------------------------------------------------------------
//
// A `Cord::CharIterator` allows iteration over the constituent characters of
// a `Cord`.
//
// Generally, you do not instantiate a `Cord::CharIterator` directly; instead,
// you create one implicitly through use of the `Cord::Chars()` member
// function.
//
// A `Cord::CharIterator` has the following properties:
//
// * The iterator is invalidated after any non-const operation on the
// Cord object over which it iterates.
// * Two `CharIterator` objects can be compared equal if and only if they
// remain valid and iterate over the same Cord.
// * The iterator keeps state that can grow for Cords that contain many
// nodes and are imbalanced due to sharing. Prefer to pass this type by
// const reference instead of by value.
// * This type cannot act as a forward iterator because a `Cord` can reuse
// sections of memory. This fact violates the requirement for forward
// iterators to compare equal if dereferencing them returns the same
// object.
// Input iterator over the individual `char`s of a Cord. All of its state
// lives in a wrapped `ChunkIterator`.
class CharIterator {
public:
// Standard iterator traits. Unlike `ChunkIterator`, `reference` here is a
// true reference type (`const char&`).
using iterator_category = std::input_iterator_tag;
using value_type = char;
using difference_type = ptrdiff_t;
using pointer = const char*;
using reference = const char&;
// Default-constructs the wrapped `ChunkIterator`.
CharIterator() = default;
CharIterator& operator++();
CharIterator operator++(int);
bool operator==(const CharIterator& other) const;
bool operator!=(const CharIterator& other) const;
reference operator*() const;
pointer operator->() const;
// NOTE(review): presumably grants `Cord` access to the private constructor
// and to `chunk_iterator_` -- confirm against the definitions.
friend Cord;
private:
// Builds the iterator by constructing the wrapped `ChunkIterator` from
// `cord`.
explicit CharIterator(const Cord* cord) : chunk_iterator_(cord) {}
// The underlying chunk iterator.
ChunkIterator chunk_iterator_;
};
// Cord::CharIterator::AdvanceAndRead()
//
// Advances the `Cord::CharIterator` by `n_bytes` and returns the bytes
// advanced as a separate `Cord`. `n_bytes` must be less than or equal to the
// number of bytes within the Cord; otherwise, behavior is undefined. It is
// valid to pass `char_end()` and `0`.
static Cord AdvanceAndRead(CharIterator* it, size_t n_bytes);
// Cord::CharIterator::Advance()
//
// Advances the `Cord::CharIterator` by `n_bytes`. `n_bytes` must be less than
// or equal to the number of bytes remaining within the Cord; otherwise,
// behavior is undefined. It is valid to pass `char_end()` and `0`.
static void Advance(CharIterator* it, size_t n_bytes);
// Cord::CharIterator::ChunkRemaining()
//
// Returns the longest contiguous view starting at the iterator's position.
//
// `it` must be dereferenceable.
static absl::string_view ChunkRemaining(const CharIterator& it);
// Cord::CharIterator::char_begin()
//
// Returns an iterator to the first character of the `Cord`.
//
// Generally, prefer using `Cord::Chars()` within a range-based for loop for
// iterating over the characters of a Cord. This method may be useful for
// getting a `CharIterator` where range-based for-loops may not be available.
CharIterator char_begin() const;
// Cord::CharIterator::char_end()
//
// Returns an iterator to one past the last character of the `Cord`.
//
// Generally, prefer using `Cord::Chars()` within a range-based for loop for
// iterating over the characters of a Cord. This method may be useful for
// getting a `CharIterator` where range-based for-loops are not useful.
CharIterator char_end() const;
// Cord::CharIterator::CharRange
//
// `CharRange` is a helper class for iterating over the characters of a
// `Cord`, producing an iterator which can be used within a range-based for
// loop. Construction of a `CharRange` will return an iterator pointing to the
// first character of the Cord. Generally, do not construct a `CharRange`
// directly; instead, prefer to use the `Cord::Chars()` method shown below.
//
// Implementation note: `CharRange` is simply a convenience wrapper over
// `Cord::char_begin()` and `Cord::char_end()`.
// Range object exposing `begin()`/`end()` character iterators for the Cord it
// was constructed from. Holds only a pointer to that Cord.
class CharRange {
public:
// Fulfill minimum c++ container requirements [container.requirements]
// These (partial) container type definitions allow CharRange to be used
// in various utilities expecting a subset of [container.requirements].
// For example, the below enables using `::testing::ElementsAre(...)`
using value_type = char;
using reference = value_type&;
using const_reference = const value_type&;
using iterator = CharIterator;
using const_iterator = CharIterator;
// Stores `cord` without copying the Cord it points to.
// NOTE(review): the raw pointer implies `*cord` must outlive this range --
// confirm.
explicit CharRange(const Cord* cord) : cord_(cord) {}
CharIterator begin() const;
CharIterator end() const;
private:
// The Cord being iterated; not owned.
const Cord* cord_;
};
// Cord::CharIterator::Chars()
//
// Returns a `Cord::CharRange` for iterating over the characters of a
// `Cord` with a range-based for-loop. For most character-based iteration
// tasks on a Cord, use `Cord::Chars()` to retrieve this iterator.
//
// Example:
//
// void ProcessCord(const Cord& cord) {
// for (char c : cord.Chars()) { ... }
// }
//
// Note that the ordinary caveats of temporary lifetime extension apply:
//
// void Process() {
// for (char c : CordFactory().Chars()) {
// // The temporary Cord returned by CordFactory has been destroyed!
// }
// }
CharRange Chars() const;
// Cord::operator[]
//
// Gets the "i"th character of the Cord and returns it, provided that
// 0 <= i < Cord.size().
//
// NOTE: This routine is reasonably efficient. It is roughly
// logarithmic based on the number of chunks that make up the cord. Still,
// if you need to iterate over the contents of a cord, you should
// use a CharIterator/ChunkIterator rather than call operator[] or Get()
// repeatedly in a loop.
char operator[](size_t i) const;
// Cord::TryFlat()
//
// If this cord's representation is a single flat array, returns a
// string_view referencing that array. Otherwise returns nullopt.
absl::optional<absl::string_view> TryFlat() const;
// Cord::Flatten()
//
// Flattens the cord into a single array and returns a view of the data.
//
// If the cord was already flat, the contents are not modified.
absl::string_view Flatten();
// Supports absl::Cord as a sink object for absl::Format().
friend void AbslFormatFlush(absl::Cord* cord, absl::string_view part) {
  // Each formatted fragment is appended to the end of the destination cord.
  absl::Cord& sink = *cord;
  sink.Append(part);
}
// Cord::SetExpectedChecksum()
//
// Stores a checksum value with this non-empty cord instance, for later
// retrieval.
//
// The expected checksum is a number stored out-of-band, alongside the data.
// It is preserved across copies and assignments, but any mutations to a cord
// will cause it to lose its expected checksum.
//
// The expected checksum is not part of a Cord's value, and does not affect
// operations such as equality or hashing.
//
// This field is intended to store a CRC32C checksum for later validation, to
// help support end-to-end checksum workflows. However, the Cord API itself
// does no CRC validation, and assigns no meaning to this number.
//
// This call has no effect if this cord is empty.
void SetExpectedChecksum(uint32_t crc);
// Returns this cord's expected checksum, if it has one. Otherwise, returns
// nullopt.
absl::optional<uint32_t> ExpectedChecksum() const;
// Hashes the contents of `c` into `hash_state`. When `TryFlat()` yields a
// contiguous view, that view is combined directly; otherwise hashing is
// delegated to `HashFragmented()`.
template <typename H>
friend H AbslHashValue(H hash_state, const absl::Cord& c) {
  absl::optional<absl::string_view> flat_view = c.TryFlat();
  if (!flat_view.has_value()) {
    // No single contiguous view is available.
    return c.HashFragmented(std::move(hash_state));
  }
  return H::combine(std::move(hash_state), *flat_view);
}
// Create a Cord with the contents of StringConstant<T>::value.
// No allocations will be done and no data will be copied.
// This is an INTERNAL API and subject to change or removal. This API can only
// be used by spelling absl::strings_internal::MakeStringConstant, which is
// also an internal API.
template <typename T>
explicit constexpr Cord(strings_internal::StringConstant<T>);
private:
using CordRep = absl::cord_internal::CordRep;
using CordRepFlat = absl::cord_internal::CordRepFlat;
using CordzInfo = cord_internal::CordzInfo;
using CordzUpdateScope = cord_internal::CordzUpdateScope;
using CordzUpdateTracker = cord_internal::CordzUpdateTracker;
using InlineData = cord_internal::InlineData;
using MethodIdentifier = CordzUpdateTracker::MethodIdentifier;
// Creates a cord instance with `method` representing the originating
// public API call causing the cord to be created.
explicit Cord(absl::string_view src, MethodIdentifier method);
friend class CordTestPeer;
friend bool operator==(const Cord& lhs, const Cord& rhs);
friend bool operator==(const Cord& lhs, absl::string_view rhs);
friend const CordzInfo* GetCordzInfoForTesting(const Cord& cord);
// Calls the provided function once for each cord chunk, in order, passing
// the chunk as an absl::string_view. Unlike Chunks(), this API will not
// allocate memory.
void ForEachChunk(absl::FunctionRef<void(absl::string_view)>) const;
// Allocates new contiguous storage for the contents of the cord. This is
// called by Flatten() when the cord was not already flat.
// NOTE(review): the returned absl::string_view presumably refers to the new
// contiguous storage -- confirm against the definition.
absl::string_view FlattenSlowPath();
// InlineRep hides the actual cord contents behind a simple class, so that
// the bulk of cord.cc is isolated from changes to the representation.
//
// An InlineRep holds either a tree pointer, or an array of kMaxInline bytes.
class InlineRep {
 public:
  static constexpr unsigned char kMaxInline = cord_internal::kMaxInline;
  // kMaxInline must be at least as large as a CordRep pointer.
  static_assert(kMaxInline >= sizeof(absl::cord_internal::CordRep*), "");

  constexpr InlineRep() : data_() {}
  explicit InlineRep(InlineData::DefaultInitType init) : data_(init) {}
  InlineRep(const InlineRep& src);
  InlineRep(InlineRep&& src);
  InlineRep& operator=(const InlineRep& src);
  InlineRep& operator=(InlineRep&& src) noexcept;

  explicit constexpr InlineRep(cord_internal::InlineData data);

  void Swap(InlineRep* rhs);
  bool empty() const;
  size_t size() const;
  const char* data() const;  // nullptr when a pointer is being held
  void set_data(const char* data, size_t n);  // Drops the pointer, if any
  char* set_data(size_t n);  // Caller writes the data into the result

  // Returns nullptr when bytes are being held.
  absl::cord_internal::CordRep* tree() const;
  absl::cord_internal::CordRep* as_tree() const;
  // The result is non-null iff a pointer was being held.
  absl::cord_internal::CordRep* clear();

  // Converts to pointer if necessary.
  // NOTE(review): it is unclear which declaration the sentence above belongs
  // to -- confirm or remove.
  void reduce_size(size_t n);    // REQUIRES: holding data
  void remove_prefix(size_t n);  // REQUIRES: holding data
  void AppendArray(absl::string_view src, MethodIdentifier method);
  absl::string_view FindFlatStartPiece() const;

  // Builds a CordRepFlat instance out of the currently inlined data, with
  // `extra` bytes of desired additional capacity.
  CordRepFlat* MakeFlatWithExtraCapacity(size_t extra);

  // Sets the tree value of this instance; `rep` must not be null.
  // The current instance is required to hold a tree, and a lock must be held
  // on any CordzInfo referenced by this instance; the CordzUpdateScope
  // argument enforces the latter. When the current instance is sampled, the
  // CordzInfo instance is updated to reference the new `rep` value.
  void SetTree(CordRep* rep, const CordzUpdateScope& scope);

  // Same as SetTree(), but `rep` may be null, in which case the current
  // instance is reset to an empty value.
  void SetTreeOrEmpty(CordRep* rep, const CordzUpdateScope& scope);

  // Sets the tree value of this instance and randomly samples this cord.
  // Existing contents in `data_` are disregarded; this should be called when
  // a Cord is 'promoted' from an 'uninitialized' or 'inlined' value to a
  // non-inlined (tree / ring) value.
  void EmplaceTree(CordRep* rep, MethodIdentifier method);

  // Same as EmplaceTree() above, but additionally copies the parent stack
  // from the provided `parent` data if the parent is sampled.
  void EmplaceTree(CordRep* rep, const InlineData& parent,
                   MethodIdentifier method);

  // Commits a newly created or updated `rep` root value into this cord.
  // `old_rep` is the old (inlined or tree) value of the cord; it determines
  // whether the commit invokes SetTree() or EmplaceTree().
  void CommitTree(const CordRep* old_rep, CordRep* rep,
                  const CordzUpdateScope& scope, MethodIdentifier method);

  void AppendTreeToInlined(CordRep* tree, MethodIdentifier method);
  void AppendTreeToTree(CordRep* tree, MethodIdentifier method);
  void AppendTree(CordRep* tree, MethodIdentifier method);
  void PrependTreeToInlined(CordRep* tree, MethodIdentifier method);
  void PrependTreeToTree(CordRep* tree, MethodIdentifier method);
  void PrependTree(CordRep* tree, MethodIdentifier method);

  template <bool has_length>
  void GetAppendRegion(char** region, size_t* size, size_t length);

  // Returns true when the raw bytes of both representations are identical.
  bool IsSame(const InlineRep& other) const {
    const int byte_diff = memcmp(&data_, &other.data_, sizeof(data_));
    return byte_diff == 0;
  }

  // Compares the raw representation of `*this` and `other` as two 8-byte
  // words (at byte offsets 0 and 8). Returns 0 when both words match;
  // otherwise returns -1 or 1 according to the first word that differs.
  // NOTE(review): converting through big_endian::FromHost64 presumably makes
  // the integer comparison agree with byte-wise ordering -- confirm. Assumes
  // `data_` spans at least 16 bytes.
  int BitwiseCompare(const InlineRep& other) const {
    const char* const lhs_bytes = reinterpret_cast<const char*>(&data_);
    const char* const rhs_bytes =
        reinterpret_cast<const char*>(&other.data_);
    uint64_t lhs_word;
    uint64_t rhs_word;
    // The words are loaded with memcpy in order to avoid aliasing issues.
    memcpy(&lhs_word, lhs_bytes, sizeof(lhs_word));
    memcpy(&rhs_word, rhs_bytes, sizeof(rhs_word));
    if (lhs_word == rhs_word) {
      // Leading words match; the trailing words decide the outcome.
      memcpy(&lhs_word, lhs_bytes + 8, sizeof(lhs_word));
      memcpy(&rhs_word, rhs_bytes + 8, sizeof(rhs_word));
      if (lhs_word == rhs_word) return 0;
    }
    const bool lhs_is_less = absl::big_endian::FromHost64(lhs_word) <
                             absl::big_endian::FromHost64(rhs_word);
    return lhs_is_less ? -1 : 1;
  }

  // Copies the inlined bytes into `*dst`, leaving `*dst` with exactly
  // inline_size() characters.
  void CopyTo(std::string* dst) const {
    // A memcpy of a known, fixed size is much faster than a variable-size
    // one. On most supported platforms the small string optimization is
    // large enough that resizing to 15 bytes does not allocate memory.
    const size_t fixed_copy_size = sizeof(data_) - 1;
    absl::strings_internal::STLStringResizeUninitialized(dst,
                                                         fixed_copy_size);
    memcpy(&(*dst)[0], &data_, fixed_copy_size);
    // Trim with erase rather than resize: erase is faster because it needs
    // none of the memory allocation logic.
    dst->erase(inline_size());
  }

  // Copies the inline contents into `dst`. Assumes the cord is not empty.
  void CopyToArray(char* dst) const;

  bool is_tree() const { return data_.is_tree(); }

  // Returns true if the Cord is being profiled by cordz.
  bool is_profiled() const { return is_tree() && data_.is_profiled(); }

  // Returns the available inlined capacity, or 0 if is_tree() == true.
  size_t remaining_inline_capacity() const {
    if (is_tree()) return 0;
    return kMaxInline - data_.inline_size();
  }

  // Returns the profiled CordzInfo, or nullptr if not sampled.
  absl::cord_internal::CordzInfo* cordz_info() const {
    return data_.cordz_info();
  }

  // Sets the profiled CordzInfo. `cordz_info` must not be null.
  void set_cordz_info(cord_internal::CordzInfo* cordz_info) {
    assert(cordz_info != nullptr);
    data_.set_cordz_info(cordz_info);
  }

  // Resets the current cordz_info to null / empty.
  void clear_cordz_info() { data_.clear_cordz_info(); }

 private:
  friend class Cord;

  void AssignSlow(const InlineRep& src);
  // Unrefs the tree and stops profiling.
  void UnrefTree();

  // Replaces `data_` with a value-initialized InlineData.
  void ResetToEmpty() { data_ = {}; }

  void set_inline_size(size_t size) { data_.set_inline_size(size); }
  size_t inline_size() const { return data_.inline_size(); }

  cord_internal::InlineData data_;
};
// Storage for this cord's contents (see InlineRep).
InlineRep contents_;
// Helper for GetFlat() and TryFlat().
// NOTE(review): presumably returns true and sets `*fragment` when `rep` can
// be viewed as one contiguous piece -- confirm against the definition.
static bool GetFlatAux(absl::cord_internal::CordRep* rep,
absl::string_view* fragment);
// Helper for ForEachChunk(). Takes the tree `rep` to visit and the
// `callback` that receives chunks as absl::string_view values.
static void ForEachChunkAux(
absl::cord_internal::CordRep* rep,
absl::FunctionRef<void(absl::string_view)> callback);
// The destructor for non-empty Cords.
void DestroyCordSlow();
// Out-of-line implementation of slower parts of logic.
// The comparison and equality helpers below are overloaded on the type of the
// right-hand side: absl::string_view or Cord.
// NOTE(review): `compared_size` presumably is the length of the prefix that
// has already been compared, and `size_to_compare` the total number of bytes
// to compare -- confirm against the definitions.
void CopyToArraySlowPath(char* dst) const;
int CompareSlowPath(absl::string_view rhs, size_t compared_size,
size_t size_to_compare) const;
int CompareSlowPath(const Cord& rhs, size_t compared_size,
size_t size_to_compare) const;
bool EqualsImpl(absl::string_view rhs, size_t size_to_compare) const;
bool EqualsImpl(const Cord& rhs, size_t size_to_compare) const;
int CompareImpl(const Cord& rhs) const;
// Comparison helper templated on the result type and on the type of the
// right-hand side. It is a friend so that it can use Cord's private members.
template <typename ResultType, typename RHS>
friend ResultType GenericCompare(const Cord& lhs, const RHS& rhs,
size_t size_to_compare);
// Overloads of GetFirstChunk for a Cord and for an absl::string_view.
// NOTE(review): presumably these let generic comparison code treat both kinds
// of right-hand side uniformly -- confirm against the callers.
static absl::string_view GetFirstChunk(const Cord& c);
static absl::string_view GetFirstChunk(absl::string_view sv);
// Returns a new reference to contents_.tree(), or steals an existing
// reference if called on an rvalue. The two behaviors correspond to the
// `const&` and `&&` ref-qualified overloads below.
absl::cord_internal::CordRep* TakeRep() const&;
absl::cord_internal::CordRep* TakeRep() &&;
// Helper for Append(). `src` is taken as a forwarding reference.
template <typename C>
void AppendImpl(C&& src);
// Prepends the provided data to this instance. `method` contains the public
// API method for this action which is tracked for Cordz sampling purposes.
void PrependArray(absl::string_view src, MethodIdentifier method);
// Assigns the value in `src` to this instance, 'stealing' its contents.
// Requires src.length() > kMaxBytesToCopy.
// NOTE(review): the returned Cord& is presumably `*this` -- confirm against
// the definition.
Cord& AssignLargeString(std::string&& src);
// Helper for AbslHashValue().
template <typename H>
H HashFragmented(H hash_state) const {
typename H::AbslInternalPiecewiseCombiner combiner;
ForEachChunk([&combiner, &hash_state](absl::string_view chunk) {
hash_state = combiner.add_buffer(std::move(hash_state), chunk.data(),
chunk.size());
});
return H::combine(combiner.finalize(std::move(hash_state)), size());
}
};
ABSL_NAMESPACE_END
} // namespace absl
namespace absl {
ABSL_NAMESPACE_BEGIN
// Stream insertion operator that allows a Cord to be logged. It is only
// declared here; it takes the output stream and the cord, and returns a
// std::ostream reference.
extern std::ostream& operator<<(std::ostream& out, const Cord& cord);
// ------------------------------------------------------------------
// Internal details follow. Clients should ignore.
namespace cord_internal {
// Fast implementation of memmove for up to 15 bytes. This implementation is
// safe for overlapping regions. If nullify_tail is true, the destination is
// padded with '\0' up to 16 bytes.
template <bool nullify_tail = false>
inline void SmallMemmove(char* dst, const char* src, size_t n) {
if (n >= 8) {
assert(n <= 16);
uint64_t buf1;
uint64_t buf2;
memcpy(&buf1, src, 8);
memcpy(&buf2, src + n - 8, 8);
if (nullify_tail) {
memset(dst + 8, 0, 8);
}
memcpy(dst, &buf1, 8);
memcpy(dst + n - 8, &buf2, 8);
} else if (n >= 4) {
uint32_t buf1;
uint32_t buf2;
memcpy(&buf1, src, 4);
memcpy(&buf2, src + n - 4, 4);
if (nullify_tail) {
memset(dst + 4, 0, 4);