From dd822b1a982e3e97aabdb37d1224388d28d85771 Mon Sep 17 00:00:00 2001
From: Oliver Rice
Date: Tue, 12 Oct 2021 04:02:34 -0500
Subject: [PATCH 1/2] to_list

---
 docs/api.rst          |   1 +
 src/flupy/fluent.py   | 150 ++++++++++++++++++++++--------------------
 src/tests/test_flu.py |   4 ++
 3 files changed, 84 insertions(+), 71 deletions(-)

diff --git a/docs/api.rst b/docs/api.rst
index 8dfd280..f074abd 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -70,6 +70,7 @@ Summarizing
 .. automethod:: flu.last
 .. automethod:: flu.head
 .. automethod:: flu.tail
+.. automethod:: flu.to_list
 .. automethod:: flu.collect

 ----

diff --git a/src/flupy/fluent.py b/src/flupy/fluent.py
index 3bb0157..adab190 100644
--- a/src/flupy/fluent.py
+++ b/src/flupy/fluent.py
@@ -85,7 +85,7 @@ class Fluent(Generic[T]):
                .filter(lambda x: x % 3 == 0)
                .chunk(3)
                .take(2)
-               .collect()
+               .to_list()
        )
        [[0, 9, 36], [81, 144, 225]]
    """
@@ -128,6 +128,14 @@ def collect(self, n: int = None, container_type: CallableTakesIterable[T] = list
        """
        return container_type(self.take(n))

+    def to_list(self) -> List[T]:
+        """Collect items from iterable into a list
+
+        >>> flu(range(4)).to_list()
+        [0, 1, 2, 3]
+        """
+        return list(self)
+
    def sum(self) -> Union[T, int]:
        """Sum of elements in the iterable

@@ -231,14 +239,14 @@ def sort(
        Note: sorting loads the entire iterable into memory

-        >>> flu([3,6,1]).sort().collect()
-        [1, 3, 6]
+        >>> flu([3,6,1]).sort().to_list()
+        [1, 3, 6]

-        >>> flu([3,6,1]).sort(reverse=True).collect()
-        [6, 3, 1]
+        >>> flu([3,6,1]).sort(reverse=True).to_list()
+        [6, 3, 1]

-        >>> flu([3,-6,1]).sort(key=abs).collect()
-        [1, 3, -6]
+        >>> flu([3,-6,1]).sort(key=abs).to_list()
+        [1, 3, -6]
        """
        return Fluent(sorted(self, key=key, reverse=reverse))
@@ -254,8 +262,8 @@ def join_left(
        Note: join_left loads *other* into memory

-        >>> flu(range(6)).join_left(range(0, 6, 2)).collect()
-        [(0, 0), (1, None), (2, 2), (3, None), (4, 4), (5, None)]
+        >>> flu(range(6)).join_left(range(0, 6, 2)).to_list()
+        [(0, 0), (1, None), (2, 2), (3, None), (4, 4), (5, None)]
        """

        def _impl() -> Generator[Tuple[T, Union[_T1, None]], None, None]:
@@ -288,8 +296,8 @@ def join_inner(
        Note: join_inner loads *other* into memory

-        >>> flu(range(6)).join_inner(range(0, 6, 2)).collect()
-        [(0, 0), (2, 2), (4, 4)]
+        >>> flu(range(6)).join_inner(range(0, 6, 2)).to_list()
+        [(0, 0), (2, 2), (4, 4)]
        """
@@ -313,10 +321,10 @@ def shuffle(self) -> "Fluent[T]":
        Note: shuffle loads the entire iterable into memory

-        >>> flu([3,6,1]).shuffle().collect()
-        [6, 1, 3]
+        >>> flu([3,6,1]).shuffle().to_list()
+        [6, 1, 3]
        """
-        dat: List[T] = list(self.collect())
+        dat: List[T] = list(self.to_list())
        return Fluent(sample(dat, len(dat)))

    def group_by(
@@ -328,12 +336,12 @@ def group_by(
        When the iterable is pre-sorted according to *key*, setting *sort* to False will prevent
        loading the dataset into memory and improve performance

-        >>> flu([2, 4, 2, 4]).group_by().collect()
+        >>> flu([2, 4, 2, 4]).group_by().to_list()
        [(2, <flu object>), (4, <flu object>)]

        Or, if the iterable is pre-sorted

-        >>> flu([2, 2, 5, 5]).group_by(sort=False).collect()
+        >>> flu([2, 2, 5, 5]).group_by(sort=False).to_list()
        [(2, <flu object>), (5, <flu object>)]

        Using a key function
@@ -344,7 +352,7 @@ def group_by(
        >>> points = [
            {'x': 1, 'y': 0},
            {'x': 4, 'y': 3},
            {'x': 1, 'y': 5}
        ]
        >>> key_func = lambda u: u['x']
-        >>> flu(points).group_by(key=key_func, sort=True).collect()
+        >>> flu(points).group_by(key=key_func, sort=True).to_list()
        [(1, <flu object>), (4, <flu object>)]
        """
@@ -354,10 +362,10 @@ def unique(self, key: Callable[[T], Hashable] = identity) -> "Fluent[T]":
        """Yield elements that are unique by a *key*.
-        >>> flu([2, 3, 2, 3]).unique().collect()
+        >>> flu([2, 3, 2, 3]).unique().to_list()
        [2, 3]

-        >>> flu([2, -3, -2, 3]).unique(key=abs).collect()
+        >>> flu([2, -3, -2, 3]).unique(key=abs).to_list()
        [2, -3]
        """
@@ -381,8 +389,8 @@ def rate_limit(self, per_second: Union[int, float] = 100) -> "Fluent[T]":
        >>> import time
        >>> start_time = time.time()

-        >>> flu(range(3)).rate_limit(3).collect()
-        >>> print('Runtime', int(time.time() - start_time)
+        >>> _ = flu(range(3)).rate_limit(3).to_list()
+        >>> print('Runtime', int(time.time() - start_time))
        1.00126 # approximately 1 second for 3 items
        """
@@ -408,10 +416,10 @@ def side_effect(
        and after iteration ends respectively. Each will be called exactly once.

-        >>> flu(range(2)).side_effect(lambda x: print(f'Collected {x}')).collect()
-        Collected 0
-        Collected 1
-        [0, 1]
+        >>> flu(range(2)).side_effect(lambda x: print(f'Collected {x}')).to_list()
+        Collected 0
+        Collected 1
+        [0, 1]
        """

        def _impl() -> Generator[T, None, None]:
@@ -434,7 +442,7 @@ def _impl() -> Generator[T, None, None]:
    def map(self, func: Callable[[T], _T1], *args: Any, **kwargs: Any) -> "Fluent[_T1]":
        """Apply *func* to each element of iterable

-        >>> flu(range(5)).map(lambda x: x*x).collect()
+        >>> flu(range(5)).map(lambda x: x*x).to_list()
        [0, 1, 4, 9, 16]
        """
@@ -448,10 +456,10 @@ def _impl() -> Generator[_T1, None, None]:
    def map_item(self: "Fluent[SupportsGetItem[T]]", item: Hashable) -> "Fluent[SupportsGetItem[T]]":
        """Extracts *item* from every element of the iterable

-        >>> flu([(2, 4), (2, 5)]).map_item(1).collect()
+        >>> flu([(2, 4), (2, 5)]).map_item(1).to_list()
        [4, 5]

-        >>> flu([{'mykey': 8}, {'mykey': 5}]).map_item('mykey').collect()
+        >>> flu([{'mykey': 8}, {'mykey': 5}]).map_item('mykey').to_list()
        [8, 5]
        """
@@ -466,7 +474,7 @@ def map_attr(self, attr: str) -> "Fluent[Any]":
        >>> from collections import namedtuple
        >>> MyTup = namedtuple('MyTup', ['value', 'backup_val'])
-        >>> flu([MyTup(1, 5), MyTup(2, 4)]).map_attr('value').collect()
+        >>> flu([MyTup(1, 5), MyTup(2, 4)]).map_attr('value').to_list()
        [1, 2]
        """
        return self.map(lambda x: getattr(x, attr))
@@ -474,7 +482,7 @@ def filter(self, func: Callable[..., bool], *args: Any, **kwargs: Any) -> "Fluent[T]":
        """Yield elements of iterable where *func* returns truthy

-        >>> flu(range(10)).filter(lambda x: x % 2 == 0).collect()
+        >>> flu(range(10)).filter(lambda x: x % 2 == 0).to_list()
        [0, 2, 4, 6, 8]
        """
@@ -489,8 +497,8 @@ def reduce(self, func: Callable[[T, T], T]) -> T:
        """Apply a function of two arguments cumulatively to the items of the iterable,
        from left to right, so as to reduce the sequence to a single value

-        >>> flu(range(5)).reduce(lambda x, y: x + y)
-        10
+        >>> flu(range(5)).reduce(lambda x, y: x + y)
+        10
        """
        return reduce(func, self)
@@ -540,8 +548,8 @@ def zip(
        """Yields tuples containing the i-th element from the i-th
        argument in the chainable, and the iterable

-        >>> flu(range(5)).zip(range(3, 0, -1)).collect()
-        [(0, 3), (1, 2), (2, 1)]
+        >>> flu(range(5)).zip(range(3, 0, -1)).to_list()
+        [(0, 3), (1, 2), (2, 1)]
        """
        # @self_to_flu is not compatible with @overload
        # make sure any usage of self supports arbitrary iterables
@@ -554,12 +562,12 @@ def zip_longest(self, *iterable: Iterable[_T1], fill_value: Any = None) -> "Flue
        Iteration continues until the longest iterable is exhausted.
        If iterables are uneven in length, missing values are filled in with fill value

-        >>> flu(range(5)).zip_longest(range(3, 0, -1)).collect()
-        [(0, 3), (1, 2), (2, 1), (3, None), (4, None)]
+        >>> flu(range(5)).zip_longest(range(3, 0, -1)).to_list()
+        [(0, 3), (1, 2), (2, 1), (3, None), (4, None)]

-        >>> flu(range(5)).zip_longest(range(3, 0, -1), fill_value='a').collect()
-        [(0, 3), (1, 2), (2, 1), (3, 'a'), (4, 'a')]
+        >>> flu(range(5)).zip_longest(range(3, 0, -1), fill_value='a').to_list()
+        [(0, 3), (1, 2), (2, 1), (3, 'a'), (4, 'a')]
        """
        return Fluent(zip_longest(self, *iterable, fillvalue=fill_value))
@@ -567,15 +575,15 @@ def enumerate(self, start: int = 0) -> "Fluent[Tuple[int, T]]":
        """Yields tuples from the chainable where the first element
        is a count from initial value *start*.

-        >>> flu(range(5)).zip_longest(range(3, 0, -1)).collect()
-        [(0, 3), (1, 2), (2, 1), (3, None), (4, None)]
+        >>> flu(range(5)).zip_longest(range(3, 0, -1)).to_list()
+        [(0, 3), (1, 2), (2, 1), (3, None), (4, None)]
        """
        return Fluent(enumerate(self, start=start))

    def take(self, n: Optional[int] = None) -> "Fluent[T]":
        """Yield first *n* items of the iterable

-        >>> flu(range(10)).take(2).collect()
+        >>> flu(range(10)).take(2).to_list()
        [0, 1]
        """
        return Fluent(islice(self._iterator, n))
@@ -583,7 +591,7 @@ def take_while(self, predicate: Callable[[T], bool]) -> "Fluent[T]":
        """Yield elements from the chainable so long as the predicate is true

-        >>> flu(range(10)).take_while(lambda x: x < 3).collect()
+        >>> flu(range(10)).take_while(lambda x: x < 3).to_list()
        [0, 1, 2]
        """
        return Fluent(takewhile(predicate, self._iterator))
@@ -592,8 +600,8 @@ def drop_while(self, predicate: Callable[[T], bool]) -> "Fluent[T]":
        """Drop elements from the chainable as long as the predicate is true;
        afterwards, return every element

-        >>> flu(range(10)).drop_while(lambda x: x < 3).collect()
-        [3, 4, 5, 6, 7, 8, 9]
+        >>> flu(range(10)).drop_while(lambda x: x < 3).to_list()
+        [3, 4, 5, 6, 7, 8, 9]
        """
        return Fluent(dropwhile(predicate, self._iterator))
@@ -602,8 +610,8 @@ def chunk(self, n: int) -> "Fluent[List[T]]":
        if the iterable is not evenly divisible by *n*, the final list will be shorter

-        >>> flu(range(10)).chunk(3).collect()
-        [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
+        >>> flu(range(10)).chunk(3).to_list()
+        [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
        """

        def _impl() -> Generator[List[T], None, None]:
@@ -629,23 +637,23 @@ def flatten(
        Strings are treated as non-iterable for convenience.
        Set iterate_strings=True to change that behavior.
- >>> flu([[0, 1, 2], [3, 4, 5]]).flatten().collect() - [0, 1, 2, 3, 4, 5] + >>> flu([[0, 1, 2], [3, 4, 5]]).flatten().to_list() + [0, 1, 2, 3, 4, 5] - >>> flu([[0, [1, 2]], [[3, 4], 5]]).flatten().collect() - [0, [1, 2], [3, 4], 5] + >>> flu([[0, [1, 2]], [[3, 4], 5]]).flatten().to_list() + [0, [1, 2], [3, 4], 5] - >>> flu([[0, [1, 2]], [[3, 4], 5]]).flatten(depth=2).collect() - [0, 1, 2, 3, 4, 5] + >>> flu([[0, [1, 2]], [[3, 4], 5]]).flatten(depth=2).to_list() + [0, 1, 2, 3, 4, 5] - >>> flu([[0, [1, 2]], [[3, 4], 5]]).flatten(depth=2).collect() - [0, 1, 2, 3, 4, 5] + >>> flu([[0, [1, 2]], [[3, 4], 5]]).flatten(depth=2).to_list() + [0, 1, 2, 3, 4, 5] - >>> flu([1, (2, 2), 4, [5, (6, 6, 6)]]).flatten(base_type=tuple).collect() - [1, (2, 2), 4, 5, (6, 6, 6)] + >>> flu([1, (2, 2), 4, [5, (6, 6, 6)]]).flatten(base_type=tuple).to_list() + [1, (2, 2), 4, 5, (6, 6, 6)] - >>> flu([[2, 0], 'abc', 3, [4]]).flatten(iterate_strings=True).collect() - [2, 0, 'a', 'b', 'c', 3, 4] + >>> flu([[2, 0], 'abc', 3, [4]]).flatten(iterate_strings=True).to_list() + [2, 0, 'a', 'b', 'c', 3, 4] """ # TODO(OR): Reimplement with strong types @@ -672,13 +680,13 @@ def walk(node: Any, level: int) -> Generator[T, None, None]: def denormalize(self: "Fluent[SupportsIteration[Any]]", iterate_strings: bool = False) -> "Fluent[Tuple[Any, ...]]": """Denormalize iterable components of each record - >>> flu([("abc", [1, 2, 3])]).denormalize().collect() + >>> flu([("abc", [1, 2, 3])]).denormalize().to_list() [('abc', 1), ('abc', 2), ('abc', 3)] - >>> flu([("abc", [1, 2])]).denormalize(iterate_strings=True).collect() + >>> flu([("abc", [1, 2])]).denormalize(iterate_strings=True).to_list() [('a', 1), ('a', 2), ('b', 1), ('b', 2), ('c', 1), ('c', 2)] - >>> flu([("abc", [])]).denormalize().collect() + >>> flu([("abc", [])]).denormalize().to_list() [] """ @@ -717,16 +725,16 @@ def window(self, n: int, step: int = 1, fill_value: Any = None) -> "Fluent[Tuple If the length of the iterable does not evenly divide by the *step* the final output is padded with *fill_value* - >>> flu(range(5)).window(3).collect() + >>> flu(range(5)).window(3).to_list() [(0, 1, 2), (1, 2, 3), (2, 3, 4)] - >>> flu(range(5)).window(n=3, step=2).collect() + >>> flu(range(5)).window(n=3, step=2).to_list() [(0, 1, 2), (2, 3, 4)] - >>> flu(range(9)).window(n=4, step=3).collect() + >>> flu(range(9)).window(n=4, step=3).to_list() [(0, 1, 2, 3), (3, 4, 5, 6), (6, 7, 8, None)] - >>> flu(range(9)).window(n=4, step=3, fill_value=-1).collect() + >>> flu(range(9)).window(n=4, step=3, fill_value=-1).to_list() [(0, 1, 2, 3), (3, 4, 5, 6), (6, 7, 8, -1)] """ @@ -777,11 +785,11 @@ def tee(self, n: int = 2) -> "Fluent[Fluent[T]]": anywhere else; otherwise, the iterable could get advanced without the tee objects being informed - >>> copy1, copy2 = flu(range(5)).tee() - >>> copy1.sum() - 10 - >>> copy2.collect() - [0, 1, 2, 3, 4] + >>> copy1, copy2 = flu(range(5)).tee() + >>> copy1.sum() + 10 + >>> copy2.to_list() + [0, 1, 2, 3, 4] """ return Fluent((Fluent(x) for x in tee(self, n))) @@ -796,7 +804,7 @@ class flu(Fluent[T]): .filter(lambda x: x % 3 == 0) .chunk(3) .take(2) - .collect() + .to_list() ) [[0, 9, 36], [81, 144, 225]] """ diff --git a/src/tests/test_flu.py b/src/tests/test_flu.py index c40d2e0..1f5daf7 100644 --- a/src/tests/test_flu.py +++ b/src/tests/test_flu.py @@ -12,6 +12,10 @@ def test_collect(): assert flu(range(3)).collect(n=2) == [0, 1] +def test_to_list(): + assert flu(range(3)).to_list() == [0, 1, 2] + + def test___getitem__(): assert flu(range(3))[1] == 1 
assert flu(range(3))[1:].collect() == [1, 2] From 2125dbdd723909efbce25ea39431b033a0b650c2 Mon Sep 17 00:00:00 2001 From: Oliver Rice Date: Tue, 12 Oct 2021 04:07:26 -0500 Subject: [PATCH 2/2] remove redundant list(list(x)) -> list(x) --- src/flupy/fluent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/flupy/fluent.py b/src/flupy/fluent.py index adab190..eb22354 100644 --- a/src/flupy/fluent.py +++ b/src/flupy/fluent.py @@ -324,7 +324,7 @@ def shuffle(self) -> "Fluent[T]": >>> flu([3,6,1]).shuffle().to_list() [6, 1, 3] """ - dat: List[T] = list(self.to_list()) + dat: List[T] = self.to_list() return Fluent(sample(dat, len(dat))) def group_by(
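
Usage sketch (illustrative only; the results below follow the collect() implementation and the tests
added in this patch): to_list() materializes the whole pipeline into a list, while collect() keeps
its n and container_type options for partial or non-list collection.

    >>> from flupy import flu
    >>> flu(range(3)).to_list()
    [0, 1, 2]
    >>> flu(range(3)).collect(n=2)
    [0, 1]
    >>> flu(range(3)).collect(container_type=set)
    {0, 1, 2}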