Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added more unit and integration tests. #12

Merged
merged 7 commits into from
Jun 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
global-exclude *test.py
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ Feel free to do any analysis you wish. For example:
.plot()
)
```
![Coal vs Wind in the US since 1940](demo.png)
![Coal vs Wind in the US since 1940](https://raw.githubusercontent.com/alxmrs/dask-ee/main/demo.png)

There are a few other useful things you can do.

Expand Down Expand Up @@ -81,6 +81,11 @@ df.head()
Contributions are welcome. A good way to start is to check out the open [issues](https://github.com/alxmrs/dask-ee/issues)
or file a new one. We're happy to review pull requests, too.

Before writing code, please install the development dependencies (after cloning the repo):
```shell
pip install -e ".[dev]"
```

## License
```
Copyright 2024 Alexander S Merose
Expand Down
14 changes: 7 additions & 7 deletions dask_ee/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@
}


# TODO(#4): Support 'auto' chunks, where we calculate the maximum allowed page size given the number of
# bytes in each row.
def read_ee(
fc: t.Union[ee.FeatureCollection, str],
chunksize: t.Union[int, t.Literal['auto']] = 5_000,
Expand All @@ -41,25 +39,27 @@ def read_ee(
Returns:
A dask DataFrame with paged Google Earth Engine data.
"""
# TODO(#4): Support 'auto' chunks, where we calculate the maximum allowed page size given the number of
# bytes in each row.
if chunksize == 'auto':
raise NotImplementedError('Auto chunksize is not implemented yet!')

if isinstance(fc, str):
fc = ee.FeatureCollection(fc)

if chunksize == 'auto':
raise NotImplementedError('Auto chunksize is not implemented yet!')

# Make all the getInfo() calls at once, up front.
fc_size, all_info = ee.List([fc.size(), fc.limit(0)]).getInfo()

columns = {'geo': 'Json'}
columns.update(all_info['columns'])
del columns['system:index']
if 'system:index' in columns:
del columns['system:index']

divisions = tuple(range(0, fc_size, chunksize))

# TODO(#5): Compare `toList()` to other range operations, like getting all index IDs via `getInfo()`.
pages = [ee.FeatureCollection(fc.toList(chunksize, i)) for i in divisions]
# Get the remainder, if it exists. `io_chunks` are not likely to evenly partition the data.
# Get the remainder, if it exists. `chunksize` is not likely to evenly partition the data.
d, r = divmod(fc_size, chunksize)
if r != 0:
pages.append(ee.FeatureCollection(fc.toList(r, d)))
Expand Down
58 changes: 48 additions & 10 deletions dask_ee/read_integrationtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,28 +23,66 @@ def setUpClass(cls):
ee.Initialize()

def test_reads_dask_dataframe(self):
fc = ee.FeatureCollection("WRI/GPPD/power_plants")
ddf = dask_ee.read_ee(fc)
fc = ee.FeatureCollection('WRI/GPPD/power_plants')
df = dask_ee.read_ee(fc)

head = ddf.head()
columns = ddf.columns
head = df.head()
columns = df.columns

self.assertIsNotNone(ddf)
self.assertIsNotNone(df)
self.assertIsNotNone(head)
self.assertIsInstance(ddf, dd.DataFrame)
self.assertEqual(ddf.compute().shape, (28_664, 23))
self.assertIsInstance(df, dd.DataFrame)
self.assertEqual(df.compute().shape, (28_664, 23))

print(columns)
print(head)

def test_works_with_defined_features(self):
# Make a list of Features.
features = [
ee.Feature(
ee.Geometry.Rectangle(30.01, 59.80, 30.59, 60.15),
{'name': 'Voronoi'},
),
ee.Feature(ee.Geometry.Point(-73.96, 40.781), {'name': 'Thiessen'}),
ee.Feature(ee.Geometry.Point(6.4806, 50.8012), {'name': 'Dirichlet'}),
]

fc = ee.FeatureCollection(features)

df = dask_ee.read_ee(fc)

self.assertEqual(list(df.columns), ['geo', 'name'])

def test_works_with_a_single_feature_in_fc(self):
from_geom = ee.FeatureCollection(ee.Geometry.Point(16.37, 48.225))

df = dask_ee.read_ee(from_geom)

self.assertEqual(list(df.columns), ['geo'])
self.assertEqual(df.compute().shape, (1, 1))

def test_can_create_random_points(self):
# Define an arbitrary region in which to compute random points.
region = ee.Geometry.Rectangle(-119.224, 34.669, -99.536, 50.064)

# Create 1000 random points in the region.
random_points = ee.FeatureCollection.randomPoints(region)

# Note: these random points have no system:index!
df = dask_ee.read_ee(random_points)

self.assertEqual(list(df.columns), ['geo'])
self.assertEqual(df.compute().shape, (1000, 1))

def test_prof__read_ee(self):
fc = ee.FeatureCollection("WRI/GPPD/power_plants")
fc = ee.FeatureCollection('WRI/GPPD/power_plants')
with cProfile.Profile() as pr:
_ = dask_ee.read_ee(fc)

# Modified version of `pr.print_stats()`.
pstats.Stats(pr).sort_stats("cumtime").print_stats()
pstats.Stats(pr).sort_stats('cumtime').print_stats()


if __name__ == "__main__":
if __name__ == '__main__':
unittest.main()
6 changes: 6 additions & 0 deletions dask_ee/read_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ def test_can_import_read_op(self):
except ModuleNotFoundError:
self.fail('Cannot import `read_ee` function.')

def test_rejects_auto_chunks(self):
import dask_ee

with self.assertRaises(NotImplementedError):
dask_ee.read_ee('WRI/GPPD/power_plants', 'auto')


if __name__ == '__main__':
unittest.main()
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ tests = [
"pytest",
"pyink",
]
dev = [
"dask-ee[tests]",
"build",
]

[project.urls]
Homepage = "https://github.com/alxmrs/dask-ee"
Expand Down
Loading