From 0e8bfc941908486bae4cdc68a49be8727d081e2e Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Fri, 20 Dec 2024 15:33:40 -0600 Subject: [PATCH] fix: update Chicago Taxi Dataset URL to Zenodo --- docs/getting-started/10-minutes-to-awkward-array.md | 6 +++--- docs/user-guide/how-to-examine-single-item.md | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/getting-started/10-minutes-to-awkward-array.md b/docs/getting-started/10-minutes-to-awkward-array.md index c380224b28..37fe5638bb 100644 --- a/docs/getting-started/10-minutes-to-awkward-array.md +++ b/docs/getting-started/10-minutes-to-awkward-array.md @@ -24,7 +24,7 @@ In this guide, we'll look at how to manipulate a jagged dataset to plot taxi rou ## Loading the dataset -Our dataset is formatted as a 611 MB [Apache Parquet](https://parquet.apache.org/) file, provided [here](https://pivarski-princeton.s3.amazonaws.com/chicago-taxi.parquet). Alongside JSON, and raw buffers, Awkward can also read Parquet files and Arrow tables. +Our dataset is formatted as a 611 MB [Apache Parquet](https://parquet.apache.org/) file, provided [here](https://zenodo.org/records/14537442/files/chicago-taxi.parquet). Alongside JSON, and raw buffers, Awkward can also read Parquet files and Arrow tables. Given that this file is so large, let's first look at the *metadata* with `ak.metadata_from_parquet` to see what we're working with: @@ -43,7 +43,7 @@ import numpy as np import awkward as ak metadata = ak.metadata_from_parquet( - "https://pivarski-princeton.s3.amazonaws.com/chicago-taxi.parquet" + "https://zenodo.org/records/14537442/files/chicago-taxi.parquet" ) ``` @@ -59,7 +59,7 @@ There are a lot of different columns here (`trip.sec`, `trip.begin.lon`, `trip.p ```{code-cell} ipython3 taxi = ak.from_parquet( - "https://pivarski-princeton.s3.amazonaws.com/chicago-taxi.parquet", + "https://zenodo.org/records/14537442/files/chicago-taxi.parquet", row_groups=[0], columns=["trip.km", "trip.begin.l*", "trip.end.l*", "trip.path.*"], ) diff --git a/docs/user-guide/how-to-examine-single-item.md b/docs/user-guide/how-to-examine-single-item.md index 4156ebdb77..fa763f75c1 100644 --- a/docs/user-guide/how-to-examine-single-item.md +++ b/docs/user-guide/how-to-examine-single-item.md @@ -27,7 +27,7 @@ First, let's load the dataset using the {func}`ak.from_parquet` function. We wil ```{code-cell} ipython3 import awkward as ak -url = "https://pivarski-princeton.s3.amazonaws.com/chicago-taxi.parquet" +url = "https://zenodo.org/records/14537442/files/chicago-taxi.parquet" taxi = ak.from_parquet( url, row_groups=[0],