projectnessie · snazy · Nov 11, 2024 · Nov 11, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -40,6 +40,8 @@ as necessary. Empty sections will not end in the release notes.
 
 ### Fixes
 
+- GC: Treat statistics files (and partition statistics files) referenced by a snapshot as 'live'.
+
 ### Commits
 
 ## [0.99.0] Release (2024-09-26)

diff --git a/gc/gc-iceberg/src/main/java/org/projectnessie/gc/iceberg/IcebergContentToFiles.java b/gc/gc-iceberg/src/main/java/org/projectnessie/gc/iceberg/IcebergContentToFiles.java
@@ -32,7 +32,9 @@
 import org.apache.iceberg.ManifestFile;
 import org.apache.iceberg.ManifestReaderUtil;
 import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.PartitionStatisticsFile;
 import org.apache.iceberg.Snapshot;
+import org.apache.iceberg.StatisticsFile;
 import org.apache.iceberg.TableMetadata;
 import org.apache.iceberg.TableMetadataParser;
 import org.apache.iceberg.io.CloseableIterable;
@@ -137,11 +139,26 @@ private Stream<FileReference> extractTableFiles(ContentReference contentReferenc
     Snapshot snapshot =
         snapshotId < 0L ? tableMetadata.currentSnapshot() : tableMetadata.snapshot(snapshotId);
 
-    Map<Integer, PartitionSpec> specsById = tableMetadata.specsById();
-
     Stream<StorageUri> allFiles = elementaryUrisFromSnapshot(snapshot, contentReference);
 
     if (snapshot != null) {
+      long effectiveSnapshotId = snapshot.snapshotId();
+      allFiles =
+          Stream.concat(
+              allFiles,
+              tableMetadata.statisticsFiles().stream()
+                  .filter(s -> s.snapshotId() == effectiveSnapshotId)
+                  .map(StatisticsFile::path)
+                  .map(StorageUri::of));
+      allFiles =
+          Stream.concat(
+              allFiles,
+              tableMetadata.partitionStatisticsFiles().stream()
+                  .filter(s -> s.snapshotId() == effectiveSnapshotId)
+                  .map(PartitionStatisticsFile::path)
+                  .map(StorageUri::of));
+
+      Map<Integer, PartitionSpec> specsById = tableMetadata.specsById();
       allFiles =
           Stream.concat(
               allFiles,