-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathh3_aggregation.py
162 lines (137 loc) · 8.23 KB
/
h3_aggregation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# ArcPy Script to convert iwp shp files to sumerised table for aggregated counts
# INPUT : iwp shape files duplicates marked <staging_du=1>, h3_tessellation_layer
# OUTPUT : Summery table with count for each Hexagonal cell in the h3_tessellation_layer
# saved as csv file in the data directory with batch name prefixed
# For FINAL output csv files need to be joied on GRID ID with aggregation
# Code is written to accumilate the counts for each run
# i.e. if you do not give a new name for Summery table it will start updating the existing table
# When executing same shp file should not be processed twice
import arcpy
import os
arcpy.env.workspace = None
# Set the workspace and output directory
#workspace = arcpy.env.workspace # Use the current workspace
# An error was generated when the name of the file was longer due to a known issue in ArcGIS when executing SummerizeWithin
data_directory="D:/IWP_ag3/"
data_batch="arc_11"
output_directory = "D:/"
gdb_name = "ag.gdb"
arcpy.CreateFileGDB_management(output_directory, gdb_name)
output_directory = "D:/ag.gdb/" # Where to save the results
# Create a new geodatabase if it doesn't exist
#if not arcpy.Exists(output_directory):
# arcpy.CreateFileGDB_management(os.path.dirname(output_directory), os.path.basename(output_directory))
# Set the workspace to the output directory
arcpy.env.workspace = output_directory
# Read the h3 hexagonal tessalation shp file
#h3_tessellation_layer = "D:/IWP_ag2/arctic_h3hex_res5.shp"
h3_tessellation_layer = os.path.join(data_directory,"h3_hex/arctic_h3hex_res5.shp")
# Set the Directory containing polygon shapefiles to process
#shp_directory = "D:/IWP_ag2/arc_test5/"
shp_directory = os.path.join(data_directory,data_batch)
# Set the name of summery table new name to be given if you want a new run
sum_table = "tt"
# Check if the summary table exists, if not, create it
if not arcpy.Exists(sum_table):
print("New table created for Summary data")
try:
arcpy.CreateTable_management(output_directory, sum_table)
arcpy.AddField_management(sum_table, "GRID_ID", "TEXT", field_length=50)
arcpy.AddField_management(sum_table, "Sum_Cnt", "LONG")
except arcpy.ExecuteError:
print(arcpy.GetMessages(2)) # Print any error messages
else:
print("Summary table already exists and will be used to add counts")
# Print all field names in the summary table
# Just check and see if we have the fields required for the summery
# Can commnet out later
print("Field names in summary table:")
for field in arcpy.ListFields(sum_table):
print(field.name)
# Set the tessellation layer to for the summerized aggregations
#h3_tessellation_layer = "arcticHexRes5" # Name of the tessellation layer already existing
######################################################
# Loop through each polygon shapefile in the directory
######################################################
#shp_directory = "D:/IWP_ag/arc_test5/" # Directory containing polygon shapefiles
for filename in os.listdir(shp_directory):
temp_sum = os.path.join(output_directory, "ts") #temp summary layer to keep the sumerize within
if filename.endswith(".shp"):
polygon_shp = os.path.join(shp_directory, filename)
try: # Execute SummarizeWithin for the layer
if arcpy.Exists("input_features_lyr"): # remove layer if already exisits
arcpy.Delete_management("input_features_lyr")
print("old input layer deleted")
if arcpy.Exists("centroid_points_lyr"): # remove layer if already exisits
arcpy.Delete_management("centroid_points_lyr")
print("old centroid point layer deleted")
# Make new feature to count the polygons as arcGIS only allows SUM for aggregation
# A new iwp cnt feature is added with "1" and later SUM ed
# Also a check is done to see if the polygon is marked as a duplicate (not taken)
arcpy.management.MakeFeatureLayer(polygon_shp, "input_features_lyr", "staging_du <> 1")
new_field_name = "iwp_cnt"
existing_fields = [field.name for field in arcpy.ListFields("input_features_lyr")]
if new_field_name not in existing_fields:
# Add the new a field to the attribute table to be used for counting the polygons using SUM
arcpy.AddField_management("input_features_lyr", new_field_name, "SHORT")
# Calculate the new field with the value 1 for all rows
arcpy.CalculateField_management("input_features_lyr", new_field_name, 1, "PYTHON3")
# Create a new point feature layer with centroids so the cells will not count same polygon more than once
arcpy.FeatureToPoint_management("input_features_lyr", "centroid_points_lyr", "INSIDE")
arcpy.analysis.SummarizeWithin(h3_tessellation_layer, "centroid_points_lyr", temp_sum, "ONLY_INTERSECTING", [["iwp_cnt", "SUM"]])
print(f"SummarizeWithin done for {filename}")
# Print data collected from SummerizeWithin
# Just check and see if we have the fields required for the summary later
# Can remove later
result = arcpy.GetCount_management(temp_sum)
polygon_count = int(result.getOutput(0))
print("Number of H3 Cells in summary:", polygon_count)
arcpy.Delete_management("input_features_lyr")
arcpy.Delete_management("centroid_points_lyr")
print("tmp layers removed")
except arcpy.ExecuteError:
print(f"Failed to execute SummarizeWithin for {filename}:")
print(arcpy.GetMessages())
# Update the summary table with the results from the current shapefile
try:
# Reading the cummery data for the current layer
with arcpy.da.SearchCursor(temp_sum, ["GRID_ID", "Sum_iwp_cnt"]) as cursor:
print("Field names:", cursor.fields)
# Reading the summary data table to aggregate into
with arcpy.da.UpdateCursor(sum_table, ["GRID_ID", "Sum_Cnt"]) as update_cursor:
print("Field names:", update_cursor.fields) # Print field names
for row in cursor:
print(u'GRID_ID:{0}, iwp_cnt:{1},'.format(row[0], row[1]))
grid_id, sum_cnt = row
# Check if the H3_ID already exists in the summary table
row_found = False
for update_row in update_cursor:
if update_row[0] == grid_id:
update_row[1] += sum_cnt
update_cursor.updateRow(update_row)
row_found = True
print("Updated:",grid_id, "with+:",sum_cnt)
break
if not row_found: # if not create new entry and add the row
with arcpy.da.InsertCursor(sum_table, ["GRID_ID", "Sum_Cnt"]) as insert_cursor:
try:
insert_cursor.insertRow((grid_id, sum_cnt))
except Exception as e:
print(f"Error inserting row: {e}")
#insert_cursor.insertRow((grid_id, sum_cnt))
except arcpy.ExecuteError:
print(arcpy.GetMessages())
print(f"SummarizeWithin completed for {filename}")
# Clean up temporary layer
arcpy.Delete_management(temp_sum)
#sum_table_path = os.path.join(output_directory, "sum_table.csv")
csv_file = os.path.join(data_directory,data_batch+".csv")
arcpy.management.CopyRows(sum_table, csv_file)
print("===========================================================")
print("SummarizeWithin for all shapefiles completed. Output saved ")
print("Final Counts summary table.")
with arcpy.da.SearchCursor(sum_table, ["GRID_ID", "Sum_Cnt"]) as search_cursor:
print("Field names:", search_cursor.fields) # Print field names
for row in search_cursor:
print(u'GRID_ID:{0}, iwp_cnt:{1},'.format(row[0], row[1]))
arcpy.env.workspace = None