-
Notifications
You must be signed in to change notification settings - Fork 0
/
metrics.sql
414 lines (370 loc) · 12.7 KB
/
metrics.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
-- categorises parcel area vs land use area
SELECT CASE
WHEN land_parcel_area > sum_lua THEN 'Parcel > LU'
WHEN land_parcel_area = sum_lua THEN 'Equal'
WHEN land_parcel_area < sum_lua THEN 'Parcel < LU'
ELSE 'Other'
END AS Area_comparison,
COUNT(1)
FROM
(SELECT hapar_id,
SUM(land_use_area) OVER (PARTITION BY hapar_id,
year) AS sum_lua,
land_parcel_area,
year
FROM temp_permanent) foo
GROUP BY 1
ORDER BY count DESC;
Equal 1,542,209
Parcel > LU 349,954
Parcel < LU 6,112
(10809/1959847)*100 = 0.55% <-- permanent
Equal 127,451
Parcel > LU 39,547
Parcel < LU 5,085
(5440/180233)*100 = 3.01% <-- seasonal
--* finds and sums difference in SINGLE overclaims where bps_claimed_area > land_parcel_area for joined table
SELECT hapar_id, YEAR, total_claimed_area - owner_land_parcel_area AS diff
FROM
(SELECT hapar_id,
YEAR,
owner_land_parcel_area,
user_land_parcel_area,
owner_bps_claimed_area + user_bps_claimed_area AS total_claimed_area
FROM joined) foo
WHERE total_claimed_area > owner_land_parcel_area
OR total_claimed_area > user_land_parcel_area
ORDER BY diff DESC;
-- categorises llo flags based on matches or no matches
SELECT CASE
WHEN perm_llo = 'N'
AND seas_llo = 'Y' Then 'N-Y no match'
WHEN perm_llo = 'Y'
AND seas_llo = 'N' THEN 'Y-N no match'
WHEN perm_llo = 'N'
AND seas_llo = 'N' THEN 'N match'
WHEN perm_llo = 'Y'
AND seas_llo = 'Y' THEN 'Y match'
WHEN perm_llo = seas_llo THEN 'Match'
ELSE 'Other'
END as LLO_flag_compare,
Count(1)
FROM
(SELECT hapar_id,
year,
perm.land_leased_out AS perm_llo,
seas.land_leased_out AS seas_llo
FROM temp_permanent AS perm
INNER JOIN temp_seasonal AS seas USING (hapar_id,
land_use,
land_use_area,
year)
GROUP BY hapar_id,
year,
perm_llo,
seas_llo) foo
GROUP BY 1
ORDER BY LLO_flag_compare
-- Where total land use area matches but number of claims doesnt ON JOIN
SELECT DISTINCT perm_s.hapar_id
FROM (
( SELECT hapar_id,
SUM(land_use_area) lua_per,
COUNT(*) AS cnt_per,
land_parcel_area
FROM temp_permanent
WHERE YEAR = 2018
GROUP BY hapar_id,
land_parcel_area) perm_s
INNER JOIN
( SELECT hapar_id,
SUM(land_use_area) lua_sea,
COUNT(*) AS cnt_sea,
land_parcel_area
FROM temp_seasonal
WHERE YEAR = 2018
GROUP BY hapar_id,
land_parcel_area) seas_s ON seas_s.hapar_id = perm_s.hapar_id
AND lua_sea = lua_per)
WHERE cnt_per != cnt_sea
--See changes made by count
SELECT change_note,
COUNT(*)
FROM combine
GROUP BY change_note
ORDER BY count DESC
All code moved to removed.sql
--TODO This code finds digi_area used in join aka proportion of good area for one year
-- 5,123,636/6,479,357 hectares = 79.0% of 2017 parcels (area)
SELECT SUM(digi_area) FROM
(SELECT hapar_id, digi_area
FROM ladss.snapshot_2017
JOIN final AS l USING (hapar_id)
WHERE l.YEAR = 2016
GROUP BY hapar_id, digi_area) foo
-- 5,129,263/6,476,784 hectares = 79.2% of 2018 parcels (area)
SELECT SUM(digi_area) FROM
(SELECT hapar_id, digi_area
FROM ladss.snapshot_2018
JOIN final AS l USING (hapar_id)
WHERE l.YEAR = 2017
GROUP BY hapar_id, digi_area) foo
-- 5,073,818/6,479,879 hectares = 78.3% of 2019 parcels (area)
SELECT SUM(digi_area) FROM
(SELECT hapar_id, digi_area
FROM ladss.snapshot_2019
JOIN final AS l USING (hapar_id)
WHERE l.YEAR = 2018
GROUP BY hapar_id, digi_area) foo
--* ORIGINAL DATA
-- 5,213,085/6,479,357 hectares = 80.5% of 2017 parcels (area)
SELECT SUM(digi_area)
FROM
(SELECT hapar_id,
digi_area
FROM ladss.snapshot_2017
JOIN
(SELECT DISTINCT hapar_id
FROM
(SELECT DISTINCT hapar_id
FROM rpid.saf_permanent_land_parcels_deliv20190911
WHERE YEAR = 2016
UNION SELECT DISTINCT hapar_id
FROM rpid.saf_seasonal_land_parcels_deliv20190911
WHERE YEAR = 2016) foo) bar USING (hapar_id)
GROUP BY hapar_id,
digi_area) foobar
-- 5,236,722/6,476,784 hectares = 80.9% of 2018 parcels (area)
SELECT SUM(digi_area)
FROM
(SELECT hapar_id,
digi_area
FROM rpid.lpis_land_parcels_2018_jhi_deliv20190911
JOIN
(SELECT DISTINCT hapar_id
FROM
(SELECT DISTINCT hapar_id
FROM rpid.saf_permanent_land_parcels_deliv20190911
WHERE YEAR = 2017
UNION SELECT DISTINCT hapar_id
FROM rpid.saf_seasonal_land_parcels_deliv20190911
WHERE YEAR = 2017) foo) bar USING (hapar_id)
GROUP BY hapar_id,
digi_area) foobar
-- 5,226,963/6,479,879 hectares = 80.7% of 2019 parcels (area)
SELECT SUM(digi_area)
FROM
(SELECT hapar_id,
digi_area
FROM rpid.lpis_land_parcels_2019_jhi_deliv20190911
JOIN
(SELECT DISTINCT hapar_id
FROM
(SELECT DISTINCT hapar_id
FROM rpid.saf_permanent_land_parcels_deliv20190911
WHERE YEAR = 2018
UNION SELECT DISTINCT hapar_id
FROM rpid.saf_seasonal_land_parcels_deliv20190911
WHERE YEAR = 2018) foo) bar USING (hapar_id)
GROUP BY hapar_id,
digi_area) foobar
-- 5,123,636/5,213,085 = 98.3% 2017
-- 5,130,084/5,236,722 = 98.0% 2018
-- 5,074,587/5,226,963 = 97.1% 2019
--TODO how many have at least one owner or user
-- 445,203/522,743 = 85.2% of 2017 parcels have at least one claim associated
-- 444,518/525,621 = 84.6% of 2018 parcels have at least one claim associated
-- 440,026/526,899 = 83.5% of 2019 parcels have at least one claim associated
SELECT hapar_id
FROM rpid.lpis_land_parcels_2019_jhi_deliv20190911
EXCEPT
SELECT hapar_id
FROM final AS l
WHERE l.YEAR = 2018
-- ORIGINAL DATA
-- 448,840/522,743 = 85.9% of original data
-- 449,556/525,621 = 85.5% of original data
-- 451,774/526,899 = 85.7% of original data
SELECT hapar_id
FROM rpid.lpis_land_parcels_2018_jhi_deliv20190911
EXCEPT
(SELECT hapar_id
FROM rpid.saf_permanent_land_parcels_deliv20190911
WHERE YEAR = 2017
UNION SELECT hapar_id
FROM rpid.saf_seasonal_land_parcels_deliv20190911
WHERE YEAR = 2017)
-- 445,203/448,840 = 99.2%
-- 444,518/449,556 = 98.9%
-- 440,026/451,774 = 97.4%
--TODO how many time does digi_area match land_parcel_area within 0.1 and 0.01 threshold
SELECT CASE
WHEN diff < 0.01 THEN '< 0.01 difference'
WHEN diff > 0.01
AND diff < 0.1 THEN '< 0.1 difference'
ELSE '> 0.1 difference'
END AS digi_area_match,
COUNT(1)
FROM
(SELECT hapar_id,
land_parcel_area,
digi_area,
ABS(land_parcel_area - digi_area) AS diff
FROM
(SELECT hapar_id,
land_parcel_area
FROM ladss.saf_iacs_2016_2017_2018
WHERE YEAR = 2016
GROUP BY hapar_id,
land_parcel_area) foo
JOIN ladss.snapshot_2017 USING (hapar_id)) bar
GROUP BY 1
ORDER BY count DESC
-- 2018
SELECT CASE
WHEN diff < 0.01 THEN '< 0.01 difference'
WHEN diff > 0.01
AND diff < 0.1 THEN '< 0.1 difference'
ELSE '> 0.1 difference'
END AS digi_area_match,
COUNT(1)
FROM
(SELECT hapar_id,
land_parcel_area,
digi_area,
ABS(land_parcel_area - digi_area) AS diff
FROM
(SELECT hapar_id,
land_parcel_area
FROM ladss.saf_iacs_2016_2017_2018
WHERE YEAR = 2017
GROUP BY hapar_id,
land_parcel_area) foo
JOIN ladss.snapshot_2018 USING (hapar_id)) bar
GROUP BY 1
ORDER BY count DESC
--TODO count of user businesses map
--TODO underclaims / overclaims
SELECT hapar_id,
YEAR,
sum,
saf.land_parcel_area,
ABS(sum - saf.land_parcel_area)
FROM
(SELECT hapar_id,
YEAR,
sum(all_bps)
FROM
(SELECT hapar_id,
YEAR,
CASE
WHEN owner_bps_claimed_area IS NULL THEN user_bps_claimed_area
WHEN user_bps_claimed_area IS NULL THEN owner_bps_claimed_area
ELSE owner_bps_claimed_area + user_bps_claimed_area
END AS all_bps
FROM ladss.saf_iacs_2016_2017_2018) foo
GROUP BY hapar_id,
YEAR) bar
JOIN ladss.saf_iacs_2016_2017_2018 USING (hapar_id,
year)
JOIN ladss.saf_iacs_2016_2017_2018 AS saf USING (hapar_id,
year)
WHERE sum > saf.land_parcel_area
GROUP BY hapar_id,
YEAR,
sum,
saf.land_parcel_area
--TODO number of landuses per fid
--TODO types of land use categories
--TODO number seasonal renters
Specific problems:
Remaining problem hapars: 999, 1442, 1970, 2597, 40016
-- good example to catch PGRS - RGR subdivision by user 369777, 1144
-- problems joins 40016, 401109 (two businesses claiming same piece)
-- check hapars 1144,1146,1725,1728
212811 40 ha of building
1923 is good example of three renters who are leasing out a portion of their leased land in same year
--! Look at these Doug
SELECT *
FROM temp_seasonal
WHERE hapar_id = 85859 -- so many claims? so many businesses? also these: 83863, 242798
--! look at spatial
--! should I match non_saf owners where other land_use exists for that year? 224600, 212811, 178656, 229246, 230767
--* SPATIAL MATCHES
-- count of businesses per hapar_id (2016)
SELECT hapar_id,
count(distinct businesses)
FROM
(SELECT hapar_id,
owner_habus_id AS businesses
FROM ladss.saf_iacs_2016_2017_2018
WHERE owner_habus_id IS NOT NULL
AND year = 2016
UNION SELECT hapar_id,
user_habus_id AS businesses
FROM ladss.saf_iacs_2016_2017_2018
WHERE user_habus_id IS NOT NULL
AND year = 2016) foo
GROUP BY hapar_id
-- count of landused per hapar_id (2016)
SELECT hapar_id,
count(distinct lu) INTO TABLE lu_per_hapar_2016
FROM
(SELECT hapar_id,
owner_land_use AS lu
FROM ladss.saf_iacs_2016_2017_2018
WHERE owner_land_use IS NOT NULL
AND year = 2016
UNION SELECT hapar_id,
user_land_use AS lu
FROM ladss.saf_iacs_2016_2017_2018
WHERE user_land_use IS NOT NULL
AND year = 2016) foo
GROUP BY hapar_id
-- count of landuses per field
--TODO COMMONS --------------------------------------------------------
--TODO hapar_id = 96993 has cg_hahol_id <> hahol_id (2016)
-- finds difference between bps_eligible_area and total payment_regions
SELECT cg_hahol_id,
hapar_id,
YEAR,
digitised_area,
excluded_land_area,
bps_eligible_area,
region_total,
bps_eligible_area - region_total AS diff
FROM
(SELECT cg_hahol_id,
hapar_id,
YEAR,
digitised_area,
bps_eligible_area,
excluded_land_area,
payment_region_1 + payment_region_2 + payment_region_3 AS region_total
FROM rpid.common_grazing_lpid_detail_deliv20190911) foo
WHERE bps_eligible_area <> region_total
-- Groups parcels by cg_hahol_id
--CREATE TABLE ladss.snapshot_2017_by_cg_holding AS
SELECT hahol_id,
geom,
ST_AREA(geom)
FROM
(SELECT hahol_id,
ST_Collect(geom) AS geom
FROM ladss.snapshot_2017
GROUP BY hahol_id) bar
-- finds differences between areas
SELECT hapar_id,
cg_hahol_id,
hahol_id,
start_date,
ROUND(CAST(ST_Area(geom) * 0.0001 AS NUMERIC), 2) AS calced_has,
digitised_area,
ABS(ROUND(CAST(ST_Area(geom) * 0.0001 AS NUMERIC), 2) - digitised_area) AS digi_diff,
payment_region_1 + payment_region_2 + payment_region_3 AS sum_pay_regs,
bps_eligible_area,
excluded_land_area
FROM rpid.snapshot_2017_0417_peudonymised
JOIN rpid.common_grazing_lpid_detail_deliv20190911 USING (hapar_id)
WHERE year = 2016
AND ABS(ROUND(CAST(ST_Area(geom) * 0.0001 AS NUMERIC), 2) - digitised_area) <> 0