-
Notifications
You must be signed in to change notification settings - Fork 1
/
count_film_images.pl
114 lines (93 loc) · 3.04 KB
/
count_film_images.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/usr/bin/perl
# nbt, 2019-02-14
# Count film images and aggregate various numbers
# - grand total
# - images on film only
# - images from films starting with a certain notation
use strict;
use warnings;
use utf8;
use Data::Dumper;
##use Data::Dumper::Names;
use JSON;
use Path::Tiny;
# Have Data::Dumper emit hash keys in sorted order.
$Data::Dumper::Sortkeys = 1;

# Directory below which the film image folders are looked up as
# <set>/<collection>/<film_id>.
my $film_root = path('/pm20/film');

# Directory holding the findbuch JSON inputs; the result files of this
# script are written into the same directory.
my $filmdata_root  = path('../data/filmdata');
my $img_count_file = path( $filmdata_root, 'img_count.json' );
my $missing_file   = path( $filmdata_root, 'missing.json' );

# Pattern matched against an entry's start_sig; the images of matching
# films are summed up separately per set/collection.
my $special_qr = qr/(B42|B42a)/;    # India
##my $special_qr = qr/E86 /; # Argentina

# Result containers filled by the counting loop:
#   %img_count  film directory path => number of images
#   %count      {total|film_only}{set}{collection} => number of images
#   %missing    set => list of film directory paths
my ( %img_count, %count, %missing );

# Film sets and the collections to process for each of them.
# NOTE(review): h*/k* presumably stand for Hamburg and Kiel - confirm.
my %conf = (
    h1 => [ 'co', 'sh', 'wa' ],
    h2 => [ 'co', 'sh', 'wa' ],
    k1 => ['sh'],
    k2 => ['sh'],
);
# Walk every configured set/collection, read its findbuch JSON
# (<set>_<collection>.json) and count the .jpg files of each listed film.
# Fills:
#   %img_count  film directory path => number of images found
#   %missing    set => film directories that neither exist on disk nor are
#               flagged as online
#   %count      {total}{set}{collection}     all counted images
#               {film_only}{set}{collection} images of entries not online
# Dies if a findbuch file cannot be read, is not valid JSON, or does not
# contain a JSON array.
foreach my $set ( sort keys %conf ) {
    foreach my $collection ( @{ $conf{$set} } ) {
        print "$set $collection\n";

        my $special_count = 0;
        $count{total}{$set}{$collection}     = 0;
        $count{film_only}{$set}{$collection} = 0;

        # findbuch input
        my $findbuch_file =
          $filmdata_root->child( $set . '_' . $collection . '.json' );

        # decode_json expects the raw UTF-8 bytes of the file; slurp_raw
        # and decode_json throw on their own if reading or parsing fails.
        my $findbuch_data = decode_json( $findbuch_file->slurp_raw );
        ref($findbuch_data) eq 'ARRAY'
          or die "$findbuch_file: expected a JSON array of film entries\n";

        # Entries with the same film id as the directly preceding entry
        # are skipped, so a film is only counted once per run of entries.
        my $last_film_id = 0;
        foreach my $entry ( @{$findbuch_data} ) {
            ##print Dumper $entry; exit;
            my $film_id = $entry->{film_id} || 'dummy';

            # for film ids from Kiel
            if ( $film_id =~ m/^[0-9]+$/ ) {
                $film_id = sprintf( "%04d", $film_id );
            }
            next if ( $film_id eq $last_film_id );
            $last_film_id = $film_id;

            my $filmpath =
              $film_root->child($set)->child($collection)->child($film_id);

            my $img_count = 0;
            if ( -d $filmpath ) {

                # Anchored, so that only names ending in ".jpg" count
                # (not e.g. "0001.jpg.bak").
                my @images = $filmpath->children(qr/\.jpg\z/);
                $img_count = scalar @images;
            } elsif ( $entry->{online} ) {

                # skip, because images are in PM20
            } else {
                ##push(@{$missing{$set}}, $entry->{film_id});
                push( @{ $missing{$set} }, $filmpath->stringify );
            }
            $img_count{$filmpath} = $img_count;

            # Counts films with start_sig (Hamburg only). Missing keys
            # simply do not match instead of raising "uninitialized"
            # warnings.
            my $provenance = $entry->{provenance};
            my $start_sig  = $entry->{start_sig};
            if (   defined $provenance
                && $provenance eq 'h'
                && defined $start_sig
                && $start_sig =~ m/$special_qr/ )
            {
                $special_count = $special_count + $img_count;
            }

            $count{total}{$set}{$collection} += $img_count;
            if ( !$entry->{online} ) {
                $count{film_only}{$set}{$collection} += $img_count;
            }
        }

        # Message: number of (double) pages from <set>/<collection> for
        # the range given by $special_qr.
        print " Anzahl (Doppel-)Seiten aus $set/"
          . $collection
          . " zum Bereich $special_qr: $special_count\n";
    }
}
# save image counts
# The per-film image counts and the per-set lists of missing film
# directories are written as UTF-8 encoded JSON. Hash keys are emitted in
# sorted order (canonical), so repeated runs over unchanged data produce
# byte-identical files. The encoder already returns bytes, hence spew_raw.
my $json_writer = JSON->new->utf8->canonical;
$img_count_file->spew_raw( $json_writer->encode( \%img_count ) );
$missing_file->spew_raw( $json_writer->encode( \%missing ) );
# statistics
# Add up the per-collection numbers of every set into one grand total per
# count type (the top-level keys of %count).
my %grand_total;
for my $type ( keys %count ) {
    for my $per_collection ( values %{ $count{$type} } ) {
        $grand_total{$type} += $_ for values %{$per_collection};
    }
}

# Build the dump of the detailed counts and the grand totals once, then
# send the same text to STDOUT and to stats.dat.
my $report = Dumper( \%count, \%grand_total );
print $report;
my $stats_file = $filmdata_root->child('stats.dat');
$stats_file->spew($report);