-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscene_select.py
64 lines (49 loc) · 2.45 KB
/
scene_select.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import pandas as pd
from sklearn.cluster import AffinityPropagation
from sklearn import preprocessing
import argparse
def parse_args(argv=None):
    """Parse the command-line options of the scene-selection script.

    Args:
        argv: Optional list of argument strings. When ``None`` argparse
            reads ``sys.argv[1:]``, which is what the script entry point uses.

    Returns:
        The populated ``argparse.Namespace`` with attributes ``stats``,
        ``stats_idx``, ``meta``, ``meta_idx``, ``out_fn`` and ``pref``.
        ``stats_idx`` and ``meta_idx`` default to 0; ``meta`` and ``pref``
        are ``None`` when not supplied.
    """
    parser = argparse.ArgumentParser(description='Ready')
    parser.add_argument('--stats', dest='stats', required=True,
                        help='Path to csv with image stats')
    parser.add_argument('--stats_idx', dest='stats_idx', required=False,
                        type=int, default=0,
                        help='Column number (start from 0) of filepaths in stats csv')
    parser.add_argument('--meta', dest='meta', required=False,
                        help='Path to csv containing image metadata')
    parser.add_argument('--meta_idx', dest='meta_idx', required=False,
                        type=int, default=0,
                        help='Column number (start from 0) of filepaths in meta csv')
    parser.add_argument('--out_fn', dest='out_fn', required=True,
                        help='Full filepath for output csv')
    parser.add_argument('--pref', dest='pref', type=float, required=False,
                        help='AP preference value to be used instead of default')
    return parser.parse_args(argv)


def load_samples(stats, stats_idx=0, meta=None, meta_idx=0):
    """Read the stats csv and, when given, join the metadata csv onto it.

    Args:
        stats: Path to the csv with image stats.
        stats_idx: Position of the column in ``stats`` used as the index.
        meta: Optional path to a metadata csv; skipped when falsy.
        meta_idx: Position of the column in ``meta`` used as the index.

    Returns:
        A DataFrame indexed by the chosen column. When ``meta`` is given the
        two tables are merged on their indexes with pandas' default join, so
        only index values present in both files are kept.
    """
    stats_df = pd.read_csv(stats, index_col=stats_idx)
    if not meta:
        return stats_df
    meta_df = pd.read_csv(meta, index_col=meta_idx)
    return pd.merge(stats_df, meta_df, left_index=True, right_index=True)


def flag_exemplars(img_fns, center_indices, labels):
    """Build the output table marking which samples are cluster exemplars.

    Args:
        img_fns: Sequence of index values, one per sample.
        center_indices: Positional indices (into ``img_fns``) of the samples
            chosen as cluster centres.
        labels: Cluster label for every sample, same length as ``img_fns``.

    Returns:
        A DataFrame indexed by ``img_fns`` with a boolean ``exemplar`` column
        and a ``cluster`` column holding ``labels``.
    """
    # Build the lookup once so each membership test is O(1) rather than a
    # scan over the array of centre indices.
    centers = {int(i) for i in center_indices}
    exemplars = [i in centers for i in range(len(img_fns))]
    exemplar_df = pd.DataFrame(data=exemplars, index=img_fns, columns=['exemplar'])
    exemplar_df['cluster'] = labels
    return exemplar_df


def main(argv=None):
    """Cluster the scenes with affinity propagation and write the result csv.

    Args:
        argv: Optional list of argument strings forwarded to ``parse_args``.
    """
    args = parse_args(argv)
    sample_df = load_samples(args.stats, args.stats_idx, args.meta, args.meta_idx)

    img_fns = sample_df.index.values
    scaled_vals = preprocessing.scale(sample_df.values)

    # Pass the preference straight through: None selects the library default,
    # while an explicit value (including 0.0, which a truthiness test would
    # wrongly discard) is honoured.
    ap = AffinityPropagation(max_iter=5000, convergence_iter=100,
                             affinity="euclidean",
                             preference=args.pref).fit(scaled_vals)

    print("There are " + str(len(ap.cluster_centers_indices_)) + " exemplar scenes")

    exemplar_df = flag_exemplars(img_fns, ap.cluster_centers_indices_, ap.labels_)
    exemplar_df.to_csv(args.out_fn, header=True, index=True, index_label='filepath')


if __name__ == "__main__":
    main()