Commit

v1 dataset release
Mike Roberts committed Dec 19, 2020
1 parent ae18c09 commit ad3add5
Showing 12 changed files with 88,509 additions and 334 deletions.
102 changes: 91 additions & 11 deletions README.md

Large diffs are not rendered by default.

128 changes: 128 additions & 0 deletions code/python/analysis/dataset_generate_image_metadata.py
@@ -0,0 +1,128 @@
#
# For licensing see accompanying LICENSE.txt file.
# Copyright (C) 2020 Apple Inc. All Rights Reserved.
#

from pylab import *

import argparse
import os
import pandas as pd

import path_utils

parser = argparse.ArgumentParser()
parser.add_argument("--analysis_dir", required=True)
args = parser.parse_args()

assert os.path.exists(args.analysis_dir)



print("[HYPERSIM: DATASET_GENERATE_IMAGE_METADATA] Begin...")



num_images_per_camera_trajectory = 100

metadata_images_flagged_txt_file = os.path.join(args.analysis_dir, "metadata_images_flagged.txt")
metadata_images_csv_file = os.path.join(args.analysis_dir, "metadata_images.csv")

metadata_camera_trajectories_csv_file = os.path.join(args.analysis_dir, "metadata_camera_trajectories.csv")
df_camera_trajectories = pd.read_csv(metadata_camera_trajectories_csv_file).rename_axis("camera_trajectory_id")
camera_trajectories = df_camera_trajectories.to_records()



# initialize dict of lists for frames to exclude
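# keys are (scene_name, camera_name) tuples, values are lists of flagged frame ids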
frames_ids_excluded_flagged = {}

for c in camera_trajectories:
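    # "Animation" entries are expected to be of the form "ai_XXX_YYY_cam_ZZ", so the first
    # 10 characters give the scene name and characters 11:17 give the camera name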

    animation_name = c["Animation"]

    scene_name = animation_name[0:10]
    camera_name = animation_name[11:17]

    assert scene_name.startswith("ai_")
    assert camera_name.startswith("cam_")

    frames_ids_excluded_flagged[(scene_name, camera_name)] = []



# fill lists of frames to exclude
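# the flagged-image list is expected to consist of scene name lines ("ai_XXX_YYY"),
# camera name lines ("scene_cam_ZZ..."), and frame lines ("frame.NNNN..."), in that
# nesting order, with blank lines permitted anywhere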
scene_name_current = None
camera_name_current = None
frame_id_current = None

for line in open(metadata_images_flagged_txt_file, "r"):

    line = line.strip()

    assert line == "" or line.startswith("ai_") or line.startswith("scene_cam_") or line.startswith("frame.")

    if line == "":
        continue
    if line.startswith("ai_"):
        scene_name_current = line[0:10]
    if line.startswith("scene_cam_"):
        camera_name_current = line[6:12]
    if line.startswith("frame."):
        assert scene_name_current is not None and camera_name_current is not None
        frame_id_current = int(line[6:10])
        frames_ids_excluded_flagged[(scene_name_current, camera_name_current)].append(frame_id_current)



# create dataframe
df_columns = ["scene_name", "camera_name", "frame_id", "included_in_public_release", "exclude_reason"]
df = pd.DataFrame(columns=df_columns)

for c in camera_trajectories:

    animation_name = c["Animation"]

    scene_name = animation_name[0:10]
    camera_name = animation_name[11:17]

    assert scene_name.startswith("ai_")
    assert camera_name.startswith("cam_")

    print("[HYPERSIM: DATASET_GENERATE_IMAGE_METADATA] Processing scene: " + scene_name)

    scene_names = [ scene_name for i in range(num_images_per_camera_trajectory) ]
    camera_names = [ camera_name for i in range(num_images_per_camera_trajectory) ]
    frame_ids = range(num_images_per_camera_trajectory)

    if c["Scene type"] == "OUTSIDE VIEWING AREA (BAD INITIALIZATION)":
        included_in_public_release = [ False for i in range(num_images_per_camera_trajectory) ]
        exclude_reason = [ "OUTSIDE VIEWING AREA (BAD INITIALIZATION)" for i in range(num_images_per_camera_trajectory) ]
    elif c["Scene type"] == "OUTSIDE VIEWING AREA (BAD TRAJECTORY)":
        included_in_public_release = [ False for i in range(num_images_per_camera_trajectory) ]
        exclude_reason = [ "OUTSIDE VIEWING AREA (BAD TRAJECTORY)" for i in range(num_images_per_camera_trajectory) ]
    else:
        frames_ids_excluded_flagged_ = array(frames_ids_excluded_flagged[(scene_name, camera_name)])
        included_in_public_release = logical_not(in1d(frame_ids, frames_ids_excluded_flagged_))
        exclude_reason = [ "" if included_in_public_release[i] else "CONTENT FLAGGED FOR REMOVAL" for i in range(num_images_per_camera_trajectory) ]

    df_ = pd.DataFrame(
        columns=df_columns,
        data={"scene_name" : scene_names,
              "camera_name" : camera_names,
              "frame_id" : frame_ids,
              "included_in_public_release" : included_in_public_release,
              "exclude_reason" : exclude_reason})

    df = df.append(df_)

included_in_public_release_counts = df.included_in_public_release.value_counts()
print("[HYPERSIM: DATASET_GENERATE_IMAGE_METADATA] Images included in public release: " + str(included_in_public_release_counts[True]))



df.to_csv(metadata_images_csv_file, index=False)



print("[HYPERSIM: DATASET_GENERATE_IMAGE_METADATA] Finished.")
@@ -7,12 +7,9 @@

import argparse
import fnmatch
import glob
import h5py
import inspect
import os
import pandas as pd
import sklearn.preprocessing

import path_utils

5 changes: 0 additions & 5 deletions code/python/tools/dataset_copy_semantic_segmentations.py
@@ -51,14 +51,9 @@
files_to_copy = [
"mesh_objects_si.hdf5",
"mesh_objects_sii.hdf5",
"metadata_groups.csv",
"metadata_materials.csv",
"metadata_objects.csv",
"metadata_scene_annotation_tool.log",
"metadata_semantic_colors.hdf5",
"metadata_semantic_instance_bounding_box_object_aligned_2d_extents.hdf5",
"metadata_semantic_instance_bounding_box_object_aligned_2d_orientations.hdf5",
"metadata_semantic_instance_bounding_box_object_aligned_2d_positions.hdf5",
"metadata_semantic_instance_colors.hdf5"
]

