"""Build the ego->exo validation JSON used for SegSwap-style evaluation.

For every validation take listed in the split file, the script picks one ego
(aria) camera and the exo camera sharing the most annotated objects with it,
uses the first annotated ego frame as the reference ("first frame"), and emits
one sample per later frame that is annotated in ego and present on disk in exo.
"""
import copy
import json
import os

import cv2
import numpy as np
from natsort import natsorted
from PIL import Image
from pycocotools.mask import encode, decode, frPyObjects  # noqa: F401  (frPyObjects kept from the original imports)
from tqdm import tqdm

# Default locations used when main() is called without arguments.
ROOT_PATH = '/data/work2-gcp-europe-west4-a/yuqian_fu/Ego/data_segswap'
SPLIT_PATH = "/home/yuqian_fu/Projects/ego-exo4d-relation/correspondence/SegSwap/data/split.json"
SAVE_NAME = 'egoexo_val_exosize.json'

# Exo masks and the reported exo image size are divided by this factor.
# NOTE(review): presumably the exo frames on disk are stored at 1/4 of the
# annotation resolution -- confirm against the data export.
EXO_DOWNSCALE = 4

# Note carried over from the original script (translated):
# build_DAVIS.py first processes the first frame of each video; the
# unique_instances, height/width, etc. obtained from that first frame are then
# used for every later frame of the video. unique_instances holds all pixel
# categories present in the first frame; if a later frame of the video contains
# a pixel whose category is not in unique_instances, an error is raised.


def _rle_to_json(binary_mask):
    """Encode a binary mask as COCO RLE with an ASCII (JSON-serializable) 'counts'."""
    rle = encode(np.asfortranarray(binary_mask))
    return {
        'counts': rle['counts'].decode('ascii'),
        'size': rle["size"],
    }


def _make_annotation(binary_mask, target_wh, category_id):
    """Resize a mask to ``target_wh`` (width, height) and wrap it as an annotation.

    Returns None when the resized mask has no foreground pixel left.
    """
    resized = cv2.resize(binary_mask, target_wh, interpolation=cv2.INTER_NEAREST)
    area = resized.sum().astype(float)
    if area == 0:
        return None
    return {
        'segmentation': _rle_to_json(resized),
        'area': area,
        'category_id': category_id,
    }


def _nonempty_masks(masks, objs, cam, frame_key):
    """Decode the masks of ``objs`` on ``cam`` at ``frame_key``.

    Returns ``(obj, mask)`` pairs for objects annotated at that frame whose
    decoded mask contains at least one foreground pixel.
    """
    kept = []
    for obj in objs:
        cam_masks = masks[obj][cam]
        if frame_key not in cam_masks:
            continue
        mask = decode(cam_masks[frame_key])
        if mask.sum().astype(float) != 0:
            kept.append((obj, mask))
    return kept


def _select_exo_camera(masks, objs, ego, exo_cams):
    """Pick the exo camera sharing the most annotated objects with ``ego``.

    Ties keep the earliest camera in ``exo_cams``. Returns the chosen camera
    and the list of objects annotated in both views.
    """
    best_cam, best_objs = None, []
    for cam in exo_cams:
        shared = [obj for obj in objs if ego in masks[obj] and cam in masks[obj]]
        if best_cam is None or len(shared) > len(best_objs):
            best_cam, best_objs = cam, shared
    return best_cam, best_objs


def _build_video_samples(root_path, val_name, next_img_id):
    """Build all samples of one take.

    Args:
        root_path: dataset root; sample paths are stored relative to it.
        val_name: name of the take directory under ``root_path``.
        next_img_id: ``new_img_id`` to give to the first sample produced.

    Returns:
        A (possibly empty) list of sample dicts, or None when the take is a
        "bad case" (no usable camera pair, shared object, frame or mask).
    """
    vid_root_path = os.path.join(root_path, val_name)
    anno_path = os.path.join(vid_root_path, "annotation.json")
    with open(anno_path, 'r') as fp:
        annotations = json.load(fp)
    masks = annotations["masks"]

    objs = natsorted(masks.keys())
    print("the total obj num are:", len(objs))
    print(f"objs:{objs}")

    # Contiguous, 1-based category ids in natural object order.
    coco_id_to_cont_id = {coco_id: cont_id + 1 for cont_id, coco_id in enumerate(objs)}

    # Everything in the take directory except the annotation file is treated as a camera.
    valid_cams = natsorted(
        entry for entry in os.listdir(vid_root_path) if entry != "annotation.json"
    )
    print(valid_cams)

    ego_cams = [vc for vc in valid_cams if 'aria' in vc]
    exo_cams = [vc for vc in valid_cams if 'aria' not in vc]
    if not ego_cams or not exo_cams:
        # Without both views there is nothing to pair (previously an IndexError).
        return None

    ego = ego_cams[0]
    vid_ego_path = os.path.join(vid_root_path, ego)

    exo, objs_both_have = _select_exo_camera(masks, objs, ego, exo_cams)
    print("objs_both_have num:", len(objs_both_have))
    if len(objs_both_have) == 0:
        return None

    print(ego, exo)

    vid_exo_path = os.path.join(vid_root_path, exo)
    print(f"vid_exo_path:{vid_exo_path}")
    exo_frames = [f.split(".")[0] for f in natsorted(os.listdir(vid_exo_path))]

    # Reference object: the shared object with the most annotated ego frames
    # (max() returns the first maximum, matching the original tie-breaking).
    obj_ref = max(objs_both_have, key=lambda obj: len(masks[obj][ego]))
    ego_anno_frames = natsorted(list(masks[obj_ref][ego].keys()))

    frames = natsorted(np.intersect1d(ego_anno_frames, exo_frames))
    print(f"frames:{frames}")
    if len(frames) < 2:
        # frames[1] provides the exo image size and frames[1:] the samples;
        # with fewer than two shared frames the take cannot be used
        # (previously an IndexError).
        return None

    all_ref_keys = np.asarray(ego_anno_frames).astype(np.int64)
    first_anno_key = str(all_ref_keys[0])
    rgb_name = f"{first_anno_key}.jpg"
    first_frame_img_path = os.path.join(vid_ego_path, rgb_name)
    first_frame_img_relpath = os.path.relpath(first_frame_img_path, root_path)

    ego_masks = _nonempty_masks(masks, objs_both_have, ego, first_anno_key)
    print("total obj num in ego", len(ego_masks))
    if len(ego_masks) == 0:
        return None

    # The first-frame (ego) masks are resized to the size of an exo frame.
    # Only the image header is needed for that; the file is closed right away.
    tmp_path = os.path.join(vid_exo_path, f"{frames[1]}.jpg")
    with Image.open(tmp_path) as img_tmp:
        w_tmp, h_tmp = img_tmp.size

    coco_format_annotations = []
    obj_list_ego_new = []
    for obj, mask_ego in ego_masks:
        ann = _make_annotation(mask_ego, (w_tmp, h_tmp), float(coco_id_to_cont_id[obj]))
        if ann is None:
            continue
        obj_list_ego_new.append(obj)
        coco_format_annotations.append(ann)
    if len(obj_list_ego_new) == 0:
        return None

    samples = []
    # NOTE(review): frames[0] is skipped exactly as in the original script,
    # presumably because it corresponds to the reference frame -- confirm.
    for idx in frames[1:]:
        filename = f"{idx}.jpg"
        sample_img_path = os.path.join(vid_exo_path, filename)
        sample_img_relpath = os.path.relpath(sample_img_path, root_path)

        exo_masks = _nonempty_masks(masks, obj_list_ego_new, exo, idx)
        print("total obj num in exo", len(exo_masks))
        if len(exo_masks) == 0:
            continue

        height, width = masks[exo_masks[0][0]][exo][idx]["size"]
        image_info = {
            'file_name': sample_img_relpath,
            'height': height // EXO_DOWNSCALE,
            'width': width // EXO_DOWNSCALE,
        }

        anns = []
        obj_list_exo_new = []
        for obj, mask_exo in exo_masks:
            h, w = mask_exo.shape
            ann = _make_annotation(
                mask_exo,
                (w // EXO_DOWNSCALE, h // EXO_DOWNSCALE),
                float(coco_id_to_cont_id[obj]),
            )
            if ann is None:
                continue
            obj_list_exo_new.append(obj)
            anns.append(ann)
        if len(obj_list_exo_new) == 0:
            continue

        sample_unique_instances = [float(coco_id_to_cont_id[obj]) for obj in obj_list_exo_new]
        print(f"sample_unique_instances in {idx}:{sample_unique_instances}")

        # Keep only the first-frame annotations of objects visible in this exo
        # frame. Exo objects are always a subset of the first-frame objects,
        # so both lists must end up with the same length.
        visible_ids = set(sample_unique_instances)
        first_frame_anns = [
            copy.deepcopy(ann)
            for ann in coco_format_annotations
            if ann['category_id'] in visible_ids
        ]
        assert len(anns) == len(first_frame_anns)

        samples.append({
            'image': sample_img_relpath,
            'image_info': image_info,
            'anns': anns,
            'first_frame_image': first_frame_img_relpath,
            'first_frame_anns': first_frame_anns,
            'new_img_id': next_img_id + len(samples),
            'video_name': val_name,
        })
    return samples


def main(root_path=ROOT_PATH, split_path=SPLIT_PATH, save_path=None):
    """Process every validation take and write the dataset JSON.

    Args:
        root_path: dataset root containing one directory per take.
        split_path: JSON file whose "val" entry lists the take names.
        save_path: output file; defaults to ``<root_path>/egoexo_val_exosize.json``.
    """
    if save_path is None:
        save_path = os.path.join(root_path, SAVE_NAME)

    with open(split_path, "r") as fp:
        data_split = json.load(fp)
    val_set = data_split["val"]

    egoexo_dataset = []
    bad_case = []
    for val_name in tqdm(val_set):
        samples = _build_video_samples(root_path, val_name, len(egoexo_dataset))
        if samples is None:
            bad_case.append(val_name)
            continue
        egoexo_dataset.extend(samples)

    print(bad_case)
    with open(save_path, 'w') as f:
        json.dump(egoexo_dataset, f)
    print(f'Save at {save_path}. Total sample: {len(egoexo_dataset)}')


if __name__ == '__main__':
    main()