加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
datasets.py 6.81 KB
一键复制 编辑 原始数据 按行查看 历史
chenliangwei 提交于 2024-04-09 09:00 . first
#
# Authors: Bowen Wen
# Contact: wenbowenxjtu@gmail.com
# Created in 2020
#
# Copyright (c) Rutgers University, 2020 All rights reserved.
#
# Wen, B., C. Mitash, B. Ren, and K. E. Bekris. "se (3)-TrackNet:
# Data-driven 6D Pose Tracking by Calibrating Image Residuals in
# Synthetic Domains." In IEEE/RSJ International Conference on Intelligent
# Robots and Systems (IROS). 2020.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the PRACSYS, Bowen Wen, Rutgers University,
# nor the names of its contributors may be used to
# endorse or promote products derived from this software without
# specific prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
import os, sys,time
# Absolute directory of this file (symlinks resolved by realpath).
dir_path = os.path.dirname(os.path.realpath(__file__))
# Extend the module search path with the directory two levels above this file.
# NOTE(review): presumably this is what lets the project-local imports below
# (e.g. Utils, data_augmentation) resolve -- confirm against the repo layout.
sys.path.append(dir_path+'/../../')
import json,yaml
from torch.utils.data.dataset import Dataset
from PIL import Image
from Utils import *
from data_augmentation import *
import numpy as np
import cv2
import glob
import torch
import transformations as T
class TrackDataset(Dataset):
    """Dataset of (A, B) RGB-D frame pairs labelled with the relative pose A -> B.

    Every sample is identified by a ``*rgbA.png`` file located directly under
    ``root``. The companion files of a sample share the same file-name prefix
    and end in ``rgbB.png``, ``depthA.png``, ``depthB.png``, ``segB.png`` and
    ``meta.npz`` (the latter holding ``A_in_cam`` and ``B_in_cam``).
    """

    # File-name suffix that identifies the "A" colour image of every sample.
    _RGBA_SUFFIX = 'rgbA.png'
    # Depth readings strictly above this value count as foreground whenever a
    # mask has to be derived from a depth image.
    _DEPTH_MASK_THRESHOLD = 100

    def __init__(self, root, mode, images_mean, images_std, pretransforms=None,
                 augmentations=None, posttransforms=None, dataset_info=None,
                 trans_normalizer=0.03, rot_normalizer=5*np.pi/180):
        """Index the samples under ``root`` and store the processing settings.

        @root: directory that directly contains the ``*rgbA.png`` files
        @mode: dataset mode; the value 'train' enables label range checks
        @images_mean, @images_std: stored as-is for use by callers
        @pretransforms, @augmentations, @posttransforms: optional callables
            applied, in that order, by processData
        @dataset_info: optional dict; 'camera' is passed to cam_K_from_dict
            and 'resolution' is the square image size samples are resized to
        @trans_normalizer: divisor applied to the translation label
        @rot_normalizer: divisor applied to the rotation-vector label
        """
        self.mode = mode
        self.images_mean = images_mean
        self.images_std = images_std
        self.data_pose = []
        self.data_pair = {}
        self.pretransforms = pretransforms
        self.augmentations = augmentations
        self.posttransforms = posttransforms
        self.root = root
        self.data_transforms = []
        self.dataset_info = dataset_info
        if dataset_info is not None:
            self.cam_K = cam_K_from_dict(dataset_info['camera'])
            print('self.cam_K:\n', self.cam_K)
        else:
            # Keep the attribute defined so later reads do not raise
            # AttributeError when no dataset_info was supplied.
            self.cam_K = None
            print('[WARN] In TrackDataset, dataset_info is None !!')
        print("making dataset... for {}".format(self.mode))
        self.rgbA_files = sorted(
            glob.glob(os.path.join(self.root, '*' + self._RGBA_SUFFIX)))
        print('#dataset:', len(self))
        self.trans_normalizer = trans_normalizer
        self.rot_normalizer = rot_normalizer  # 30*np.pi/180 for YCBInEOAT
        print("self.trans_normalizer={}, self.rot_normalizer={}".format(
            self.trans_normalizer, self.rot_normalizer))

    @staticmethod
    def _sibling_path(rgbA_path, suffix):
        """Return the companion file of ``rgbA_path`` that ends in ``suffix``.

        Only the trailing 'rgbA.png' is swapped, so capital letters or the
        text 'rgbA' occurring in directory names are left untouched.

        Raises ValueError when ``rgbA_path`` does not end in 'rgbA.png'.
        """
        marker = TrackDataset._RGBA_SUFFIX
        if not rgbA_path.endswith(marker):
            raise ValueError(
                'expected a path ending in {!r}, got {!r}'.format(marker, rgbA_path))
        return rgbA_path[:-len(marker)] + suffix

    def __getitem__(self, index):
        """
        @data: [rgbA, depthA, rgbB, depthB, poseA]. Depth data unit is mm (uint16).
        @target [pose_labels]

        Returns (data, target, A_in_cam, B_in_cam, rgbA, rgbB, maskA, maskB),
        where the last four entries are the values produced by processData.
        """
        rgbA_path = self.rgbA_files[index]

        # Close the image files deterministically instead of relying on GC.
        with Image.open(rgbA_path) as img:
            rgbA = np.array(img)
        with Image.open(self._sibling_path(rgbA_path, 'rgbB.png')) as img:
            rgbB = np.array(img)
        depthA = cv2.imread(self._sibling_path(rgbA_path, 'depthA.png'), cv2.IMREAD_UNCHANGED)
        depthB = cv2.imread(self._sibling_path(rgbA_path, 'depthB.png'), cv2.IMREAD_UNCHANGED)
        # cv2.imread yields None when the file cannot be read; that case is
        # handled below by deriving the mask from depthB.
        maskB = cv2.imread(self._sibling_path(rgbA_path, 'segB.png'), cv2.IMREAD_UNCHANGED)

        # The npz archive keeps a file handle open until it is closed.
        with np.load(self._sibling_path(rgbA_path, 'meta.npz')) as meta:
            A_in_cam = meta['A_in_cam']
            B_in_cam = meta['B_in_cam']

        # Without dataset_info there is no target resolution, so no resize.
        resolution = None if self.dataset_info is None else self.dataset_info['resolution']
        if resolution is not None and rgbB.shape[0] != resolution:
            size = (resolution, resolution)
            rgbA = cv2.resize(rgbA, size, interpolation=cv2.INTER_NEAREST)
            rgbB = cv2.resize(rgbB, size, interpolation=cv2.INTER_NEAREST)
            depthA = cv2.resize(depthA, size, interpolation=cv2.INTER_NEAREST)
            depthB = cv2.resize(depthB, size, interpolation=cv2.INTER_NEAREST)
            if maskB is not None:
                maskB = cv2.resize(maskB, size, interpolation=cv2.INTER_NEAREST)

        if maskB is None:
            # Derived after the resize so the mask matches depthB's final size.
            maskB = (depthB > self._DEPTH_MASK_THRESHOLD).astype(np.uint8)
        assert np.sum(maskB) > 0, 'index={}'.format(index)

        data, target, rgbA, rgbB, maskA, maskB = self.processData(
            rgbA, depthA, A_in_cam, rgbB, depthB, B_in_cam, maskB)
        return data, target, A_in_cam, B_in_cam, rgbA, rgbB, maskA, maskB

    def __len__(self):
        """Return the number of ``*rgbA.png`` samples that were indexed."""
        return len(self.rgbA_files)

    def processData(self, rgbA, depthA, A_in_cam, rgbB, depthB, B_in_cam, maskB=None, original_size=None):
        '''
        After posttransforms,
        @sample: [dataA, dataB] where dataA and dataB are concatenated rgb with depth CHW
        @A_in_cam: 4x4 mat

        @maskB: optional mask for frame B; derived from depthB when None
        @original_size: accepted for interface compatibility, not used here

        Returns (sample, [trans_label, rot_label], rgbA_viz, rgbB_viz, maskA, maskB).
        trans_label is the translation difference B - A divided by
        trans_normalizer; rot_label is the rotation vector of R_B * R_A^T
        divided by rot_normalizer.
        '''
        maskA = (depthA > self._DEPTH_MASK_THRESHOLD).astype(np.uint8)
        if maskB is None:
            maskB = (depthB > self._DEPTH_MASK_THRESHOLD).astype(np.uint8)

        sample = [rgbA, depthA, rgbB, depthB, maskA, maskB, A_in_cam]
        if self.pretransforms:
            sample = self.pretransforms(sample)

        # Visualisation copies of the incoming colour images.
        rgbA_viz = rgbA.astype(np.uint8)
        rgbB_viz = rgbB.astype(np.uint8)

        if self.augmentations:
            sample = self.augmentations(sample)
            # The augmented sample must unpack into seven entries; refresh the
            # masks and visualisation images from it.
            rgbA, _, rgbB, _, maskA, maskB, _ = sample
            rgbA_viz = rgbA.astype(np.uint8)
            rgbB_viz = rgbB.astype(np.uint8)

        if self.posttransforms:
            sample, maskA, maskB = self.posttransforms(sample)

        # Out-of-place division: the subtraction result may have an integer
        # dtype, for which an in-place true division would raise.
        trans_label = (B_in_cam[:3, 3] - A_in_cam[:3, 3]) / self.trans_normalizer

        A2B_in_cam_rot = B_in_cam[:3, :3].dot(A_in_cam[:3, :3].T)
        A2B_in_cam_rot = normalize_rotation_matrix(A2B_in_cam_rot)
        rot_label = cv2.Rodrigues(A2B_in_cam_rot)[0].reshape(-1) / self.rot_normalizer

        if self.mode == 'train':
            # Training labels are required to lie inside [-1, 1].
            assert (trans_label <= 1).all() and (trans_label >= -1).all()
            assert (rot_label >= -1).all() and (rot_label <= 1).all(), 'root:\n{}\nrot_label\n{}\n A2B_in_cam_rot{}\n'.format(self.root, rot_label, A2B_in_cam_rot)

        return sample, [trans_label, rot_label], rgbA_viz, rgbB_viz, maskA, maskB

    def processPredict(self, A_in_cam, predB, original_size=None):
        '''Recover the predicted pose to the true pose
        @A_in_cam: 4x4 mat
        @predB: trans, rot, ...
        @original_size: accepted for interface compatibility, not used here
        return ob pose in cam frame
        '''
        # Coerce to flat float64 vectors so array-likes other than float
        # ndarrays are accepted by the arithmetic and by cv2.Rodrigues.
        trans_pred = np.asarray(predB[0], dtype=np.float64).reshape(-1) * self.trans_normalizer
        rot_pred = np.asarray(predB[1], dtype=np.float64).reshape(-1) * self.rot_normalizer

        B_in_cam = np.eye(4)
        B_in_cam[:3, 3] = trans_pred + A_in_cam[:3, 3]
        A2B_in_cam_rot = cv2.Rodrigues(rot_pred)[0].reshape(3, 3)
        B_in_cam[:3, :3] = A2B_in_cam_rot.dot(A_in_cam[:3, :3])
        return B_in_cam
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化