# Source: mirror of https://github.com/XingangPan/DragGAN
# File info: Python, 223 lines, 9.1 KiB
# Copyright (c) SenseTime Research. All rights reserved.
|
|
|
|
|
|
import os
|
|
import argparse
|
|
import numpy as np
|
|
import torch
|
|
from torch.utils.data import DataLoader
|
|
from torchvision.transforms import transforms
|
|
from utils.ImagesDataset import ImagesDataset
|
|
|
|
import cv2
|
|
import time
|
|
import copy
|
|
import imutils
|
|
|
|
# for openpose body keypoint detector : # (src:https://github.com/Hzzone/pytorch-openpose)
|
|
from openpose.src import util
|
|
from openpose.src.body import Body
|
|
|
|
# for paddlepaddle human segmentation : #(src: https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.5/contrib/PP-HumanSeg/)
|
|
from PP_HumanSeg.deploy.infer import Predictor as PP_HumenSeg_Predictor
|
|
|
|
import math
|
|
def angle_between_points(p0, p1, p2):
    """Return the angle at ``p1`` (in degrees) formed by ``p0``-``p1``-``p2``.

    The angle is computed with the law of cosines from the squared lengths
    of the triangle's three sides.

    Args:
        p0, p1, p2: Indexable points where ``[0]`` is x and ``[1]`` is y.
            A y value of ``-1`` marks the point as missing.

    Returns:
        The angle in degrees within ``[0, 180]``, or ``-1`` when any point
        is missing or when ``p1`` coincides with ``p0`` or ``p2`` (the angle
        is undefined in that case).
    """
    if p0[1] == -1 or p1[1] == -1 or p2[1] == -1:
        return -1

    # Squared side lengths: a = |p0 p1|^2, b = |p1 p2|^2, c = |p0 p2|^2.
    a = (p1[0] - p0[0]) ** 2 + (p1[1] - p0[1]) ** 2
    b = (p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2
    c = (p2[0] - p0[0]) ** 2 + (p2[1] - p0[1]) ** 2
    if a * b == 0:
        return -1

    cosine = (a + b - c) / math.sqrt(4 * a * b)
    # Floating-point rounding can push the cosine marginally outside
    # [-1, 1] for (near-)collinear points, which would make math.acos raise
    # ValueError; clamp it back into the valid domain.
    cosine = max(-1.0, min(1.0, cosine))
    return math.acos(cosine) * 180 / math.pi
|
|
|
|
|
|
def crop_img_with_padding(img, keypoints, rect):
    """Crop a body-centred 1:2 (width:height) window and resize it to 512x1024.

    The window is centred horizontally on the body centre derived from the
    keypoints and is wide enough to contain the farther of the person's
    left/right extents. It is then grown to a 1:2 aspect ratio; any part of
    the grown window that falls outside the image is filled by replicating
    the border pixels.

    Args:
        img: Image array indexed as ``img[y, x]`` whose ``shape`` unpacks to
            ``(height, width, channels)``.
        keypoints: Indexable collection of points where ``[i][0]`` is the x
            coordinate. Rows 1, 8 and 11 are read (named shoulder-middle and
            hips by the original code; presumably the OpenPose neck / right
            hip / left hip -- TODO confirm against the detector's output).
        rect: ``[person_xmin, person_xmax, ymin, ymax]`` bounding the person.

    Returns:
        The cropped and padded image resized with ``cv2.resize`` to 512
        pixels wide by 1024 pixels high.

    Raises:
        ValueError: If the body-centred window has non-positive width, or if
            it is taller than 1:2 needs (vertical padding case) and extends
            past the left image edge. The original code deliberately does
            not pad the body horizontally in that case; previously it fell
            through to a negative slice index, which NumPy wraps around,
            yielding an empty or wrong crop.
    """
    person_xmin, person_xmax, ymin, ymax = rect
    img_h, img_w, _ = img.shape

    # Find the body centre using the keypoints.
    middle_shoulder_x = keypoints[1][0]
    middle_hip_x = (keypoints[8][0] + keypoints[11][0]) // 2
    mid_x = (middle_hip_x + middle_shoulder_x) // 2
    mid_y = (ymin + ymax) // 2

    # Mirror the farther side (left or right) around the centre so the
    # window is symmetric about mid_x.
    if abs(mid_x - person_xmin) > abs(person_xmax - mid_x):  # left is farther
        xmin = person_xmin
        xmax = mid_x + (mid_x - person_xmin)
    else:
        # xmin may become negative here; handled in the vertical branch.
        xmin = mid_x - (person_xmax - mid_x)
        xmax = person_xmax

    w = xmax - xmin
    h = ymax - ymin
    if w <= 0:
        # Guards the h / w division below and the negative-size window that
        # arises when the keypoint centre lies outside the person's extent.
        raise ValueError(
            f'crop_img_with_padding: non-positive crop width ({w}); '
            'body centre is inconsistent with the person bounding box'
        )

    # Grow the rectangle to w:h = 1:2.
    if h / w >= 2:  # too tall: pad horizontally
        target_w = h // 2
        xmin_prime = int(mid_x - target_w / 2)
        xmax_prime = int(mid_x + target_w / 2)
        if xmin_prime < 0:
            pad_left = abs(xmin_prime)
            xmin = 0
        else:
            pad_left = 0
            xmin = xmin_prime
        if xmax_prime > img_w:
            pad_right = xmax_prime - img_w
            xmax = img_w
        else:
            pad_right = 0
            xmax = xmax_prime

        cropped_img = img[int(ymin):int(ymax), int(xmin):int(xmax)]
        im_pad = cv2.copyMakeBorder(cropped_img, 0, 0, int(pad_left),
                                    int(pad_right), cv2.BORDER_REPLICATE)
    else:  # too wide: pad vertically
        if xmin < 0:
            # Horizontal padding of the body is intentionally unsupported;
            # fail loudly instead of slicing with a wrapped negative index.
            raise ValueError(
                f'crop_img_with_padding: body-centred crop starts at x={xmin}, '
                'left of the image; the person is too close to the image edge'
            )
        # NOTE: xmax may exceed img_w here; the slice below then stops at
        # the image edge, exactly as in the original code.
        target_h = w * 2
        ymin_prime = mid_y - (target_h / 2)
        ymax_prime = mid_y + (target_h / 2)
        if ymin_prime < 0:
            pad_up = abs(ymin_prime)
            ymin = 0
        else:
            pad_up = 0
            ymin = ymin_prime
        if ymax_prime > img_h:
            pad_down = ymax_prime - img_h
            ymax = img_h
        else:
            pad_down = 0
            ymax = ymax_prime
        print(ymin, ymax, xmin, xmax, img.shape)

        cropped_img = img[int(ymin):int(ymax), int(xmin):int(xmax)]
        im_pad = cv2.copyMakeBorder(cropped_img, int(pad_up), int(pad_down),
                                    0, 0, cv2.BORDER_REPLICATE)

    # dsize is (width, height).
    result = cv2.resize(im_pad, (512, 1024), interpolation=cv2.INTER_AREA)
    return result
|
|
|
|
|
|
def _build_human_seg_predictor(save_dir):
    """Create the PP-HumanSeg predictor with the fixed settings this script uses."""
    seg_args = argparse.Namespace(
        cfg='PP_HumanSeg/export_model/deeplabv3p_resnet50_os8_humanseg_512x512_100k_with_softmax/deploy.yaml',
        input_shape=[1024, 512],
        save_dir=save_dir,
        soft_predict=False,
        use_gpu=True,
        test_speed=False,
        use_optic_flow=False,
        add_argmax=True,
    )
    return PP_HumenSeg_Predictor(seg_args)


def _tensor_to_bgr_image(image):
    """Convert a one-image batch tensor (channels in dim 1) to an (H, W, C) BGR array."""
    # Scale by 255 and clamp to [0, 255]; presumably the tensor holds RGB
    # values in [0, 1] as produced by ToTensor -- TODO confirm.
    image = (image.permute(0, 2, 3, 1) * 255).clamp(0, 255)
    image = image.squeeze(0).numpy()  # (H, W, C)
    # Avoid very high resolution inputs: shrink to a height of 1200.
    if image.shape[0] >= 2000:
        ratio = image.shape[0] / 1200
        dim = (int(image.shape[1] / ratio), 1200)  # (width, height)
        image = cv2.resize(image, dim, interpolation=cv2.INTER_AREA)
    return cv2.cvtColor(image, cv2.COLOR_RGB2BGR)


def run(args):
    """Align every image in ``args.image_folder`` and write the results as PNGs.

    For each image the person is segmented, the largest mask contour gives
    the person's bounding box, body keypoints are detected, and the image is
    cropped/padded by ``crop_img_with_padding`` before being written to
    ``{args.output_folder}/{fname}.png``.

    Images that cannot be aligned (no segmented person, body touching or
    outside the image boundary, not exactly one detected person, or a crop
    that fails) are reported on stdout and skipped; the remaining images are
    still processed.

    Args:
        args: Namespace with ``image_folder`` (input directory) and
            ``output_folder`` (output directory, created if missing).
    """
    from tqdm import tqdm  # kept as a local import, as in the original script

    os.makedirs(args.output_folder, exist_ok=True)
    dataset = ImagesDataset(args.image_folder,
                            transforms.Compose([transforms.ToTensor()]))
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False)

    body_estimation = Body('openpose/model/body_pose_model.pth')

    print('Num of dataloader : ', len(dataloader))

    human_seg = _build_human_seg_predictor(args.output_folder)

    for fname, image in tqdm(dataloader):
        fname = fname[0]  # batch_size=1: unwrap the single file name
        print(f"Processing '{fname}'.")

        image = _tensor_to_bgr_image(image)

        # Segment the person; only the composite image and the mask are used.
        comb, segmentation, _bg, _ori_img = human_seg.run(image, None)

        # The mask is scaled by 255 before contour extraction.
        mask0_np = (segmentation * 255)[:, :, 0].astype(np.uint8)
        contours = cv2.findContours(mask0_np, cv2.RETR_EXTERNAL,
                                    cv2.CHAIN_APPROX_SIMPLE)
        cnts = imutils.grab_contours(contours)
        if len(cnts) == 0:
            # max() below would raise on an empty sequence.
            print(f"Processing '{fname}'. No person found in the segmentation mask, skipping.")
            continue
        c = max(cnts, key=cv2.contourArea)

        top_y = int(c[c[:, :, 1].argmin()][0][1])
        bot_y = int(c[c[:, :, 1].argmax()][0][1])
        pad_range = int((bot_y - top_y) * 0.05)
        border = pad_range + 5  # 5 extra pixels of breathing room

        comb_h = comb.shape[0]
        if 0 < top_y <= 5 and comb_h - 5 <= bot_y < comb_h:
            # The mask already reaches both edges: pad with pure white.
            comb = cv2.copyMakeBorder(comb, border, border, 0, 0,
                                      cv2.BORDER_CONSTANT,
                                      value=[255, 255, 255])
        elif top_y <= 0 or bot_y >= comb_h:
            print('PAD: body out of boundary', fname)  # should not happen
            # Skip only this image; returning here would abort the batch.
            continue
        else:
            comb = cv2.copyMakeBorder(comb, border, border, 0, 0,
                                      cv2.BORDER_REPLICATE)

        # Shift the vertical extremes into the padded image's coordinates,
        # then extend them by pad_range on each side.
        person_ymin = top_y + border - pad_range
        person_ymax = bot_y + border + pad_range
        if person_ymin < 0 or person_ymax > comb.shape[0]:
            print(f"Processing '{fname}'. Person box out of range after padding, skipping.")
            continue

        person_xmin = int(c[c[:, :, 0].argmin()][0][0])
        person_xmax = int(c[c[:, :, 0].argmax()][0][0])
        rect = [person_xmin, person_xmax, person_ymin, person_ymax]

        # Detect keypoints; require exactly one person whose last subset
        # entry is at least 15 (presumably the number of detected body
        # parts -- TODO confirm against the openpose Body implementation).
        keypoints, subset = body_estimation(comb)
        if len(subset) != 1 or subset[0][-1] < 15:
            print(f'Processing \'{fname}\'. Please import image contains one person only. Also can check segmentation mask. ')
            continue

        try:
            comb = crop_img_with_padding(comb, keypoints, rect)
        except (ValueError, ZeroDivisionError, cv2.error) as err:
            # A crop that cannot be produced skips this image instead of
            # crashing the whole run.
            print(f"Processing '{fname}'. Not satisfied the alignment strategy. ({err})")
            continue

        cv2.imwrite(f'{args.output_folder}/{fname}.png', comb)
        print(f" -- Finished processing '{fname}'. --")
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: configure cuDNN, parse the command line, run the
    # alignment over the input folder and report the wall-clock time.
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.deterministic = False

    started_at = time.time()

    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='StyleGAN-Human data process',
    )
    cli.add_argument('--image-folder', type=str, dest='image_folder')
    cli.add_argument('--output-folder', dest='output_folder',
                     default='results', type=str)

    print('parsing arguments')
    run(cli.parse_args())

    print('total time elapsed: ', str(time.time() - started_at))