# utils.py

import os
import shutil
import subprocess
import tempfile
from os.path import join, abspath

import open3d as o3d
import smplx
import torch
from tqdm import tqdm

from data_processing import utils
from data_processing.prepare_saga_dataset import DatasetProcessor


class SMPLXWrapper:
    """
    Helper class for creating 3D animations from motion in the axis-angle representation.

    The wrapper builds a gender-neutral SMPL-X model whose batch size equals the
    sequence length, and feeds it all-zero shape, expression and hand-pose
    parameters, so only the body pose changes between frames.
    """

    def __init__(self, sequence_length, smplx_model_folder="smplx_model"):
        """
        Args:
            sequence_length: number of frames in every motion sequence; it is
                used as the batch size of the SMPL-X model.

            smplx_model_folder: passed to `smplx.create` as `model_path`.
        """

        self.smplx_model = smplx.create(
            model_path=smplx_model_folder,
            model_type="smplx",
            gender="neutral",
            use_face_contour=False,
            num_betas=10,
            num_expression_coeffs=10,
            ext="npz",
            use_pca=False,
            batch_size=sequence_length,
        )

        self.seq_len = sequence_length
        # Constant (all-zero) model inputs, one row per frame.
        self.smplx_betas = torch.zeros([sequence_length, 10], dtype=torch.float32)
        self.smplx_expr = torch.zeros([sequence_length, 10], dtype=torch.float32)
        self.smplx_hand_pose = torch.zeros([sequence_length, 45], dtype=torch.float32)
        # Positions of the upper-body joints within `utils.SMPLX_JOINT_NAMES`.
        self.smplx_upper_body_joint_idxs = get_upper_body_joint_names_and_idxs()[1]

    def create_video(
        self, upper_body_joint_rotations, output_file, audio_file=None, caption=None
    ):
        """
        Visualize the given joint rotations as a 3D render and save the results
        in a video file, with optional text caption and audio track.

        Args:
            upper_body_joint_rotations: (n_frames, N_u * 3) array of joint rotations
                as axis-angle vectors, where N_u is the nr. of upper body joints.

            output_file:  the path of the video file where the visualization will be saved

            audio_file: path to the audio file which will be added to the video

            caption: text caption to be added to the video

        Raises:
            subprocess.CalledProcessError: if any ffmpeg invocation fails.
        """
        smplx_output = self._run_smplx(upper_body_joint_rotations)
        vertices = smplx_output.vertices.detach().cpu().numpy()

        self.render_motion(vertices, output_file)
        if audio_file is not None:
            self.add_audio_to_video(output_file, audio_file)
        if caption is not None:
            self.add_caption_to_video(output_file, caption)

    def to_joint_positions(self, upper_body_joint_rotations):
        """
        Convert a motion array from the axis-angle representation to
        the joint positions in the 3D render.

        Args:
            upper_body_joint_rotations: (n_frames, N_u * 3) tensor of joint rotations
                as axis-angle vectors, where N_u is the nr. of upper body joints.

        Returns:
            joint_positions: (n_frames, N_u, 3) NumPy array of joint coordinates
        """
        smplx_output = self._run_smplx(upper_body_joint_rotations)

        # `.cpu()` keeps this in line with `create_video` and is a no-op for
        # tensors that already live on the CPU.
        joint_positions = smplx_output.joints.detach().cpu().numpy()
        # Only return the upper body joint positions
        return joint_positions[:, self.smplx_upper_body_joint_idxs, :]

    def to_smplx_input_pose(self, upper_body_joint_rotations):
        """
        Return a SMPL-X full-body joint rotation array containing the given
        upper-body motion with the rest of the joints fixed in a static position.

        Args:
            upper_body_joint_rotations:  (n_frames, N_u * 3) array of joint rotations
                as axis-angle vectors, where N_u is the nr. of upper body joints.
                Anything accepted by `torch.as_tensor` can be passed.

        Returns:
            smplx_pose: an (n_frames, n_smplx_joints * 3) tensor.
        """

        # NOTE: there are 21 joints in the input to SMPL-X.
        # Initialize all joints with zeroes
        smplx_pose = torch.zeros([self.seq_len, 63], dtype=torch.float32)
        upper_body_idxs = DatasetProcessor._get_frankmocap_upper_body_idxs()
        # Then change only the upper body values. The explicit conversion lets
        # callers pass NumPy arrays or tensors of another dtype/device.
        smplx_pose[:, upper_body_idxs] = torch.as_tensor(
            upper_body_joint_rotations,
            dtype=smplx_pose.dtype,
            device=smplx_pose.device,
        )

        return smplx_pose

    def render_motion(self, vertices, output_file):
        """
        Render `vertices` with Open3D and save them as a video to `output_file`.

        Every frame is rendered off-screen to a PNG inside a temporary directory
        next to `output_file`; the PNGs are then encoded with ffmpeg and the
        temporary directory is removed, even when rendering or encoding fails.

        Args:
            vertices:       The vertices in the output of SMPL-X with shape (n_frames, n_points, 3)
            output_file:    Path to the output video file

        Raises:
            subprocess.CalledProcessError: if ffmpeg fails to encode the frames.
        """
        output_dir = os.path.dirname(abspath(output_file))
        # The output directory is created on demand, as before.
        os.makedirs(output_dir, exist_ok=True)

        # The face topology is the same for every frame, so convert it once.
        triangles = o3d.utility.Vector3iVector(self.smplx_model.faces)

        # A uniquely named directory avoids failing on leftovers of a crashed run.
        with tempfile.TemporaryDirectory(prefix="_temp_dir", dir=output_dir) as temp_dir:
            # Render the frames and save them as images
            for frame_idx, frame in tqdm(
                enumerate(vertices),
                total=len(vertices),
                desc="Saving video frames",
                leave=False,
            ):
                mesh = o3d.geometry.TriangleMesh()
                mesh.vertices = o3d.utility.Vector3dVector(frame)
                mesh.triangles = triangles
                mesh.compute_vertex_normals()
                mesh.paint_uniform_color([0.3, 0.3, 0.3])

                vis = o3d.visualization.Visualizer()
                vis.create_window(visible=False)
                try:
                    vis.add_geometry(mesh)
                    vis.update_geometry(mesh)
                    vis.poll_events()
                    vis.update_renderer()
                    vis.capture_screen_image(
                        join(temp_dir, f"frame_{frame_idx:04}.png")
                    )
                finally:
                    # One window is opened per frame; release it right away
                    # instead of accumulating windows for the whole sequence.
                    vis.destroy_window()

            self.create_video_from_frames(frame_dir=temp_dir, output_file=output_file)

    def create_video_from_frames(self, frame_dir, output_file):
        """
        Encode the `frame_XXXX.png` images in `frame_dir` into `output_file` at 25 fps.

        Raises:
            subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
        """
        self._run_ffmpeg(["-r", "25", "-i", f"{frame_dir}/frame_%04d.png", output_file])

    def add_audio_to_video(self, video_file, audio_file):
        """
        Add the audio track from `audio_file` to `video_file`, in place.

        The video is re-encoded with libx264 and the result is cut to the
        shorter of the two inputs (`-shortest`).

        Raises:
            subprocess.CalledProcessError: if ffmpeg exits with a non-zero status;
                `video_file` is left untouched in that case.
        """
        self._rewrite_video(
            video_file,
            ["-i", audio_file, "-i", video_file, "-c:v", "libx264", "-shortest"],
        )

    def add_caption_to_video(self, video_file, caption):
        """
        Burn `caption` into the bottom centre of `video_file`, in place.

        Captions longer than 60 characters are split into two lines. The text is
        drawn verbatim, so apostrophes, colons, percent signs and backslashes in
        the caption are safe.

        Raises:
            subprocess.CalledProcessError: if ffmpeg exits with a non-zero status;
                `video_file` is left untouched in that case.
        """
        caption = self._wrap_caption(caption)

        # The caption is handed to ffmpeg through a text file rather than being
        # spliced into the filter string, where quotes/colons would need several
        # levels of escaping. NOTE: the temporary path itself is still embedded
        # in single quotes, so it must not contain a single quote.
        with tempfile.TemporaryDirectory() as caption_dir:
            caption_path = join(caption_dir, "caption.txt")
            with open(caption_path, "w", encoding="utf-8") as caption_file:
                caption_file.write(caption)

            drawtext = (
                "drawtext=fontfile='/usr/share/fonts/truetype/ubuntu/UbuntuMono-BI.ttf'"
                f":textfile='{caption_path}'"
                # Disable %{...} and backslash expansion so the text is literal.
                ":expansion=none"
                ":fontcolor=white:fontsize=12:box=1:boxcolor=black@0.5:boxborderw=5"
                ":x=(w-text_w)/2:y=h-th-10"
            )
            self._rewrite_video(
                video_file,
                ["-i", video_file, "-vf", drawtext, "-codec:a", "copy"],
            )

    def _run_smplx(self, upper_body_joint_rotations):
        """Run the SMPL-X model on the given upper-body motion and return its output."""
        smplx_body_pose = self.to_smplx_input_pose(upper_body_joint_rotations)

        return self.smplx_model(
            betas=self.smplx_betas,
            expression=self.smplx_expr,
            body_pose=smplx_body_pose,
            left_hand_pose=self.smplx_hand_pose,
            right_hand_pose=self.smplx_hand_pose,
            return_verts=True,
        )

    def _rewrite_video(self, video_file, ffmpeg_arguments):
        """Run ffmpeg into a temporary file and, on success, move it over `video_file`."""
        temp_file = join(os.path.dirname(abspath(video_file)), "temp.mp4")
        try:
            self._run_ffmpeg([*ffmpeg_arguments, temp_file])
            os.replace(temp_file, video_file)
        finally:
            # Only present if ffmpeg failed part-way; never leave it behind.
            if os.path.exists(temp_file):
                os.remove(temp_file)

    @staticmethod
    def _run_ffmpeg(arguments):
        """Run ffmpeg quietly with `arguments` (a list, so paths may contain spaces)."""
        command = ["ffmpeg", "-y", "-hide_banner", "-loglevel", "error", *arguments]
        # Suppress the regular output but fail loudly on a non-zero exit status.
        subprocess.run(command, stdout=subprocess.DEVNULL, check=True)

    @staticmethod
    def _wrap_caption(caption, max_line_length=60):
        """Split a caption longer than `max_line_length` into two lines at its middle word."""
        if len(caption) <= max_line_length:
            return caption

        words = caption.split()
        if len(words) < 2:
            # Nothing to split on; a leading line break would only add an empty line.
            return caption

        middle = len(words) // 2
        return " ".join(words[:middle]) + "\n" + " ".join(words[middle:])


def get_upper_body_joint_names_and_idxs():
    """
    Return the upper-body joint names together with their indices.

    Returns:
        A `(names, idxs)` pair of equally long lists, where `idxs[i]` is the
        position of `names[i]` in `utils.SMPLX_JOINT_NAMES`.
    """
    torso_and_head = [
        "spine1",
        "spine2",
        "spine3",
        "neck",
        "left_collar",
        "right_collar",
        "head",
    ]
    arms = [
        "left_shoulder",
        "right_shoulder",
        "left_elbow",
        "right_elbow",
        "left_wrist",
        "right_wrist",
    ]
    names = torso_and_head + arms

    # Look every name up in the reference joint list, keeping the order above.
    idxs = []
    for joint_name in names:
        idxs.append(utils.SMPLX_JOINT_NAMES.index(joint_name))

    return names, idxs