Pose estimation test (perfanalytics#116)
Edits on @hunminkim98's awesome work integrating pose estimation into Pose2Sim with RTMLib. Most of the syntax changes are not necessarily better; they mostly make the code more consistent with the rest of the library. Thank you again for your fantastic work!

General:
- Automatically detects whether a valid CUDA install is available: if so, uses the GPU with the ONNXRuntime backend; otherwise, uses the CPU with the OpenVINO backend (see the sketch after this list)
- The TensorFlow version used for marker augmentation was incompatible with the CUDA torch installation used for pose estimation: edited the code and models so they work with the latest TensorFlow version
- Added logging information to pose estimation
- Readme.md: provided an installation procedure for CUDA (took me a while to find something simple and robust)
- Readme.md: added information about PoseEstimation with RTMLib
- added poseEstimation to tests.py
- created videos for the multi-person case (it used to only have json files, no videos) and reorganized the Demo folders. Had to recreate the calibration file as well
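
A minimal sketch of what this backend selection can look like (the helper name and structure below are illustrative assumptions, not the actual Pose2Sim code):

```python
# Illustrative sketch only: choose the RTMLib backend/device as described above.
import torch

def choose_backend_device():
    '''Use the GPU via ONNXRuntime if a valid CUDA install is found, else the CPU via OpenVINO.'''
    try:
        import onnxruntime as ort
        if torch.cuda.is_available() and 'CUDAExecutionProvider' in ort.get_available_providers():
            return 'onnxruntime', 'cuda'
    except ImportError:
        pass
    return 'openvino', 'cpu'

backend, device = choose_backend_device()
# rtmlib pose estimators accept backend= and device= keyword arguments
```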

Json files:
- the json files used to save only one person; they now save all detected people
- tracking was not taken into account by rtmlib, which caused issues in synchronization: fixed, waiting for merge
- moved the save_to_openpose function out of the main function (see the sketch after this list)
- minified the json files (they take less space when all whitespace is removed)
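
A hedged sketch of such a writer: the function name comes from the commit, but the signature, array shapes, and exact logic here are assumptions:

```python
# Sketch of an OpenPose-style JSON writer that keeps every detected person and
# minifies the output. Signature and array shapes are assumptions.
import json
import numpy as np

def save_to_openpose(json_file_path, keypoints, scores):
    '''keypoints: (n_persons, n_keypoints, 2); scores: (n_persons, n_keypoints).'''
    people = []
    for kpts, scrs in zip(keypoints, scores):
        # OpenPose stores flat [x1, y1, c1, x2, y2, c2, ...] triplets
        flat = np.hstack([kpts, scrs[:, None]]).flatten().tolist()
        people.append({'person_id': [-1], 'pose_keypoints_2d': flat})
    with open(json_file_path, 'w') as f:
        # separators=(',', ':') removes all spaces -> smaller files
        json.dump({'version': 1.3, 'people': people}, f, separators=(',', ':'))
```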

Detection results:
- Compared the triangulated locations of RTMPose keypoints to those of OpenPose, to determine whether the model marker locations in OpenSim needed editing. They did not seem to.

Others in Config.toml:
- removed the "to_openpose" option, which is not needed
- added the flag: save_video = 'to_images' # 'to_video' or 'to_images' or ['to_video', 'to_images']
- changed the way frame_range is handled (this required in-depth changes to synchronization, as well as to personAssociation and triangulation)
- added the flag: time_range_around_maxspeed in synchronization
- the framerate is now automatically detected from the video, set to 60 fps when working from images, or can be given explicitly (see the sketch after this list)
- frame_range -> time_range
- moved participant height and mass to the [project] section (only read for markerAugmentation for now, and for automatic scaling in the future)
- removed reorder_trc from triangulation and Config -> call it for markerAugmentation instead
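
A minimal sketch of the 'auto' framerate behaviour with OpenCV (assumed logic; the actual Pose2Sim helper may differ):

```python
# Sketch of frame_rate = 'auto': read the fps from the video metadata with
# OpenCV, falling back to 60 fps when working from images.
import cv2

def determine_frame_rate(video_path, frame_rate_setting='auto'):
    if frame_rate_setting != 'auto':
        return frame_rate_setting  # explicit value from Config.toml
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    cap.release()
    if not fps or fps <= 0:  # image folder or missing metadata
        return 60
    return round(fps)
```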

Others:
- Provided an installation procedure for OpenSim (for future use) and made continuous integration check its install (a bit harder since it cannot be installed via pip; see the sketch after this list)
- scaling from motion instead of a static pose (will have to study whether it performs as well)
- added logging to synchronization
- Struggled quite a bit with continuous integration
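
Since OpenSim ships via conda rather than pip, a CI-friendly check can simply attempt the import, as in this assumed sketch (not the actual test code):

```python
# Assumed sketch of an OpenSim install check for CI: OpenSim cannot be
# pip-installed, so the test just verifies the conda package imports.
import unittest

class TestOpenSimInstall(unittest.TestCase):
    def test_opensim_import(self):
        try:
            import opensim
        except ImportError:
            self.skipTest('OpenSim missing: install with conda install -c opensim-org opensim')
        self.assertTrue(hasattr(opensim, 'Model'))

if __name__ == '__main__':
    unittest.main()
```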


* Starting point of integrating RTMPose into Pose2Sim. (perfanalytics#111)

* RTM_to_Open

Convert format from RTMPose to OpenPose

* rtm_intergrated

* rtm_integrated

* rtm_integrated

* rtm_integrated

* rtm

* Delete build/lib/Pose2Sim directory

* rtm

* Delete build/lib/Pose2Sim directory

* Delete onnxruntime-gpu

* device = cpu

* add pose folder

* Update tests.py

* added annotation

* fix typo

* Should work but still lots of tests to run. Detailed commit coming soon

* intermediary commit

* last checks before v0.9.0

* Update continuous-integration.yml

* Update tests.py

* replaced tabs with spaces

* unittest issue

* unittest typo

* deactivated display for CI test of pose detection

* Try to make continuous integration work

* a

* b

* c

* d

* e

* f

* g

* h

* i

* j

* k

* l

---------

Co-authored-by: HunMinKim <[email protected]>
davidpagnon and hunminkim98 authored Jul 9, 2024
1 parent f287b0e commit b2fe4f7
Showing 1,496 changed files with 2,016 additions and 19,556 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/continuous-integration.yml
@@ -64,6 +64,8 @@ jobs:
      conda run -n pose2sim flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
    - name: Test with pytest
+     env:
+       PYTHONIOENCODING: utf-8
      run: |
-       cd Pose2Sim/S00_Demo_BatchSession
+       cd Pose2Sim/Utilities
        conda run -n pose2sim pytest -v tests.py
3 changes: 3 additions & 0 deletions .gitignore
@@ -9,7 +9,10 @@ dist/
**/*.trc
**/*.sto
**/*.c3d
**/*.json

**/Calib_qualisys.toml
**/pose-3d/
**/pose/
**/pose-sync/
**/pose-associated/
Binary file modified Content/Pose2D.png
@@ -18,36 +18,48 @@


[project]
multi_person = false # If false, only the main person in scene is analyzed.
frame_rate = 60 # fps
frame_range = [] # For example [10,300], or [] for all frames
multi_person = true # true for trials with multiple participants. If false, only the main person in the scene is analyzed (and it runs much faster).
participant_height = [1.72, 1.40] # m # float if single person, list of float if multi-person (same order as the Static trials) # Only used for marker augmentation
participant_mass = [70.0, 63.5] # kg # Only used for marker augmentation and scaling

frame_rate = 'auto' # fps # int or 'auto'. If 'auto', the frame rate is read from the video (or defaults to 60 fps if you work with images)
frame_range = [] # For example [10,300], or [] for all frames.
## If cameras are not synchronized, designates the frame range of the camera with the shortest recording time
## N.B.: If you want a time range instead, use frame_range = time_range * frame_rate
## For example if you want to analyze from 0.1 to 2 seconds with a 60 fps frame rate,
## frame_range = [0.1, 2.0]*frame_rate = [6, 120]

exclude_from_batch = [] # List of trials to be excluded from batch analysis, ['<participant_dir/trial_dir>', 'etc'].
# e.g. ['S00_P00_Participant/S00_P00_T00_StaticTrial', 'S00_P00_Participant/S00_P00_T01_BalancingTrial']


[pose]
pose_framework = 'openpose' # 'openpose', 'mediapipe', 'alphapose', 'deeplabcut'
pose_model = 'BODY_25B' #With openpose: BODY_25B, BODY_25, BODY_135, COCO, MPII.
#With mediapipe: BLAZEPOSE.
#With alphapose: HALPE_26, HALPE_68, HALPE_136, COCO_133.
#With deeplabcut: CUSTOM. See example at the end of the file.
# What follows has not been implemented yet
overwrite_pose = false
openpose_path = '' # only checked if OpenPose is used
vid_img_extension = 'mp4' # any video or image extension
pose_model = 'HALPE_26' #With RTMLib: HALPE_26 (default, body and feet), COCO_133 (body, feet, hands), COCO_17 (body)
# /!\ Only RTMPose is natively embedded in Pose2Sim. For all other pose estimation methods, you will have to run them yourself, and then refer to the documentation to convert the files if needed
#With MMPose: HALPE_26 (default), COCO_133, COCO_17, CUSTOM. See example at the end of the file
#With openpose: BODY_25B, BODY_25, BODY_135, COCO, MPII
#With mediapipe: BLAZEPOSE
#With alphapose: HALPE_26, HALPE_68, HALPE_136, COCO_133
#With deeplabcut: CUSTOM. See example at the end of the file
mode = 'balanced' # 'lightweight', 'balanced', 'performance'
det_frequency = 1 # Run person detection only every N frames; in between, previously detected bounding boxes are tracked (keypoint detection still runs on every frame).
#Equal to or greater than 1, can be as high as you want in simple uncrowded cases. Much faster, but might be less accurate.
tracking = true # Gives consistent person ID across frames. Slightly slower but might facilitate synchronization if other people are in the background
display_detection = true
save_video = 'to_video' # 'to_video' or 'to_images', 'none', or ['to_video', 'to_images']
output_format = 'openpose' # 'openpose', 'mmpose', 'deeplabcut', 'none' or a list of them # /!\ only 'openpose' is supported for now


[synchronization]
display_sync_plots = true # true or false (lowercase)
keypoints_to_consider = 'all' # 'all' if all points should be considered (default)
# ['RWrist', 'RElbow'] list of keypoint names if you want to specify the keypoints to consider
keypoints_to_consider = 'all' # 'all' if all points should be considered, for example if the participant did not perform any particular sharp movement. In this case, the capture needs to be at least 5-10 seconds long
# ['RWrist', 'RElbow'] list of keypoint names if you want to specify the keypoints to consider.
approx_time_maxspeed = 'auto' # 'auto' if you want to consider the whole capture (default, slower if long sequences)
# [1.0, 0.2, 0.8, 1.1] list of times in seconds, one value per camera if you want to specify an approximate time where to search for synchronization.
# Best times: only one person is in the scene, who performs a sharp vertical movement (with keypoints_to_consider).
reset_sync = false # Revert to state before synchronization. Won't attempt to synchronize unless set to false.
# [10.0, 2.0, 8.0, 11.0] list of times in seconds, one value per camera if you want to specify the approximate time of a clear vertical event by one person standing alone in the scene
time_range_around_maxspeed = 2.0 # Search for best correlation in the range [approx_time_maxspeed - time_range_around_maxspeed, approx_time_maxspeed + time_range_around_maxspeed]
likelihood_threshold = 0.4 # Keypoints whose likelihood is below likelihood_threshold are filtered out
filter_cutoff = 6 # time series are smoothed to get coherent time-lagged correlation
filter_order = 4


# Take heart, calibration is not that complicated once you get the hang of it!
@@ -70,7 +82,7 @@ calibration_type = 'convert' # 'convert' or 'calculate'
[calibration.calculate]
# Camera properties, theoretically need to be calculated only once in a camera lifetime
[calibration.calculate.intrinsics]
overwrite_intrinsics = true # set to false if you don't want to recalculate intrinsic parameters
overwrite_intrinsics = false # set to false if you don't want to recalculate intrinsic parameters
show_detection_intrinsics = true # true or false (lowercase)
intrinsics_extension = 'jpg' # any video or image extension
extract_every_N_sec = 1 # if video, extract frames every N seconds (can be <1 )
@@ -116,7 +128,7 @@ calibration_type = 'convert' # 'convert' or 'calculate'
[personAssociation.single_person]
reproj_error_threshold_association = 20 # px
tracked_keypoint = 'Neck' # If the neck is not detected by the pose_model, check skeleton.py
# and choose a stable point for tracking the person of interest (e.g., 'right_shoulder' with BLAZEPOSE)
# and choose a stable point for tracking the person of interest (e.g., 'right_shoulder' or 'RShoulder')

[personAssociation.multi_person]
reconstruction_error_threshold = 0.1 # 0.1 = 10 cm
@@ -134,13 +146,13 @@ interp_if_gap_smaller_than = 10 # do not interpolate bigger gaps
show_interp_indices = true # true or false (lowercase). For each keypoint, return the frames that need to be interpolated
handle_LR_swap = false # Better if few cameras (eg less than 4) with risk of limb swapping (eg camera facing sagittal plane), otherwise slightly less accurate and slower
undistort_points = false # Better if distorted image (parallel lines curvy on the edge or at least one param > 10^-2), but unnecessary (and slightly slower) if distortions are low
make_c3d = false # save triangulated data in c3d format in addition to trc # Coming soon!
make_c3d = false # save triangulated data in c3d format in addition to trc


[filtering]
type = 'butterworth' # butterworth, kalman, gaussian, LOESS, median, butterworth_on_speed
display_figures = false # true or false (lowercase)
make_c3d = false # save triangulated data in c3d format in addition to trc
make_c3d = false # also save triangulated data in c3d format

[filtering.butterworth]
order = 4
@@ -161,9 +173,9 @@ make_c3d = false # save triangulated data in c3d format in addition to trc


[markerAugmentation]
## Only works on BODY_25 and BODY_25B models
participant_height = 1.72 # m # float if single person, list of float if multi-person (same order as the Static trials)
participant_mass = 70.0 # kg
## Requires the following markers: ["Neck", "RShoulder", "LShoulder", "RHip", "LHip", "RKnee", "LKnee",
## "RAnkle", "LAnkle", "RHeel", "LHeel", "RSmallToe", "LSmallToe",
## "RBigToe", "LBigToe", "RElbow", "LElbow", "RWrist", "LWrist"]
make_c3d = false # save triangulated data in c3d format in addition to trc


@@ -176,20 +188,20 @@ opensim_bin_path = 'C:\OpenSim 4.4\bin'



# CUSTOM skeleton, if you trained your own DeepLabCut model for example.
# CUSTOM skeleton, if you trained your own model from DeepLabCut or MMPose for example.
# Make sure the node ids correspond to the column numbers of the 2D pose file, starting from zero.
#
# If you want to perform inverse kinematics, you will also need to create an OpenSim model
# and add to its markerset the location where you expect the triangulated keypoints to be detected.
#
# In this example, CUSTOM reproduces the BODY_25B skeleton (default skeletons are stored in skeletons.py).
# In this example, CUSTOM reproduces the HALPE_26 skeleton (default skeletons are stored in skeletons.py).
# You can create as many custom skeletons as you want, just add them further down and rename them.
#
# Check your model hierarchy with: for pre, _, node in RenderTree(model):
# print(f'{pre}{node.name} id={node.id}')
[pose.CUSTOM]
name = "CHip"
id = "None"
id = "19"
[[pose.CUSTOM.children]]
name = "RHip"
id = 12
@@ -201,13 +213,13 @@ id = "None"
id = 16
[[pose.CUSTOM.children.children.children.children]]
name = "RBigToe"
id = 22
id = 21
[[pose.CUSTOM.children.children.children.children.children]]
name = "RSmallToe"
id = 23
[[pose.CUSTOM.children.children.children.children]]
name = "RHeel"
id = 24
id = 25
[[pose.CUSTOM.children]]
name = "LHip"
id = 11
@@ -219,19 +231,19 @@ id = "None"
id = 15
[[pose.CUSTOM.children.children.children.children]]
name = "LBigToe"
id = 19
id = 20
[[pose.CUSTOM.children.children.children.children.children]]
name = "LSmallToe"
id = 20
id = 22
[[pose.CUSTOM.children.children.children.children]]
name = "LHeel"
id = 21
id = 24
[[pose.CUSTOM.children]]
name = "Neck"
id = 17
id = 18
[[pose.CUSTOM.children.children]]
name = "Head"
id = 18
id = 17
[[pose.CUSTOM.children.children.children]]
name = "Nose"
id = 0
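
For reference, a skeleton like the CUSTOM one above can be declared and inspected with anytree, using the hierarchy check quoted in the config comments; the subset of nodes below is illustrative:

```python
# Illustrative subset of the CUSTOM (HALPE_26-numbered) skeleton above,
# built with anytree so the hierarchy check from the config comments works.
from anytree import Node, RenderTree

CHip = Node('CHip', id=19)
RHip = Node('RHip', id=12, parent=CHip)
RKnee = Node('RKnee', id=14, parent=RHip)
RAnkle = Node('RAnkle', id=16, parent=RKnee)
Neck = Node('Neck', id=18, parent=CHip)
Head = Node('Head', id=17, parent=Neck)
Nose = Node('Nose', id=0, parent=Head)

for pre, _, node in RenderTree(CHip):
    print(f'{pre}{node.name} id={node.id}')
```
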
@@ -8,31 +8,31 @@
<fov_video bottom="1919" left="0" right="1087" top="0"/>
<fov_video_max bottom="1919" left="0" right="1087" top="0"/>
<transform r11="0.5536380477336265" r12="0.8046719867383512" r13="0.2144479094331626" r21="-0.4345300743838753" r22="0.05946489349975602" r23="0.8986921279821063" r31="0.7104002579937896" r32="-0.5907342212870501" r33="0.3825762057979294" x="1460.2323709212087" y="-1909.1590482454608" z="1896.5058524092062"/>
<intrinsic centerPointU="34110.316406" centerPointV="60680.792969" focalLengthU="107599.671875" focalLengthV="107588.828125" focallength="9.314096596679686" radialDistortion1="-0.046183" radialDistortion2="0.139983" radialDistortion3="0.000000" sensorMaxU="69568" sensorMaxV="122816" sensorMinU="0.000000" sensorMinV="0.000000" skew="0.000000" tangentalDistortion1="0.000608" tangentalDistortion2="0.00069"/>
<intrinsic centerPointU="35134.316406" centerPointV="61960.792969" focalLengthU="107599.671875" focalLengthV="107588.828125" focallength="9.314096596679686" radialDistortion1="-0.046183" radialDistortion2="0.139983" radialDistortion3="0.000000" sensorMaxU="69568" sensorMaxV="122816" sensorMinU="0.000000" sensorMinV="0.000000" skew="0.000000" tangentalDistortion1="0.000608" tangentalDistortion2="0.00069"/>
</camera>
<camera active="1" avg-residual="0.444749" model="none" point-count="999999999" serial="cam_02" viewrotation="0">
<fov_marker bottom="1919" left="0" right="1087" top="0"/>
<fov_marker_max bottom="1919" left="0" right="1087" top="0"/>
<fov_video bottom="1919" left="0" right="1087" top="0"/>
<fov_video_max bottom="1919" left="0" right="1087" top="0"/>
<transform r11="-0.1966832217091926" r12="0.979523227309506" r13="-0.043011131806342306" r21="-0.31478107085300017" r22="-0.02153908797812718" r23="0.9489198834051846" r31="0.9285626460991909" r32="0.20017570994064315" r33="0.3125717476340885" x="2582.0136248568124" y="706.5662881637625" z="1690.9818366061595"/>
<intrinsic centerPointU="34207.652344" centerPointV="61646.457031" focalLengthU="107118.695313" focalLengthV="107123.023438" focallength="9.272462063031563" radialDistortion1="-0.047847" radialDistortion2="0.136786" radialDistortion3="0.000000" sensorMaxU="69568" sensorMaxV="122816" sensorMinU="0.000000" sensorMinV="0.000000" skew="0.000000" tangentalDistortion1="0.000972" tangentalDistortion2="0.000291"/>
<intrinsic centerPointU="35359.652344" centerPointV="62158.457031" focalLengthU="107118.695313" focalLengthV="107123.023438" focallength="9.272462063031563" radialDistortion1="-0.047847" radialDistortion2="0.136786" radialDistortion3="0.000000" sensorMaxU="69568" sensorMaxV="122816" sensorMinU="0.000000" sensorMinV="0.000000" skew="0.000000" tangentalDistortion1="0.000972" tangentalDistortion2="0.000291"/>
</camera>
<camera active="1" avg-residual="0.450323" model="none" point-count="999999999" serial="cam_03" viewrotation="0">
<fov_marker bottom="1919" left="0" right="1087" top="0"/>
<fov_marker_max bottom="1919" left="0" right="1087" top="0"/>
<fov_video bottom="1919" left="0" right="1087" top="0"/>
<fov_video_max bottom="1919" left="0" right="1087" top="0"/>
<transform r11="-0.740641130147863" r12="-0.6704153360235378" r13="-0.044654154988568895" r21="0.25251118212268814" r22="-0.3393170985114385" r23="0.9061467925015139" r31="-0.6226466246887545" r32="0.6598539110153292" r33="0.4205995683324882" x="-3216.86586729648" y="2231.1786368869416" z="2088.19103027083"/>
<intrinsic centerPointU="32845.335938" centerPointV="61120.328125" focalLengthU="107622.296875" focalLengthV="107616.632813" focallength="9.316055073242188" radialDistortion1="-0.046705" radialDistortion2="0.137622" radialDistortion3="0.000000" sensorMaxU="69568" sensorMaxV="122816" sensorMinU="0.000000" sensorMinV="0.000000" skew="0.000000" tangentalDistortion1="-0.000542" tangentalDistortion2="-0.000517"/>
<intrinsic centerPointU="34893.335938" centerPointV="61440.328125" focalLengthU="107622.296875" focalLengthV="107616.632813" focallength="9.316055073242188" radialDistortion1="-0.046705" radialDistortion2="0.137622" radialDistortion3="0.000000" sensorMaxU="69568" sensorMaxV="122816" sensorMinU="0.000000" sensorMinV="0.000000" skew="0.000000" tangentalDistortion1="-0.000542" tangentalDistortion2="-0.000517"/>
</camera>
<camera active="1" avg-residual="0.504772" model="none" point-count="999999999" serial="cam_04" viewrotation="0">
<fov_marker bottom="1919" left="0" right="1087" top="0"/>
<fov_marker_max bottom="1919" left="0" right="1087" top="0"/>
<fov_video bottom="1919" left="0" right="1087" top="0"/>
<fov_video_max bottom="1919" left="0" right="1087" top="0"/>
<transform r11="0.2586331447643361" r12="-0.875095795920923" r13="-0.40904308378315923" r21="0.49590579729377593" r22="-0.24310718522469382" r23="0.8336524076031634" r31="-0.8289670298870364" r32="-0.4184569804097585" r33="0.3710895025551282" x="-3758.720480260634" y="-1415.6654017496533" z="1881.7941041388026"/>
<intrinsic centerPointU="34566.796875" centerPointV="61697.9375" focalLengthU="107215.039063" focalLengthV="107213.070313" focallength="9.280801818890938" radialDistortion1="-0.047633" radialDistortion2="0.134667" radialDistortion3="0.000000" sensorMaxU="69568" sensorMaxV="122816" sensorMinU="0.000000" sensorMinV="0.000000" skew="0.000000" tangentalDistortion1="0.000277" tangentalDistortion2="0.000199"/>
<intrinsic centerPointU="35014.796875" centerPointV="61697.9375" focalLengthU="107215.039063" focalLengthV="107213.070313" focallength="9.280801818890938" radialDistortion1="-0.047633" radialDistortion2="0.134667" radialDistortion3="0.000000" sensorMaxU="69568" sensorMaxV="122816" sensorMinU="0.000000" sensorMinV="0.000000" skew="0.000000" tangentalDistortion1="0.000277" tangentalDistortion2="0.000199"/>
</camera>
</cameras>
</calibration>
Binary file added Pose2Sim/Demo_MultiPerson/videos/cam_01.mp4
Binary file added Pose2Sim/Demo_MultiPerson/videos/cam_02.mp4
Binary file added Pose2Sim/Demo_MultiPerson/videos/cam_03.mp4
Binary file added Pose2Sim/Demo_MultiPerson/videos/cam_04.mp4
